diff --git a/.github/actions/custom-build-and-push/action.yml b/.github/actions/custom-build-and-push/action.yml index 48344237059..fbee0554995 100644 --- a/.github/actions/custom-build-and-push/action.yml +++ b/.github/actions/custom-build-and-push/action.yml @@ -32,16 +32,20 @@ inputs: description: 'Cache destinations' required: false retry-wait-time: - description: 'Time to wait before retry in seconds' + description: 'Time to wait before attempt 2 in seconds' required: false - default: '5' + default: '60' + retry-wait-time-2: + description: 'Time to wait before attempt 3 in seconds' + required: false + default: '120' runs: using: "composite" steps: - - name: Build and push Docker image (First Attempt) + - name: Build and push Docker image (Attempt 1 of 3) id: buildx1 - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 continue-on-error: true with: context: ${{ inputs.context }} @@ -54,16 +58,39 @@ runs: cache-from: ${{ inputs.cache-from }} cache-to: ${{ inputs.cache-to }} - - name: Wait to retry + - name: Wait before attempt 2 if: steps.buildx1.outcome != 'success' run: | echo "First attempt failed. Waiting ${{ inputs.retry-wait-time }} seconds before retry..." sleep ${{ inputs.retry-wait-time }} shell: bash - - name: Build and push Docker image (Retry Attempt) + - name: Build and push Docker image (Attempt 2 of 3) + id: buildx2 if: steps.buildx1.outcome != 'success' - uses: docker/build-push-action@v5 + uses: docker/build-push-action@v6 + with: + context: ${{ inputs.context }} + file: ${{ inputs.file }} + platforms: ${{ inputs.platforms }} + pull: ${{ inputs.pull }} + push: ${{ inputs.push }} + load: ${{ inputs.load }} + tags: ${{ inputs.tags }} + cache-from: ${{ inputs.cache-from }} + cache-to: ${{ inputs.cache-to }} + + - name: Wait before attempt 3 + if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' + run: | + echo "Second attempt failed. Waiting ${{ inputs.retry-wait-time-2 }} seconds before retry..." + sleep ${{ inputs.retry-wait-time-2 }} + shell: bash + + - name: Build and push Docker image (Attempt 3 of 3) + id: buildx3 + if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' + uses: docker/build-push-action@v6 with: context: ${{ inputs.context }} file: ${{ inputs.file }} @@ -74,3 +101,9 @@ runs: tags: ${{ inputs.tags }} cache-from: ${{ inputs.cache-from }} cache-to: ${{ inputs.cache-to }} + + - name: Report failure + if: steps.buildx1.outcome != 'success' && steps.buildx2.outcome != 'success' && steps.buildx3.outcome != 'success' + run: | + echo "All attempts failed. Possible transient infrastructure issues? Try again later or inspect logs for details."
+ shell: bash diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index e57283f0377..8287f9b5300 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -6,20 +6,24 @@ [Describe the tests you ran to verify your changes] -## Accepted Risk -[Any know risks or failure modes to point out to reviewers] +## Accepted Risk (provide if relevant) +N/A -## Related Issue(s) -[If applicable, link to the issue(s) this PR addresses] +## Related Issue(s) (provide if relevant) +N/A -## Checklist: -- [ ] All of the automated tests pass -- [ ] All PR comments are addressed and marked resolved -- [ ] If there are migrations, they have been rebased to latest main -- [ ] If there are new dependencies, they are added to the requirements -- [ ] If there are new environment variables, they are added to all of the deployment methods -- [ ] If there are new APIs that don't require auth, they are added to PUBLIC_ENDPOINT_SPECS -- [ ] Docker images build and basic functionalities work -- [ ] Author has done a final read through of the PR right before merge +## Mental Checklist: +- All of the automated tests pass +- All PR comments are addressed and marked resolved +- If there are migrations, they have been rebased to latest main +- If there are new dependencies, they are added to the requirements +- If there are new environment variables, they are added to all of the deployment methods +- If there are new APIs that don't require auth, they are added to PUBLIC_ENDPOINT_SPECS +- Docker images build and basic functionalities work +- Author has done a final read through of the PR right before merge + +## Backporting (check the box to trigger backport action) +Note: You have to check that the action passes, otherwise resolve the conflicts manually and tag the patches. 
+- [ ] This PR should be backported (make sure to check that the backport attempt succeeds) diff --git a/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml new file mode 100644 index 00000000000..871c96841ad --- /dev/null +++ b/.github/workflows/docker-build-push-cloud-web-container-on-tag.yml @@ -0,0 +1,136 @@ +name: Build and Push Cloud Web Image on Tag +# Identical to the web container build, but with correct image tag and build args + +on: + push: + tags: + - '*' + +env: + REGISTRY_IMAGE: danswer/danswer-cloud-web-server + LATEST_TAG: ${{ contains(github.ref_name, 'latest') }} + +jobs: + build: + runs-on: + - runs-on + - runner=${{ matrix.platform == 'linux/amd64' && '8cpu-linux-x64' || '8cpu-linux-arm64' }} + - run-id=${{ github.run_id }} + - tag=platform-${{ matrix.platform }} + strategy: + fail-fast: false + matrix: + platform: + - linux/amd64 + - linux/arm64 + + steps: + - name: Prepare + run: | + platform=${{ matrix.platform }} + echo "PLATFORM_PAIR=${platform//\//-}" >> $GITHUB_ENV + + - name: Checkout + uses: actions/checkout@v4 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + tags: | + type=raw,value=${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} + type=raw,value=${{ env.LATEST_TAG == 'true' && format('{0}:latest', env.REGISTRY_IMAGE) || '' }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Build and push by digest + id: build + uses: docker/build-push-action@v5 + with: + context: ./web + file: ./web/Dockerfile + platforms: ${{ matrix.platform }} + push: true + build-args: | + DANSWER_VERSION=${{ github.ref_name }} + NEXT_PUBLIC_CLOUD_ENABLED=true + NEXT_PUBLIC_POSTHOG_KEY=${{ secrets.POSTHOG_KEY }} + NEXT_PUBLIC_POSTHOG_HOST=${{ secrets.POSTHOG_HOST }} + NEXT_PUBLIC_SENTRY_DSN=${{ secrets.SENTRY_DSN }} + # needed due to weird interactions with the builds for different platforms + no-cache: true + labels: ${{ steps.meta.outputs.labels }} + outputs: type=image,name=${{ env.REGISTRY_IMAGE }},push-by-digest=true,name-canonical=true,push=true + + - name: Export digest + run: | + mkdir -p /tmp/digests + digest="${{ steps.build.outputs.digest }}" + touch "/tmp/digests/${digest#sha256:}" + + - name: Upload digest + uses: actions/upload-artifact@v4 + with: + name: digests-${{ env.PLATFORM_PAIR }} + path: /tmp/digests/* + if-no-files-found: error + retention-days: 1 + + merge: + runs-on: ubuntu-latest + needs: + - build + steps: + - name: Download digests + uses: actions/download-artifact@v4 + with: + path: /tmp/digests + pattern: digests-* + merge-multiple: true + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY_IMAGE }} + + - name: Login to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_TOKEN }} + + - name: Create manifest list and push + working-directory: /tmp/digests + run: | + docker buildx imagetools create $(jq -cr '.tags | map("-t " + .) 
| join(" ")' <<< "$DOCKER_METADATA_OUTPUT_JSON") \ + $(printf '${{ env.REGISTRY_IMAGE }}@sha256:%s ' *) + + - name: Inspect image + run: | + docker buildx imagetools inspect ${{ env.REGISTRY_IMAGE }}:${{ steps.meta.outputs.version }} + + # trivy has their own rate limiting issues causing this action to flake + # we worked around it by hardcoding to different db repos in env + # can re-enable when they figure it out + # https://github.com/aquasecurity/trivy/discussions/7538 + # https://github.com/aquasecurity/trivy-action/issues/389 + - name: Run Trivy vulnerability scanner + uses: aquasecurity/trivy-action@master + env: + TRIVY_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-db:2' + TRIVY_JAVA_DB_REPOSITORY: 'public.ecr.aws/aquasecurity/trivy-java-db:1' + with: + image-ref: docker.io/${{ env.REGISTRY_IMAGE }}:${{ github.ref_name }} + severity: 'CRITICAL,HIGH' diff --git a/.github/workflows/helm-build-push.yml b/.github/workflows/helm-build-push.yml index a9080239b40..a591eaecd3c 100644 --- a/.github/workflows/helm-build-push.yml +++ b/.github/workflows/helm-build-push.yml @@ -62,7 +62,7 @@ jobs: permissions: contents: write runs-on: ubuntu-latest - # needs: helm_chart_version_check + needs: helm_chart_version_check # if: ${{ needs.helm_chart_version_check.outputs.version_changed == 'true' }} steps: - name: Checkout diff --git a/.github/workflows/hotfix-release-branches.yml b/.github/workflows/hotfix-release-branches.yml new file mode 100644 index 00000000000..0e921f8d694 --- /dev/null +++ b/.github/workflows/hotfix-release-branches.yml @@ -0,0 +1,172 @@ +# This workflow is intended to be manually triggered via the GitHub Action tab. +# Given a hotfix branch, it will attempt to open a PR to all release branches and +# by default auto merge them + +name: Hotfix release branches + +on: + workflow_dispatch: + inputs: + hotfix_commit: + description: 'Hotfix commit hash' + required: true + hotfix_suffix: + description: 'Hotfix branch suffix (e.g. hotfix/v0.8-{suffix})' + required: true + release_branch_pattern: + description: 'Release branch pattern (regex)' + required: true + default: 'release/.*' + auto_merge: + description: 'Automatically merge the hotfix PRs' + required: true + type: choice + default: 'true' + options: + - true + - false + +jobs: + hotfix_release_branches: + permissions: write-all + # See https://runs-on.com/runners/linux/ + # use a lower powered instance since this just does i/o to docker hub + runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"] + steps: + + # needs RKUO_DEPLOY_KEY for write access to merge PR's + - name: Checkout Repository + uses: actions/checkout@v4 + with: + ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}" + fetch-depth: 0 + + - name: Set up Git user + run: | + git config user.name "Richard Kuo [bot]" + git config user.email "rkuo[bot]@danswer.ai" + + - name: Fetch All Branches + run: | + git fetch --all --prune + + - name: Verify Hotfix Commit Exists + run: | + git rev-parse --verify "${{ github.event.inputs.hotfix_commit }}" || { echo "Commit not found: ${{ github.event.inputs.hotfix_commit }}"; exit 1; } + + - name: Get Release Branches + id: get_release_branches + run: | + BRANCHES=$(git branch -r | grep -E "${{ github.event.inputs.release_branch_pattern }}" | sed 's|origin/||' | tr -d ' ') + if [ -z "$BRANCHES" ]; then + echo "No release branches found matching pattern '${{ github.event.inputs.release_branch_pattern }}'." 
+ exit 1 + fi + + echo "Found release branches:" + echo "$BRANCHES" + + # Join the branches into a single line separated by commas + BRANCHES_JOINED=$(echo "$BRANCHES" | tr '\n' ',' | sed 's/,$//') + + # Set the branches as an output + echo "branches=$BRANCHES_JOINED" >> $GITHUB_OUTPUT + + # notes on all the vagaries of wiring up automated PR's + # https://github.com/peter-evans/create-pull-request/blob/main/docs/concepts-guidelines.md#triggering-further-workflow-runs + # we must use a custom token for GH_TOKEN to trigger the subsequent PR checks + - name: Create and Merge Pull Requests to Matching Release Branches + env: + HOTFIX_COMMIT: ${{ github.event.inputs.hotfix_commit }} + HOTFIX_SUFFIX: ${{ github.event.inputs.hotfix_suffix }} + AUTO_MERGE: ${{ github.event.inputs.auto_merge }} + GH_TOKEN: ${{ secrets.RKUO_PERSONAL_ACCESS_TOKEN }} + run: | + # Get the branches from the previous step + BRANCHES="${{ steps.get_release_branches.outputs.branches }}" + + # Convert BRANCHES to an array + IFS=$',' read -ra BRANCH_ARRAY <<< "$BRANCHES" + + # Loop through each release branch and create and merge a PR + for RELEASE_BRANCH in "${BRANCH_ARRAY[@]}"; do + echo "Processing $RELEASE_BRANCH..." + + # Parse out the release version by removing "release/" from the branch name + RELEASE_VERSION=${RELEASE_BRANCH#release/} + echo "Release version parsed: $RELEASE_VERSION" + + HOTFIX_BRANCH="hotfix/${RELEASE_VERSION}-${HOTFIX_SUFFIX}" + echo "Creating PR from $HOTFIX_BRANCH to $RELEASE_BRANCH" + + # Checkout the release branch + echo "Checking out $RELEASE_BRANCH" + git checkout "$RELEASE_BRANCH" + + # Create the new hotfix branch + if git rev-parse --verify "$HOTFIX_BRANCH" >/dev/null 2>&1; then + echo "Hotfix branch $HOTFIX_BRANCH already exists. Skipping branch creation." + else + echo "Branching $RELEASE_BRANCH to $HOTFIX_BRANCH" + git checkout -b "$HOTFIX_BRANCH" + fi + + # Check if the hotfix commit is a merge commit + if git rev-list --merges -n 1 "$HOTFIX_COMMIT" >/dev/null 2>&1; then + # -m 1 uses the target branch as the base (which is what we want) + echo "Hotfix commit $HOTFIX_COMMIT is a merge commit, using -m 1 for cherry-pick" + CHERRY_PICK_CMD="git cherry-pick -m 1 $HOTFIX_COMMIT" + else + CHERRY_PICK_CMD="git cherry-pick $HOTFIX_COMMIT" + fi + + # Perform the cherry-pick + echo "Executing: $CHERRY_PICK_CMD" + eval "$CHERRY_PICK_CMD" + + if [ $? -ne 0 ]; then + echo "Cherry-pick failed for $HOTFIX_COMMIT on $HOTFIX_BRANCH. Aborting..." + git cherry-pick --abort + continue + fi + + # Push the hotfix branch to the remote + echo "Pushing $HOTFIX_BRANCH..." + git push origin "$HOTFIX_BRANCH" + echo "Hotfix branch $HOTFIX_BRANCH created and pushed." + + # Check if PR already exists + EXISTING_PR=$(gh pr list --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH" --state open --json number --jq '.[0].number') + + if [ -n "$EXISTING_PR" ]; then + echo "An open PR already exists: #$EXISTING_PR. Skipping..." + continue + fi + + # Create a new PR and capture the output + PR_OUTPUT=$(gh pr create --title "Merge $HOTFIX_BRANCH into $RELEASE_BRANCH" \ + --body "Automated PR to merge \`$HOTFIX_BRANCH\` into \`$RELEASE_BRANCH\`." 
\ + --head "$HOTFIX_BRANCH" --base "$RELEASE_BRANCH") + + # Extract the URL from the output + PR_URL=$(echo "$PR_OUTPUT" | grep -Eo 'https://github.com/[^ ]+') + echo "Pull request created: $PR_URL" + + # Extract PR number from URL + PR_NUMBER=$(basename "$PR_URL") + echo "Pull request created: $PR_NUMBER" + + if [ "$AUTO_MERGE" == "true" ]; then + echo "Attempting to merge pull request #$PR_NUMBER" + + # Attempt to merge the PR + gh pr merge "$PR_NUMBER" --merge --auto --delete-branch + + if [ $? -eq 0 ]; then + echo "Pull request #$PR_NUMBER merged successfully." + else + # Optionally, handle the error or continue + echo "Failed to merge pull request #$PR_NUMBER." + fi + fi + done \ No newline at end of file diff --git a/.github/workflows/nightly-close-stale-issues.yml b/.github/workflows/nightly-close-stale-issues.yml new file mode 100644 index 00000000000..a7d296e0a92 --- /dev/null +++ b/.github/workflows/nightly-close-stale-issues.yml @@ -0,0 +1,23 @@ +name: 'Nightly - Close stale issues and PRs' +on: + schedule: + - cron: '0 11 * * *' # Runs every day at 3 AM PST / 4 AM PDT / 11 AM UTC + +permissions: + # contents: write # only for delete-branch option + issues: write + pull-requests: write + +jobs: + stale: + runs-on: ubuntu-latest + steps: + - uses: actions/stale@v9 + with: + stale-issue-message: 'This issue is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.' + stale-pr-message: 'This PR is stale because it has been open 75 days with no activity. Remove stale label or comment or this will be closed in 15 days.' + close-issue-message: 'This issue was closed because it has been stalled for 90 days with no activity.' + close-pr-message: 'This PR was closed because it has been stalled for 90 days with no activity.' 
+ days-before-stale: 75 +# days-before-close: 90 # uncomment after we test stale behavior + \ No newline at end of file diff --git a/.github/workflows/nightly-scan-licenses.yml b/.github/workflows/nightly-scan-licenses.yml new file mode 100644 index 00000000000..9aa7030e0b9 --- /dev/null +++ b/.github/workflows/nightly-scan-licenses.yml @@ -0,0 +1,76 @@ +# Scan for problematic software licenses + +# trivy has their own rate limiting issues causing this action to flake +# we worked around it by hardcoding to different db repos in env +# can re-enable when they figure it out +# https://github.com/aquasecurity/trivy/discussions/7538 +# https://github.com/aquasecurity/trivy-action/issues/389 + +name: 'Nightly - Scan licenses' +on: +# schedule: +# - cron: '0 14 * * *' # Runs every day at 6 AM PST / 7 AM PDT / 2 PM UTC + workflow_dispatch: # Allows manual triggering + +permissions: + actions: read + contents: read + security-events: write + +jobs: + scan-licenses: + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=2cpu-linux-x64,"run-id=${{ github.run_id }}"] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + cache: 'pip' + cache-dependency-path: | + backend/requirements/default.txt + backend/requirements/dev.txt + backend/requirements/model_server.txt + + - name: Get explicit and transitive dependencies + run: | + python -m pip install --upgrade pip + pip install --retries 5 --timeout 30 -r backend/requirements/default.txt + pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt + pip install --retries 5 --timeout 30 -r backend/requirements/model_server.txt + pip freeze > requirements-all.txt + + - name: Check python + id: license_check_report + uses: pilosus/action-pip-license-checker@v2 + with: + requirements: 'requirements-all.txt' + fail: 'Copyleft' + exclude: '(?i)^(pylint|aio[-_]*).*' + + - name: Print report + if: ${{ always() }} + run: echo "${{ steps.license_check_report.outputs.report }}" + + - name: Install npm dependencies + working-directory: ./web + run: npm ci + + - name: Run Trivy vulnerability scanner in repo mode + uses: aquasecurity/trivy-action@0.28.0 + with: + scan-type: fs + scanners: license + format: table +# format: sarif +# output: trivy-results.sarif + severity: HIGH,CRITICAL + +# - name: Upload Trivy scan results to GitHub Security tab +# uses: github/codeql-action/upload-sarif@v3 +# with: +# sarif_file: trivy-results.sarif diff --git a/.github/workflows/run-it.yml b/.github/workflows/pr-Integration-tests.yml similarity index 50% rename from .github/workflows/run-it.yml rename to .github/workflows/pr-Integration-tests.yml index cbb9954e918..1f28866d6ee 100644 --- a/.github/workflows/run-it.yml +++ b/.github/workflows/pr-Integration-tests.yml @@ -1,4 +1,4 @@ -name: Run Integration Tests +name: Run Integration Tests v2 concurrency: group: Run-Integration-Tests-${{ github.workflow }}-${{ github.head_ref || github.event.workflow_run.head_branch || github.run_id }} cancel-in-progress: true @@ -12,11 +12,12 @@ on: env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }} jobs: integration-tests: # See https://runs-on.com/runners/linux/ - runs-on: [runs-on,runner=8cpu-linux-x64,ram=32,"run-id=${{ github.run_id }}"] + runs-on: [runs-on,runner=8cpu-linux-x64,ram=16,"run-id=${{ github.run_id }}"] steps: - name: Checkout code uses: actions/checkout@v4 @@ -30,25 +31,35 @@ jobs: username: ${{ 
secrets.DOCKER_USERNAME }} password: ${{ secrets.DOCKER_TOKEN }} - # NOTE: we don't need to build the Web Docker image since it's not used - # during the IT for now. We have a separate action to verify it builds - # succesfully + # tag every docker image with "test" so that we can spin up the correct set + # of images during testing + + # We don't need to build the Web Docker image since it's not yet used + # in the integration tests. We have a separate action to verify that it builds + # successfully. - name: Pull Web Docker image run: | docker pull danswer/danswer-web-server:latest - docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:it + docker tag danswer/danswer-web-server:latest danswer/danswer-web-server:test + # we use the runs-on cache for docker builds + # in conjunction with runs-on runners, it has better speed and unlimited caching + # https://runs-on.com/caching/s3-cache-for-github-actions/ + # https://runs-on.com/caching/docker/ + # https://github.com/moby/buildkit#s3-cache-experimental + + # images are built and run locally for testing purposes. Not pushed. - name: Build Backend Docker image uses: ./.github/actions/custom-build-and-push with: context: ./backend file: ./backend/Dockerfile platforms: linux/amd64 - tags: danswer/danswer-backend:it - cache-from: type=registry,ref=danswer/danswer-backend:it - cache-to: | - type=registry,ref=danswer/danswer-backend:it,mode=max - type=inline + tags: danswer/danswer-backend:test + push: false + load: true + cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/backend/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max - name: Build Model Server Docker image uses: ./.github/actions/custom-build-and-push @@ -56,32 +67,83 @@ jobs: context: ./backend file: ./backend/Dockerfile.model_server platforms: linux/amd64 - tags: danswer/danswer-model-server:it - cache-from: type=registry,ref=danswer/danswer-model-server:it - cache-to: | - type=registry,ref=danswer/danswer-model-server:it,mode=max - type=inline - + tags: danswer/danswer-model-server:test + push: false + load: true + cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/model-server/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + - name: Build integration test Docker image uses: ./.github/actions/custom-build-and-push with: context: ./backend file: ./backend/tests/integration/Dockerfile platforms: linux/amd64 - tags: danswer/integration-test-runner:it - cache-from: type=registry,ref=danswer/integration-test-runner:it - cache-to: | - type=registry,ref=danswer/integration-test-runner:it,mode=max - type=inline + tags: danswer/danswer-integration:test + push: false + load: true + cache-from: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }} + cache-to: type=s3,prefix=cache/${{ github.repository }}/integration-tests/integration/,region=${{ env.RUNS_ON_AWS_REGION }},bucket=${{ env.RUNS_ON_S3_BUCKET_CACHE }},mode=max + + # Start containers for multi-tenant tests + - name: Start Docker containers for multi-tenant 
tests + run: | + cd deployment/docker_compose + ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ + MULTI_TENANT=true \ + AUTH_TYPE=basic \ + REQUIRE_EMAIL_VERIFICATION=false \ + DISABLE_TELEMETRY=true \ + IMAGE_TAG=test \ + docker compose -f docker-compose.dev.yml -p danswer-stack up -d + id: start_docker_multi_tenant - - name: Start Docker containers + # In practice, `cloud` Auth type would require OAUTH credentials to be set. + - name: Run Multi-Tenant Integration Tests + run: | + echo "Running integration tests..." + docker run --rm --network danswer-stack_default \ + --name test-runner \ + -e POSTGRES_HOST=relational_db \ + -e POSTGRES_USER=postgres \ + -e POSTGRES_PASSWORD=password \ + -e POSTGRES_DB=postgres \ + -e VESPA_HOST=index \ + -e REDIS_HOST=cache \ + -e API_SERVER_HOST=api_server \ + -e OPENAI_API_KEY=${OPENAI_API_KEY} \ + -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \ + -e TEST_WEB_HOSTNAME=test-runner \ + -e AUTH_TYPE=cloud \ + -e MULTI_TENANT=true \ + danswer/danswer-integration:test \ + /app/tests/integration/multitenant_tests + continue-on-error: true + id: run_multitenant_tests + + - name: Check multi-tenant test results + run: | + if [ ${{ steps.run_multitenant_tests.outcome }} == 'failure' ]; then + echo "Integration tests failed. Exiting with error." + exit 1 + else + echo "All integration tests passed successfully." + fi + + - name: Stop multi-tenant Docker containers + run: | + cd deployment/docker_compose + docker compose -f docker-compose.dev.yml -p danswer-stack down -v + + + - name: Start Docker containers run: | cd deployment/docker_compose ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=true \ AUTH_TYPE=basic \ REQUIRE_EMAIL_VERIFICATION=false \ DISABLE_TELEMETRY=true \ - IMAGE_TAG=it \ + IMAGE_TAG=test \ docker compose -f docker-compose.dev.yml -p danswer-stack up -d id: start_docker @@ -119,7 +181,7 @@ jobs: done echo "Finished waiting for service." - - name: Run integration tests + - name: Run Standard Integration Tests run: | echo "Running integration tests..." docker run --rm --network danswer-stack_default \ @@ -132,8 +194,10 @@ -e REDIS_HOST=cache \ -e API_SERVER_HOST=api_server \ -e OPENAI_API_KEY=${OPENAI_API_KEY} \ + -e SLACK_BOT_TOKEN=${SLACK_BOT_TOKEN} \ -e TEST_WEB_HOSTNAME=test-runner \ - danswer/integration-test-runner:it + danswer/danswer-integration:test \ + /app/tests/integration/tests continue-on-error: true id: run_tests @@ -146,6 +210,11 @@ echo "All integration tests passed successfully."
fi + - name: Stop Docker containers + run: | + cd deployment/docker_compose + docker compose -f docker-compose.dev.yml -p danswer-stack down -v + - name: Save Docker logs if: success() || failure() run: | @@ -155,7 +224,7 @@ jobs: - name: Upload logs if: success() || failure() - uses: actions/upload-artifact@v3 + uses: actions/upload-artifact@v4 with: name: docker-logs path: ${{ github.workspace }}/docker-compose.log diff --git a/.github/workflows/pr-backport-autotrigger.yml b/.github/workflows/pr-backport-autotrigger.yml new file mode 100644 index 00000000000..273f00a5c5a --- /dev/null +++ b/.github/workflows/pr-backport-autotrigger.yml @@ -0,0 +1,124 @@ +name: Backport on Merge + +# Note this workflow does not trigger the builds, be sure to manually tag the branches to trigger the builds + +on: + pull_request: + types: [closed] # Later we check for merge so only PRs that go in can get backported + +permissions: + contents: write + actions: write + +jobs: + backport: + if: github.event.pull_request.merged == true + runs-on: ubuntu-latest + env: + GITHUB_TOKEN: ${{ secrets.YUHONG_GH_ACTIONS }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + ssh-key: "${{ secrets.RKUO_DEPLOY_KEY }}" + fetch-depth: 0 + + - name: Set up Git user + run: | + git config user.name "Richard Kuo [bot]" + git config user.email "rkuo[bot]@danswer.ai" + git fetch --prune + + - name: Check for Backport Checkbox + id: checkbox-check + run: | + PR_BODY="${{ github.event.pull_request.body }}" + if [[ "$PR_BODY" == *"[x] This PR should be backported"* ]]; then + echo "backport=true" >> $GITHUB_OUTPUT + else + echo "backport=false" >> $GITHUB_OUTPUT + fi + + - name: List and sort release branches + id: list-branches + run: | + git fetch --all --tags + BRANCHES=$(git for-each-ref --format='%(refname:short)' refs/remotes/origin/release/* | sed 's|origin/release/||' | sort -Vr) + BETA=$(echo "$BRANCHES" | head -n 1) + STABLE=$(echo "$BRANCHES" | head -n 2 | tail -n 1) + echo "beta=release/$BETA" >> $GITHUB_OUTPUT + echo "stable=release/$STABLE" >> $GITHUB_OUTPUT + # Fetch latest tags for beta and stable + LATEST_BETA_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*-beta.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+-beta\.[0-9]+$" | grep -v -- "-cloud" | sort -Vr | head -n 1) + LATEST_STABLE_TAG=$(git tag -l "v[0-9]*.[0-9]*.[0-9]*" | grep -E "^v[0-9]+\.[0-9]+\.[0-9]+$" | sort -Vr | head -n 1) + + # Handle case where no beta tags exist + if [[ -z "$LATEST_BETA_TAG" ]]; then + NEW_BETA_TAG="v1.0.0-beta.1" + else + NEW_BETA_TAG=$(echo $LATEST_BETA_TAG | awk -F '[.-]' '{print $1 "." $2 "." $3 "-beta." ($NF+1)}') + fi + + # Increment latest stable tag + NEW_STABLE_TAG=$(echo $LATEST_STABLE_TAG | awk -F '.' '{print $1 "." $2 "." 
($3+1)}') + echo "latest_beta_tag=$LATEST_BETA_TAG" >> $GITHUB_OUTPUT + echo "latest_stable_tag=$LATEST_STABLE_TAG" >> $GITHUB_OUTPUT + echo "new_beta_tag=$NEW_BETA_TAG" >> $GITHUB_OUTPUT + echo "new_stable_tag=$NEW_STABLE_TAG" >> $GITHUB_OUTPUT + + - name: Echo branch and tag information + run: | + echo "Beta branch: ${{ steps.list-branches.outputs.beta }}" + echo "Stable branch: ${{ steps.list-branches.outputs.stable }}" + echo "Latest beta tag: ${{ steps.list-branches.outputs.latest_beta_tag }}" + echo "Latest stable tag: ${{ steps.list-branches.outputs.latest_stable_tag }}" + echo "New beta tag: ${{ steps.list-branches.outputs.new_beta_tag }}" + echo "New stable tag: ${{ steps.list-branches.outputs.new_stable_tag }}" + + - name: Trigger Backport + if: steps.checkbox-check.outputs.backport == 'true' + run: | + set -e + echo "Backporting to beta ${{ steps.list-branches.outputs.beta }} and stable ${{ steps.list-branches.outputs.stable }}" + + # Echo the merge commit SHA + echo "Merge commit SHA: ${{ github.event.pull_request.merge_commit_sha }}" + + # Fetch all history for all branches and tags + git fetch --prune + + # Reset and prepare the beta branch + git checkout ${{ steps.list-branches.outputs.beta }} + echo "Last 5 commits on beta branch:" + git log -n 5 --pretty=format:"%H" + echo "" # Newline for formatting + + # Cherry-pick the merge commit from the merged PR + git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || { + echo "Cherry-pick to beta failed due to conflicts." + exit 1 + } + + # Create new beta branch/tag + git tag ${{ steps.list-branches.outputs.new_beta_tag }} + # Push the changes and tag to the beta branch using PAT + git push origin ${{ steps.list-branches.outputs.beta }} + git push origin ${{ steps.list-branches.outputs.new_beta_tag }} + + # Reset and prepare the stable branch + git checkout ${{ steps.list-branches.outputs.stable }} + echo "Last 5 commits on stable branch:" + git log -n 5 --pretty=format:"%H" + echo "" # Newline for formatting + + # Cherry-pick the merge commit from the merged PR + git cherry-pick -m 1 ${{ github.event.pull_request.merge_commit_sha }} || { + echo "Cherry-pick to stable failed due to conflicts." 
+ exit 1 + } + + # Create new stable branch/tag + git tag ${{ steps.list-branches.outputs.new_stable_tag }} + # Push the changes and tag to the stable branch using PAT + git push origin ${{ steps.list-branches.outputs.stable }} + git push origin ${{ steps.list-branches.outputs.new_stable_tag }} diff --git a/.github/workflows/pr-python-checks.yml b/.github/workflows/pr-python-checks.yml index 0a9e9f96a63..db16848bd2f 100644 --- a/.github/workflows/pr-python-checks.yml +++ b/.github/workflows/pr-python-checks.yml @@ -14,10 +14,10 @@ jobs: steps: - name: Checkout code - uses: actions/checkout@v3 + uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' cache: 'pip' diff --git a/.github/workflows/pr-python-connector-tests.yml b/.github/workflows/pr-python-connector-tests.yml index 642618000d2..88053427726 100644 --- a/.github/workflows/pr-python-connector-tests.yml +++ b/.github/workflows/pr-python-connector-tests.yml @@ -18,6 +18,11 @@ env: # Jira JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} + # Google + GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR }} + GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR }} + GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR: ${{ secrets.GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR }} + GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR: ${{ secrets.GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR }} jobs: connectors-check: @@ -32,7 +37,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: "3.11" cache: "pip" diff --git a/.github/workflows/pr-python-model-tests.yml b/.github/workflows/pr-python-model-tests.yml new file mode 100644 index 00000000000..a070eea27a7 --- /dev/null +++ b/.github/workflows/pr-python-model-tests.yml @@ -0,0 +1,58 @@ +name: Connector Tests + +on: + schedule: + # This cron expression runs the job daily at 16:00 UTC (9am PT) + - cron: "0 16 * * *" + +env: + # Bedrock + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + AWS_REGION_NAME: ${{ secrets.AWS_REGION_NAME }} + + # OpenAI + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + +jobs: + model-check: + # See https://runs-on.com/runners/linux/ + runs-on: [runs-on,runner=8cpu-linux-x64,"run-id=${{ github.run_id }}"] + + env: + PYTHONPATH: ./backend + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: "pip" + cache-dependency-path: | + backend/requirements/default.txt + backend/requirements/dev.txt + + - name: Install Dependencies + run: | + python -m pip install --upgrade pip + pip install --retries 5 --timeout 30 -r backend/requirements/default.txt + pip install --retries 5 --timeout 30 -r backend/requirements/dev.txt + + - name: Run Tests + shell: script -q -e -c "bash --noprofile --norc -eo pipefail {0}" + run: | + py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/llm + py.test -o junit_family=xunit2 -xv --ff backend/tests/daily/embedding + + - name: Alert on Failure + if: failure() && github.event_name == 'schedule' + env: + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }} + run: | + curl -X POST \ + -H 'Content-type: application/json' \ + --data '{"text":"Scheduled Model Tests failed! 
Check the run at: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}"}' \ + $SLACK_WEBHOOK diff --git a/.github/workflows/pr-python-tests.yml b/.github/workflows/pr-python-tests.yml index ce57a7a5814..5637300615b 100644 --- a/.github/workflows/pr-python-tests.yml +++ b/.github/workflows/pr-python-tests.yml @@ -21,7 +21,7 @@ jobs: uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: '3.11' cache: 'pip' diff --git a/.github/workflows/pr-quality-checks.yml b/.github/workflows/pr-quality-checks.yml index 128317a79ce..3ba206669a6 100644 --- a/.github/workflows/pr-quality-checks.yml +++ b/.github/workflows/pr-quality-checks.yml @@ -18,6 +18,6 @@ jobs: - uses: actions/setup-python@v5 with: python-version: "3.11" - - uses: pre-commit/action@v3.0.0 + - uses: pre-commit/action@v3.0.1 with: extra_args: ${{ github.event_name == 'pull_request' && format('--from-ref {0} --to-ref {1}', github.event.pull_request.base.sha, github.event.pull_request.head.sha) || '' }} diff --git a/.github/workflows/tag-nightly.yml b/.github/workflows/tag-nightly.yml index bf2699d9fd4..50bb20808a3 100644 --- a/.github/workflows/tag-nightly.yml +++ b/.github/workflows/tag-nightly.yml @@ -2,7 +2,7 @@ name: Nightly Tag Push on: schedule: - - cron: '0 0 * * *' # Runs every day at midnight UTC + - cron: '0 10 * * *' # Runs every day at 2 AM PST / 3 AM PDT / 10 AM UTC permissions: contents: write # Allows pushing tags to the repository diff --git a/.vscode/launch.template.jsonc b/.vscode/launch.template.jsonc index c733800981c..87875907cd5 100644 --- a/.vscode/launch.template.jsonc +++ b/.vscode/launch.template.jsonc @@ -6,19 +6,69 @@ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 "version": "0.2.0", "compounds": [ + { + // Dummy entry used to label the group + "name": "--- Compound ---", + "configurations": [ + "--- Individual ---" + ], + "presentation": { + "group": "1", + } + }, { "name": "Run All Danswer Services", "configurations": [ "Web Server", "Model Server", "API Server", - "Indexing", - "Background Jobs", - "Slack Bot" - ] - } + "Slack Bot", + "Celery primary", + "Celery light", + "Celery heavy", + "Celery indexing", + "Celery beat", + ], + "presentation": { + "group": "1", + } + }, + { + "name": "Web / Model / API", + "configurations": [ + "Web Server", + "Model Server", + "API Server", + ], + "presentation": { + "group": "1", + } + }, + { + "name": "Celery (all)", + "configurations": [ + "Celery primary", + "Celery light", + "Celery heavy", + "Celery indexing", + "Celery beat" + ], + "presentation": { + "group": "1", + } + } ], "configurations": [ + { + // Dummy entry used to label the group + "name": "--- Individual ---", + "type": "node", + "request": "launch", + "presentation": { + "group": "2", + "order": 0 + } + }, { "name": "Web Server", "type": "node", @@ -29,7 +79,11 @@ "runtimeArgs": [ "run", "dev" ], - "console": "integratedTerminal" + "presentation": { + "group": "2", + }, + "console": "integratedTerminal", + "consoleTitle": "Web Server Console" }, { "name": "Model Server", @@ -48,7 +102,11 @@ "--reload", "--port", "9000" - ] + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "Model Server Console" }, { "name": "API Server", @@ -68,57 +126,171 @@ "--reload", "--port", "8080" - ] + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "API Server Console" }, + // For the listener to access the Slack API, + // DANSWER_BOT_SLACK_APP_TOKEN & 
DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project { - "name": "Indexing", - "consoleName": "Indexing", + "name": "Slack Bot", + "consoleName": "Slack Bot", "type": "debugpy", "request": "launch", - "program": "danswer/background/update.py", + "program": "danswer/danswerbot/slack/listener.py", "cwd": "${workspaceFolder}/backend", "envFile": "${workspaceFolder}/.vscode/.env", "env": { - "ENABLE_MULTIPASS_INDEXING": "false", "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1", "PYTHONPATH": "." - } + }, + "presentation": { + "group": "2", + }, + "consoleTitle": "Slack Bot Console" }, - // Celery and all async jobs, usually would include indexing as well but this is handled separately above for dev { - "name": "Background Jobs", - "consoleName": "Background Jobs", + "name": "Celery primary", "type": "debugpy", "request": "launch", - "program": "scripts/dev_run_background_jobs.py", + "module": "celery", "cwd": "${workspaceFolder}/backend", "envFile": "${workspaceFolder}/.vscode/.env", "env": { - "LOG_DANSWER_MODEL_INTERACTIONS": "True", + "LOG_LEVEL": "INFO", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + }, + "args": [ + "-A", + "danswer.background.celery.versioned_apps.primary", + "worker", + "--pool=threads", + "--concurrency=4", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=primary@%n", + "-Q", + "celery", + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "Celery primary Console" + }, + { + "name": "Celery light", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "INFO", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + }, + "args": [ + "-A", + "danswer.background.celery.versioned_apps.light", + "worker", + "--pool=threads", + "--concurrency=64", + "--prefetch-multiplier=8", + "--loglevel=INFO", + "--hostname=light@%n", + "-Q", + "vespa_metadata_sync,connector_deletion", + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "Celery light Console" + }, + { + "name": "Celery heavy", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_LEVEL": "INFO", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + }, + "args": [ + "-A", + "danswer.background.celery.versioned_apps.heavy", + "worker", + "--pool=threads", + "--concurrency=4", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=heavy@%n", + "-Q", + "connector_pruning", + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "Celery heavy Console" + }, + { + "name": "Celery indexing", + "type": "debugpy", + "request": "launch", + "module": "celery", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "ENABLE_MULTIPASS_INDEXING": "false", "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1", "PYTHONPATH": "." 
}, "args": [ - "--no-indexing" - ] + "-A", + "danswer.background.celery.versioned_apps.indexing", + "worker", + "--pool=threads", + "--concurrency=1", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=indexing@%n", + "-Q", + "connector_indexing", + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "Celery indexing Console" }, - // For the listner to access the Slack API, - // DANSWER_BOT_SLACK_APP_TOKEN & DANSWER_BOT_SLACK_BOT_TOKEN need to be set in .env file located in the root of the project { - "name": "Slack Bot", - "consoleName": "Slack Bot", + "name": "Celery beat", "type": "debugpy", "request": "launch", - "program": "danswer/danswerbot/slack/listener.py", + "module": "celery", "cwd": "${workspaceFolder}/backend", "envFile": "${workspaceFolder}/.vscode/.env", "env": { "LOG_LEVEL": "DEBUG", "PYTHONUNBUFFERED": "1", "PYTHONPATH": "." - } + }, + "args": [ + "-A", + "danswer.background.celery.versioned_apps.beat", + "beat", + "--loglevel=INFO", + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "Celery beat Console" }, { "name": "Pytest", @@ -137,8 +309,22 @@ "-v" // Specify a sepcific module/test to run or provide nothing to run all tests //"tests/unit/danswer/llm/answering/test_prune_and_merge.py" - ] + ], + "presentation": { + "group": "2", + }, + "consoleTitle": "Pytest Console" }, + { + // Dummy entry used to label the group + "name": "--- Tasks ---", + "type": "node", + "request": "launch", + "presentation": { + "group": "3", + "order": 0 + } + }, { "name": "Clear and Restart External Volumes and Containers", "type": "node", @@ -147,7 +333,27 @@ "runtimeArgs": ["${workspaceFolder}/backend/scripts/restart_containers.sh"], "cwd": "${workspaceFolder}", "console": "integratedTerminal", - "stopOnEntry": true - } + "stopOnEntry": true, + "presentation": { + "group": "3", + }, + }, + { + // Celery jobs launched through a single background script (legacy) + // Recommend using the "Celery (all)" compound launch instead. + "name": "Background Jobs", + "consoleName": "Background Jobs", + "type": "debugpy", + "request": "launch", + "program": "scripts/dev_run_background_jobs.py", + "cwd": "${workspaceFolder}/backend", + "envFile": "${workspaceFolder}/.vscode/.env", + "env": { + "LOG_DANSWER_MODEL_INTERACTIONS": "True", + "LOG_LEVEL": "DEBUG", + "PYTHONUNBUFFERED": "1", + "PYTHONPATH": "." + }, + }, ] } diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3e4415188a1..779cabbb491 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,7 +22,7 @@ Your input is vital to making sure that Danswer moves in the right direction. Before starting on implementation, please raise a GitHub issue. And always feel free to message us (Chris Weaver / Yuhong Sun) on -[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2afut44lv-Rw3kSWu6_OmdAXRpCv80DQ) / +[Slack](https://join.slack.com/t/danswer/shared_invite/zt-2lcmqw703-071hBuZBfNEOGUsLa5PXvQ) / [Discord](https://discord.gg/TDJ59cGV2X) directly about anything at all. diff --git a/README.md b/README.md index aff3cd57d5a..0cc95ff4464 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ +

@@ -68,13 +69,13 @@ We also have built-in support for deployment on Kubernetes. Files for that can b ## 🚧 Roadmap * Chat/Prompt sharing with specific teammates and user groups. -* Multi-Model model support, chat with images, video etc. +* Multimodal model support, chat with images, video etc. * Choosing between LLMs and parameters during chat session. * Tool calling and agent configurations options. * Organizational understanding and ability to locate and suggest experts from your team. -## Other Noteable Benefits of Danswer +## Other Notable Benefits of Danswer * User Authentication with document level access management. * Best in class Hybrid Search across all sources (BM-25 + prefix aware embedding models). * Admin Dashboard to configure connectors, document-sets, access, etc. @@ -127,3 +128,19 @@ To try the Danswer Enterprise Edition: ## 💡 Contributing Looking to contribute? Please check out the [Contribution Guide](CONTRIBUTING.md) for more details. + +## ⭐Star History + +[![Star History Chart](https://api.star-history.com/svg?repos=danswer-ai/danswer&type=Date)](https://star-history.com/#danswer-ai/danswer&Date) + +## ✨Contributors + + + contributors + + +

+ + ↑ Back to Top ↑ + +

diff --git a/backend/Dockerfile b/backend/Dockerfile index 89bacdc7020..d526e44cded 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -8,10 +8,11 @@ LABEL com.danswer.description="This image is the web/frontend container of Dansw founders@danswer.ai for more information. Please visit https://github.com/danswer-ai/danswer" # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions. -ARG DANSWER_VERSION=0.3-dev +ARG DANSWER_VERSION=0.8-dev ENV DANSWER_VERSION=${DANSWER_VERSION} \ DANSWER_RUNNING_IN_DOCKER="true" + RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}" # Install system dependencies # cmake needed for psycopg (postgres) @@ -36,6 +37,8 @@ RUN apt-get update && \ rm -rf /var/lib/apt/lists/* && \ apt-get clean + + # Install Python dependencies # Remove py which is pulled in by retry, py is not needed and is a CVE COPY ./requirements/default.txt /tmp/requirements.txt @@ -74,7 +77,6 @@ RUN apt-get update && \ RUN python -c "from tokenizers import Tokenizer; \ Tokenizer.from_pretrained('nomic-ai/nomic-embed-text-v1')" - # Pre-downloading NLTK for setups with limited egress RUN python -c "import nltk; \ nltk.download('stopwords', quiet=True); \ @@ -92,6 +94,7 @@ COPY supervisord.conf /etc/supervisor/conf.d/supervisord.conf COPY ./danswer /app/danswer COPY ./shared_configs /app/shared_configs COPY ./alembic /app/alembic +COPY ./alembic_tenants /app/alembic_tenants COPY ./alembic.ini /app/alembic.ini COPY supervisord.conf /usr/etc/supervisord.conf diff --git a/backend/Dockerfile.model_server b/backend/Dockerfile.model_server index 05a284a2baa..c7b6d2006d0 100644 --- a/backend/Dockerfile.model_server +++ b/backend/Dockerfile.model_server @@ -7,7 +7,7 @@ You can find it at https://hub.docker.com/r/danswer/danswer-model-server. For mo visit https://github.com/danswer-ai/danswer." # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions. -ARG DANSWER_VERSION=0.3-dev +ARG DANSWER_VERSION=0.8-dev ENV DANSWER_VERSION=${DANSWER_VERSION} \ DANSWER_RUNNING_IN_DOCKER="true" diff --git a/backend/alembic.ini b/backend/alembic.ini index 10ae5cfdd27..599c46fadd7 100644 --- a/backend/alembic.ini +++ b/backend/alembic.ini @@ -1,6 +1,6 @@ # A generic, single database configuration. -[alembic] +[DEFAULT] # path to migration scripts script_location = alembic @@ -47,7 +47,8 @@ prepend_sys_path = . # version_path_separator = : # version_path_separator = ; # version_path_separator = space -version_path_separator = os # Use os.pathsep. Default configuration used for new projects. +version_path_separator = os +# Use os.pathsep. Default configuration used for new projects. 
# set to 'true' to search source files recursively # in each "version_locations" directory @@ -106,3 +107,12 @@ formatter = generic [formatter_generic] format = %(levelname)-5.5s [%(name)s] %(message)s datefmt = %H:%M:%S + + +[alembic] +script_location = alembic +version_locations = %(script_location)s/versions + +[schema_private] +script_location = alembic_tenants +version_locations = %(script_location)s/versions diff --git a/backend/alembic/env.py b/backend/alembic/env.py index 154d6ff3d66..019ea94b836 100644 --- a/backend/alembic/env.py +++ b/backend/alembic/env.py @@ -1,107 +1,203 @@ +from sqlalchemy.engine.base import Connection +from typing import Any import asyncio from logging.config import fileConfig +import logging from alembic import context -from danswer.db.engine import build_connection_string -from danswer.db.models import Base from sqlalchemy import pool -from sqlalchemy.engine import Connection from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy.sql import text + +from shared_configs.configs import MULTI_TENANT +from danswer.db.engine import build_connection_string +from danswer.db.models import Base from celery.backends.database.session import ResultModelBase # type: ignore -from sqlalchemy.schema import SchemaItem +from danswer.db.engine import get_all_tenant_ids +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA -# this is the Alembic Config object, which provides -# access to the values within the .ini file in use. +# Alembic Config object config = context.config # Interpret the config file for Python logging. -# This line sets up loggers basically. if config.config_file_name is not None and config.attributes.get( "configure_logger", True ): fileConfig(config.config_file_name) -# add your model's MetaData object here -# for 'autogenerate' support -# from myapp import mymodel -# target_metadata = mymodel.Base.metadata +# Add your model's MetaData object here for 'autogenerate' support target_metadata = [Base.metadata, ResultModelBase.metadata] -# other values from the config, defined by the needs of env.py, -# can be acquired: -# my_important_option = config.get_main_option("my_important_option") -# ... etc. - EXCLUDE_TABLES = {"kombu_queue", "kombu_message"} +# Set up logging +logger = logging.getLogger(__name__) + def include_object( - object: SchemaItem, - name: str, - type_: str, - reflected: bool, - compare_to: SchemaItem | None, + object: Any, name: str, type_: str, reflected: bool, compare_to: Any ) -> bool: + """ + Determines whether a database object should be included in migrations. + Excludes specified tables from migrations. + """ if type_ == "table" and name in EXCLUDE_TABLES: return False return True -def run_migrations_offline() -> None: - """Run migrations in 'offline' mode. - - This configures the context with just a URL - and not an Engine, though an Engine is acceptable - here as well. By skipping the Engine creation - we don't even need a DBAPI to be available. - - Calls to context.execute() here emit the given string to the - script output. - +def get_schema_options() -> tuple[str, bool, bool]: """ - url = build_connection_string() - context.configure( - url=url, - target_metadata=target_metadata, # type: ignore - literal_binds=True, - dialect_opts={"paramstyle": "named"}, - ) + Parses command-line options passed via '-x' in Alembic commands. + Recognizes 'schema', 'create_schema', and 'upgrade_all_tenants' options. 
+ """ + x_args_raw = context.get_x_argument() + x_args = {} + for arg in x_args_raw: + for pair in arg.split(","): + if "=" in pair: + key, value = pair.split("=", 1) + x_args[key.strip()] = value.strip() + schema_name = x_args.get("schema", POSTGRES_DEFAULT_SCHEMA) + create_schema = x_args.get("create_schema", "true").lower() == "true" + upgrade_all_tenants = x_args.get("upgrade_all_tenants", "false").lower() == "true" + + if ( + MULTI_TENANT + and schema_name == POSTGRES_DEFAULT_SCHEMA + and not upgrade_all_tenants + ): + raise ValueError( + "Cannot run default migrations in public schema when multi-tenancy is enabled. " + "Please specify a tenant-specific schema." + ) + + return schema_name, create_schema, upgrade_all_tenants + + +def do_run_migrations( + connection: Connection, schema_name: str, create_schema: bool +) -> None: + """ + Executes migrations in the specified schema. + """ + logger.info(f"About to migrate schema: {schema_name}") - with context.begin_transaction(): - context.run_migrations() + if create_schema: + connection.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{schema_name}"')) + connection.execute(text("COMMIT")) + # Set search_path to the target schema + connection.execute(text(f'SET search_path TO "{schema_name}"')) -def do_run_migrations(connection: Connection) -> None: context.configure( connection=connection, target_metadata=target_metadata, # type: ignore include_object=include_object, - ) # type: ignore + version_table_schema=schema_name, + include_schemas=True, + compare_type=True, + compare_server_default=True, + script_location=config.get_main_option("script_location"), + ) with context.begin_transaction(): context.run_migrations() async def run_async_migrations() -> None: - """In this scenario we need to create an Engine - and associate a connection with the context. - """ + Determines whether to run migrations for a single schema or all schemas, + and executes migrations accordingly. + """ + schema_name, create_schema, upgrade_all_tenants = get_schema_options() - connectable = create_async_engine( + engine = create_async_engine( build_connection_string(), poolclass=pool.NullPool, ) - async with connectable.connect() as connection: - await connection.run_sync(do_run_migrations) + if upgrade_all_tenants: + # Run migrations for all tenant schemas sequentially + tenant_schemas = get_all_tenant_ids() + + for schema in tenant_schemas: + try: + logger.info(f"Migrating schema: {schema}") + async with engine.connect() as connection: + await connection.run_sync( + do_run_migrations, + schema_name=schema, + create_schema=create_schema, + ) + except Exception as e: + logger.error(f"Error migrating schema {schema}: {e}") + raise + else: + try: + logger.info(f"Migrating schema: {schema_name}") + async with engine.connect() as connection: + await connection.run_sync( + do_run_migrations, + schema_name=schema_name, + create_schema=create_schema, + ) + except Exception as e: + logger.error(f"Error migrating schema {schema_name}: {e}") + raise + + await engine.dispose() - await connectable.dispose() +def run_migrations_offline() -> None: + """ + Run migrations in 'offline' mode. 
+ """ + schema_name, _, upgrade_all_tenants = get_schema_options() + url = build_connection_string() -def run_migrations_online() -> None: - """Run migrations in 'online' mode.""" + if upgrade_all_tenants: + # Run offline migrations for all tenant schemas + engine = create_async_engine(url) + tenant_schemas = get_all_tenant_ids() + engine.sync_engine.dispose() + + for schema in tenant_schemas: + logger.info(f"Migrating schema: {schema}") + context.configure( + url=url, + target_metadata=target_metadata, # type: ignore + literal_binds=True, + include_object=include_object, + version_table_schema=schema, + include_schemas=True, + script_location=config.get_main_option("script_location"), + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + else: + logger.info(f"Migrating schema: {schema_name}") + context.configure( + url=url, + target_metadata=target_metadata, # type: ignore + literal_binds=True, + include_object=include_object, + version_table_schema=schema_name, + include_schemas=True, + script_location=config.get_main_option("script_location"), + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + +def run_migrations_online() -> None: + """ + Runs migrations in 'online' mode using an asynchronous engine. + """ asyncio.run(run_async_migrations()) diff --git a/backend/alembic/versions/1b10e1fda030_add_additional_data_to_notifications.py b/backend/alembic/versions/1b10e1fda030_add_additional_data_to_notifications.py new file mode 100644 index 00000000000..71c31e2c862 --- /dev/null +++ b/backend/alembic/versions/1b10e1fda030_add_additional_data_to_notifications.py @@ -0,0 +1,26 @@ +"""add additional data to notifications + +Revision ID: 1b10e1fda030 +Revises: 6756efa39ada +Create Date: 2024-10-15 19:26:44.071259 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "1b10e1fda030" +down_revision = "6756efa39ada" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "notification", sa.Column("additional_data", postgresql.JSONB(), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("notification", "additional_data") diff --git a/backend/alembic/versions/33cb72ea4d80_single_tool_call_per_message.py b/backend/alembic/versions/33cb72ea4d80_single_tool_call_per_message.py new file mode 100644 index 00000000000..0cd3da444bc --- /dev/null +++ b/backend/alembic/versions/33cb72ea4d80_single_tool_call_per_message.py @@ -0,0 +1,50 @@ +"""single tool call per message + +Revision ID: 33cb72ea4d80 +Revises: 5b29123cd710 +Create Date: 2024-11-01 12:51:01.535003 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "33cb72ea4d80" +down_revision = "5b29123cd710" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Step 1: Delete extraneous ToolCall entries + # Keep only the ToolCall with the smallest 'id' for each 'message_id' + op.execute( + sa.text( + """ + DELETE FROM tool_call + WHERE id NOT IN ( + SELECT MIN(id) + FROM tool_call + WHERE message_id IS NOT NULL + GROUP BY message_id + ); + """ + ) + ) + + # Step 2: Add a unique constraint on message_id + op.create_unique_constraint( + constraint_name="uq_tool_call_message_id", + table_name="tool_call", + columns=["message_id"], + ) + + +def downgrade() -> None: + # Step 1: Drop the unique constraint on message_id + op.drop_constraint( + constraint_name="uq_tool_call_message_id", + table_name="tool_call", + type_="unique", + ) diff --git a/backend/alembic/versions/5b29123cd710_nullable_search_settings_for_historic_.py b/backend/alembic/versions/5b29123cd710_nullable_search_settings_for_historic_.py new file mode 100644 index 00000000000..58164cd4c14 --- /dev/null +++ b/backend/alembic/versions/5b29123cd710_nullable_search_settings_for_historic_.py @@ -0,0 +1,70 @@ +"""nullable search settings for historic index attempts + +Revision ID: 5b29123cd710 +Revises: 949b4a92a401 +Create Date: 2024-10-30 19:37:59.630704 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "5b29123cd710" +down_revision = "949b4a92a401" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Drop the existing foreign key constraint + op.drop_constraint( + "fk_index_attempt_search_settings", "index_attempt", type_="foreignkey" + ) + + # Modify the column to be nullable + op.alter_column( + "index_attempt", "search_settings_id", existing_type=sa.INTEGER(), nullable=True + ) + + # Add back the foreign key with ON DELETE SET NULL + op.create_foreign_key( + "fk_index_attempt_search_settings", + "index_attempt", + "search_settings", + ["search_settings_id"], + ["id"], + ondelete="SET NULL", + ) + + +def downgrade() -> None: + # Warning: This will delete all index attempts that don't have search settings + op.execute( + """ + DELETE FROM index_attempt + WHERE search_settings_id IS NULL + """ + ) + + # Drop foreign key constraint + op.drop_constraint( + "fk_index_attempt_search_settings", "index_attempt", type_="foreignkey" + ) + + # Modify the column to be not nullable + op.alter_column( + "index_attempt", + "search_settings_id", + existing_type=sa.INTEGER(), + nullable=False, + ) + + # Add back the foreign key without ON DELETE SET NULL + op.create_foreign_key( + "fk_index_attempt_search_settings", + "index_attempt", + "search_settings", + ["search_settings_id"], + ["id"], + ) diff --git a/backend/alembic/versions/5d12a446f5c0_add_api_version_and_deployment_name_to_.py b/backend/alembic/versions/5d12a446f5c0_add_api_version_and_deployment_name_to_.py new file mode 100644 index 00000000000..85b5431ecc3 --- /dev/null +++ b/backend/alembic/versions/5d12a446f5c0_add_api_version_and_deployment_name_to_.py @@ -0,0 +1,30 @@ +"""add api_version and deployment_name to search settings + +Revision ID: 5d12a446f5c0 +Revises: e4334d5b33ba +Create Date: 2024-10-08 15:56:07.975636 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "5d12a446f5c0" +down_revision = "e4334d5b33ba" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "embedding_provider", sa.Column("api_version", sa.String(), nullable=True) + ) + op.add_column( + "embedding_provider", sa.Column("deployment_name", sa.String(), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("embedding_provider", "deployment_name") + op.drop_column("embedding_provider", "api_version") diff --git a/backend/alembic/versions/6756efa39ada_id_uuid_for_chat_session.py b/backend/alembic/versions/6756efa39ada_id_uuid_for_chat_session.py new file mode 100644 index 00000000000..083fececd87 --- /dev/null +++ b/backend/alembic/versions/6756efa39ada_id_uuid_for_chat_session.py @@ -0,0 +1,153 @@ +"""Migrate chat_session and chat_message tables to use UUID primary keys + +Revision ID: 6756efa39ada +Revises: 5d12a446f5c0 +Create Date: 2024-10-15 17:47:44.108537 + +""" +from alembic import op +import sqlalchemy as sa + +revision = "6756efa39ada" +down_revision = "5d12a446f5c0" +branch_labels = None +depends_on = None + +""" +This script: +1. Adds UUID columns to chat_session and chat_message +2. Populates new columns with UUIDs +3. Updates foreign key relationships +4. Removes old integer ID columns + +Note: Downgrade will assign new integer IDs, not restore original ones. +""" + + +def upgrade() -> None: + op.execute("CREATE EXTENSION IF NOT EXISTS pgcrypto;") + + op.add_column( + "chat_session", + sa.Column( + "new_id", + sa.UUID(as_uuid=True), + server_default=sa.text("gen_random_uuid()"), + nullable=False, + ), + ) + + op.execute("UPDATE chat_session SET new_id = gen_random_uuid();") + + op.add_column( + "chat_message", + sa.Column("new_chat_session_id", sa.UUID(as_uuid=True), nullable=True), + ) + + op.execute( + """ + UPDATE chat_message + SET new_chat_session_id = cs.new_id + FROM chat_session cs + WHERE chat_message.chat_session_id = cs.id; + """ + ) + + op.drop_constraint( + "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey" + ) + + op.drop_column("chat_message", "chat_session_id") + op.alter_column( + "chat_message", "new_chat_session_id", new_column_name="chat_session_id" + ) + + op.drop_constraint("chat_session_pkey", "chat_session", type_="primary") + op.drop_column("chat_session", "id") + op.alter_column("chat_session", "new_id", new_column_name="id") + + op.create_primary_key("chat_session_pkey", "chat_session", ["id"]) + + op.create_foreign_key( + "chat_message_chat_session_id_fkey", + "chat_message", + "chat_session", + ["chat_session_id"], + ["id"], + ondelete="CASCADE", + ) + + +def downgrade() -> None: + op.drop_constraint( + "chat_message_chat_session_id_fkey", "chat_message", type_="foreignkey" + ) + + op.add_column( + "chat_session", + sa.Column("old_id", sa.Integer, autoincrement=True, nullable=True), + ) + + op.execute("CREATE SEQUENCE chat_session_old_id_seq OWNED BY chat_session.old_id;") + op.execute( + "ALTER TABLE chat_session ALTER COLUMN old_id SET DEFAULT nextval('chat_session_old_id_seq');" + ) + + op.execute( + "UPDATE chat_session SET old_id = nextval('chat_session_old_id_seq') WHERE old_id IS NULL;" + ) + + op.alter_column("chat_session", "old_id", nullable=False) + + op.drop_constraint("chat_session_pkey", "chat_session", type_="primary") + op.create_primary_key("chat_session_pkey", "chat_session", ["old_id"]) + + op.add_column( + "chat_message", + sa.Column("old_chat_session_id", sa.Integer, nullable=True), + ) + + op.execute( + """ + UPDATE chat_message + SET 
old_chat_session_id = cs.old_id + FROM chat_session cs + WHERE chat_message.chat_session_id = cs.id; + """ + ) + + op.drop_column("chat_message", "chat_session_id") + op.alter_column( + "chat_message", "old_chat_session_id", new_column_name="chat_session_id" + ) + + op.create_foreign_key( + "chat_message_chat_session_id_fkey", + "chat_message", + "chat_session", + ["chat_session_id"], + ["old_id"], + ondelete="CASCADE", + ) + + op.drop_column("chat_session", "id") + op.alter_column("chat_session", "old_id", new_column_name="id") + + op.alter_column( + "chat_session", + "id", + type_=sa.Integer(), + existing_type=sa.Integer(), + existing_nullable=False, + existing_server_default=False, + ) + + # Rename the sequence + op.execute("ALTER SEQUENCE chat_session_old_id_seq RENAME TO chat_session_id_seq;") + + # Update the default value to use the renamed sequence + op.alter_column( + "chat_session", + "id", + server_default=sa.text("nextval('chat_session_id_seq'::regclass)"), + ) diff --git a/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py b/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py index ed1993efed3..9e1fdf3cb9e 100644 --- a/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py +++ b/backend/alembic/versions/703313b75876_add_tokenratelimit_tables.py @@ -9,7 +9,7 @@ from typing import cast from alembic import op import sqlalchemy as sa -from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.key_value_store.factory import get_kv_store # revision identifiers, used by Alembic. revision = "703313b75876" @@ -54,9 +54,7 @@ def upgrade() -> None: ) try: - settings_json = cast( - str, get_dynamic_config_store().load("token_budget_settings") - ) + settings_json = cast(str, get_kv_store().load("token_budget_settings")) settings = json.loads(settings_json) is_enabled = settings.get("enable_token_budget", False) @@ -71,7 +69,7 @@ def upgrade() -> None: ) # Delete the dynamic config - get_dynamic_config_store().delete("token_budget_settings") + get_kv_store().delete("token_budget_settings") except Exception: # Ignore if the dynamic config is not found diff --git a/backend/alembic/versions/949b4a92a401_remove_rt.py b/backend/alembic/versions/949b4a92a401_remove_rt.py new file mode 100644 index 00000000000..6dbb7859cd7 --- /dev/null +++ b/backend/alembic/versions/949b4a92a401_remove_rt.py @@ -0,0 +1,74 @@ +"""remove rt + +Revision ID: 949b4a92a401 +Revises: 1b10e1fda030 +Create Date: 2024-10-26 13:06:06.937969 + +""" +from alembic import op +from sqlalchemy.orm import Session + +# Import your models and constants +from danswer.db.models import ( + Connector, + ConnectorCredentialPair, + Credential, + IndexAttempt, +) +from danswer.configs.constants import DocumentSource + + +# revision identifiers, used by Alembic. 
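+# The upgrade below walks the dependency chain child-first (IndexAttempt ->
+# ConnectorCredentialPair -> Credential -> Connector) so that no foreign key
+# references are left dangling, then commits once at the end. downgrade() is
+# intentionally a no-op because the deleted Request Tracker data cannot be
+# reconstructed.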
+revision = "949b4a92a401" +down_revision = "1b10e1fda030" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # Deletes all RequestTracker connectors and associated data + bind = op.get_bind() + session = Session(bind=bind) + + connectors_to_delete = ( + session.query(Connector) + .filter(Connector.source == DocumentSource.REQUESTTRACKER) + .all() + ) + + connector_ids = [connector.id for connector in connectors_to_delete] + + if connector_ids: + cc_pairs_to_delete = ( + session.query(ConnectorCredentialPair) + .filter(ConnectorCredentialPair.connector_id.in_(connector_ids)) + .all() + ) + + cc_pair_ids = [cc_pair.id for cc_pair in cc_pairs_to_delete] + + if cc_pair_ids: + session.query(IndexAttempt).filter( + IndexAttempt.connector_credential_pair_id.in_(cc_pair_ids) + ).delete(synchronize_session=False) + + session.query(ConnectorCredentialPair).filter( + ConnectorCredentialPair.id.in_(cc_pair_ids) + ).delete(synchronize_session=False) + + credential_ids = [cc_pair.credential_id for cc_pair in cc_pairs_to_delete] + if credential_ids: + session.query(Credential).filter(Credential.id.in_(credential_ids)).delete( + synchronize_session=False + ) + + session.query(Connector).filter(Connector.id.in_(connector_ids)).delete( + synchronize_session=False + ) + + session.commit() + + +def downgrade() -> None: + # No-op downgrade as we cannot restore deleted data + pass diff --git a/backend/alembic/versions/ac5eaac849f9_add_last_pruned_to_connector_table.py b/backend/alembic/versions/ac5eaac849f9_add_last_pruned_to_connector_table.py new file mode 100644 index 00000000000..b2c33e1688d --- /dev/null +++ b/backend/alembic/versions/ac5eaac849f9_add_last_pruned_to_connector_table.py @@ -0,0 +1,27 @@ +"""add last_pruned to the connector_credential_pair table + +Revision ID: ac5eaac849f9 +Revises: 52a219fb5233 +Create Date: 2024-09-10 15:04:26.437118 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = "ac5eaac849f9" +down_revision = "46b7a812670f" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # last pruned represents the last time the connector was pruned + op.add_column( + "connector_credential_pair", + sa.Column("last_pruned", sa.DateTime(timezone=True), nullable=True), + ) + + +def downgrade() -> None: + op.drop_column("connector_credential_pair", "last_pruned") diff --git a/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py b/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py index a6938e365c6..db7b330c3e0 100644 --- a/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py +++ b/backend/alembic/versions/b082fec533f0_make_last_attempt_status_nullable.py @@ -31,6 +31,12 @@ def upgrade() -> None: def downgrade() -> None: + # First, update any null values to a default value + op.execute( + "UPDATE connector_credential_pair SET last_attempt_status = 'NOT_STARTED' WHERE last_attempt_status IS NULL" + ) + + # Then, make the column non-nullable op.alter_column( "connector_credential_pair", "last_attempt_status", diff --git a/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py b/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py index 95b53cbeb41..8e0a8e6072d 100644 --- a/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py +++ b/backend/alembic/versions/da4c21c69164_chosen_assistants_changed_to_jsonb.py @@ -20,7 +20,7 @@ def upgrade() -> None: conn = op.get_bind() existing_ids_and_chosen_assistants = conn.execute( - sa.text("select id, chosen_assistants from public.user") + sa.text('select id, chosen_assistants from "user"') ) op.drop_column( "user", @@ -37,7 +37,7 @@ def upgrade() -> None: for id, chosen_assistants in existing_ids_and_chosen_assistants: conn.execute( sa.text( - "update public.user set chosen_assistants = :chosen_assistants where id = :id" + 'update "user" set chosen_assistants = :chosen_assistants where id = :id' ), {"chosen_assistants": json.dumps(chosen_assistants), "id": id}, ) @@ -46,7 +46,7 @@ def upgrade() -> None: def downgrade() -> None: conn = op.get_bind() existing_ids_and_chosen_assistants = conn.execute( - sa.text("select id, chosen_assistants from public.user") + sa.text('select id, chosen_assistants from "user"') ) op.drop_column( "user", @@ -59,7 +59,7 @@ def downgrade() -> None: for id, chosen_assistants in existing_ids_and_chosen_assistants: conn.execute( sa.text( - "update public.user set chosen_assistants = :chosen_assistants where id = :id" + 'update "user" set chosen_assistants = :chosen_assistants where id = :id' ), {"chosen_assistants": chosen_assistants, "id": id}, ) diff --git a/backend/alembic/versions/e4334d5b33ba_add_deployment_name_to_llmprovider.py b/backend/alembic/versions/e4334d5b33ba_add_deployment_name_to_llmprovider.py new file mode 100644 index 00000000000..e837b87e3e0 --- /dev/null +++ b/backend/alembic/versions/e4334d5b33ba_add_deployment_name_to_llmprovider.py @@ -0,0 +1,26 @@ +"""add_deployment_name_to_llmprovider + +Revision ID: e4334d5b33ba +Revises: ac5eaac849f9 +Create Date: 2024-10-04 09:52:34.896867 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = "e4334d5b33ba" +down_revision = "ac5eaac849f9" +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.add_column( + "llm_provider", sa.Column("deployment_name", sa.String(), nullable=True) + ) + + +def downgrade() -> None: + op.drop_column("llm_provider", "deployment_name") diff --git a/backend/alembic_tenants/README.md b/backend/alembic_tenants/README.md new file mode 100644 index 00000000000..f075b958305 --- /dev/null +++ b/backend/alembic_tenants/README.md @@ -0,0 +1,3 @@ +These files are for public table migrations when operating with multi tenancy. + +If you are not a Danswer developer, you can ignore this directory entirely. \ No newline at end of file diff --git a/backend/alembic_tenants/env.py b/backend/alembic_tenants/env.py new file mode 100644 index 00000000000..f0f1178ce09 --- /dev/null +++ b/backend/alembic_tenants/env.py @@ -0,0 +1,111 @@ +import asyncio +from logging.config import fileConfig + +from sqlalchemy import pool +from sqlalchemy.engine import Connection +from sqlalchemy.ext.asyncio import create_async_engine +from sqlalchemy.schema import SchemaItem + +from alembic import context +from danswer.db.engine import build_connection_string +from danswer.db.models import PublicBase + +# this is the Alembic Config object, which provides +# access to the values within the .ini file in use. +config = context.config + +# Interpret the config file for Python logging. +# This line sets up loggers basically. +if config.config_file_name is not None and config.attributes.get( + "configure_logger", True +): + fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = [PublicBase.metadata] + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + +EXCLUDE_TABLES = {"kombu_queue", "kombu_message"} + + +def include_object( + object: SchemaItem, + name: str, + type_: str, + reflected: bool, + compare_to: SchemaItem | None, +) -> bool: + if type_ == "table" and name in EXCLUDE_TABLES: + return False + return True + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = build_connection_string() + context.configure( + url=url, + target_metadata=target_metadata, # type: ignore + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + + with context.begin_transaction(): + context.run_migrations() + + +def do_run_migrations(connection: Connection) -> None: + context.configure( + connection=connection, + target_metadata=target_metadata, # type: ignore + include_object=include_object, + ) # type: ignore + + with context.begin_transaction(): + context.run_migrations() + + +async def run_async_migrations() -> None: + """In this scenario we need to create an Engine + and associate a connection with the context. 
+ + """ + + connectable = create_async_engine( + build_connection_string(), + poolclass=pool.NullPool, + ) + + async with connectable.connect() as connection: + await connection.run_sync(do_run_migrations) + + await connectable.dispose() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + + asyncio.run(run_async_migrations()) + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/backend/alembic_tenants/script.py.mako b/backend/alembic_tenants/script.py.mako new file mode 100644 index 00000000000..55df2863d20 --- /dev/null +++ b/backend/alembic_tenants/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. +revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/backend/alembic_tenants/versions/14a83a331951_create_usertenantmapping_table.py b/backend/alembic_tenants/versions/14a83a331951_create_usertenantmapping_table.py new file mode 100644 index 00000000000..f8f3016bab1 --- /dev/null +++ b/backend/alembic_tenants/versions/14a83a331951_create_usertenantmapping_table.py @@ -0,0 +1,24 @@ +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision = "14a83a331951" +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade() -> None: + op.create_table( + "user_tenant_mapping", + sa.Column("email", sa.String(), nullable=False), + sa.Column("tenant_id", sa.String(), nullable=False), + sa.UniqueConstraint("email", "tenant_id", name="uq_user_tenant"), + sa.UniqueConstraint("email", name="uq_email"), + schema="public", + ) + + +def downgrade() -> None: + op.drop_table("user_tenant_mapping", schema="public") diff --git a/backend/danswer/__init__.py b/backend/danswer/__init__.py index e2d480be4e6..ea33fd83292 100644 --- a/backend/danswer/__init__.py +++ b/backend/danswer/__init__.py @@ -1,3 +1,3 @@ import os -__version__ = os.environ.get("DANSWER_VERSION", "") or "0.3-dev" +__version__ = os.environ.get("DANSWER_VERSION", "") or "Development" diff --git a/backend/danswer/access/models.py b/backend/danswer/access/models.py index af5a021ca97..46b9c0efa93 100644 --- a/backend/danswer/access/models.py +++ b/backend/danswer/access/models.py @@ -70,3 +70,12 @@ def build( user_groups=set(user_groups), is_public=is_public, ) + + +default_public_access = DocumentAccess( + external_user_emails=set(), + external_user_group_ids=set(), + user_emails=set(), + user_groups=set(), + is_public=True, +) diff --git a/backend/danswer/auth/invited_users.py b/backend/danswer/auth/invited_users.py index efce858f265..15ec9abf50e 100644 --- a/backend/danswer/auth/invited_users.py +++ b/backend/danswer/auth/invited_users.py @@ -1,20 +1,21 @@ from typing import cast from danswer.configs.constants import KV_USER_STORE_KEY -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.dynamic_configs.interface import JSON_ro +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import JSON_ro +from 
danswer.key_value_store.interface import KvKeyNotFoundError def get_invited_users() -> list[str]: try: - store = get_dynamic_config_store() + store = get_kv_store() + return cast(list, store.load(KV_USER_STORE_KEY)) - except ConfigNotFoundError: + except KvKeyNotFoundError: return list() def write_invited_users(emails: list[str]) -> int: - store = get_dynamic_config_store() + store = get_kv_store() store.store(KV_USER_STORE_KEY, cast(JSON_ro, emails)) return len(emails) diff --git a/backend/danswer/auth/noauth_user.py b/backend/danswer/auth/noauth_user.py index 9520ef41c23..9eb589dbb25 100644 --- a/backend/danswer/auth/noauth_user.py +++ b/backend/danswer/auth/noauth_user.py @@ -4,29 +4,29 @@ from danswer.auth.schemas import UserRole from danswer.configs.constants import KV_NO_AUTH_USER_PREFERENCES_KEY -from danswer.dynamic_configs.store import ConfigNotFoundError -from danswer.dynamic_configs.store import DynamicConfigStore +from danswer.key_value_store.store import KeyValueStore +from danswer.key_value_store.store import KvKeyNotFoundError from danswer.server.manage.models import UserInfo from danswer.server.manage.models import UserPreferences def set_no_auth_user_preferences( - store: DynamicConfigStore, preferences: UserPreferences + store: KeyValueStore, preferences: UserPreferences ) -> None: store.store(KV_NO_AUTH_USER_PREFERENCES_KEY, preferences.model_dump()) -def load_no_auth_user_preferences(store: DynamicConfigStore) -> UserPreferences: +def load_no_auth_user_preferences(store: KeyValueStore) -> UserPreferences: try: preferences_data = cast( Mapping[str, Any], store.load(KV_NO_AUTH_USER_PREFERENCES_KEY) ) return UserPreferences(**preferences_data) - except ConfigNotFoundError: + except KvKeyNotFoundError: return UserPreferences(chosen_assistants=None, default_model=None) -def fetch_no_auth_user(store: DynamicConfigStore) -> UserInfo: +def fetch_no_auth_user(store: KeyValueStore) -> UserInfo: return UserInfo( id="__no_auth_user__", email="anonymous@danswer.ai", diff --git a/backend/danswer/auth/schemas.py b/backend/danswer/auth/schemas.py index db8a97ceb04..9c81899a421 100644 --- a/backend/danswer/auth/schemas.py +++ b/backend/danswer/auth/schemas.py @@ -34,6 +34,7 @@ class UserRead(schemas.BaseUser[uuid.UUID]): class UserCreate(schemas.BaseUserCreate): role: UserRole = UserRole.BASIC has_web_login: bool | None = True + tenant_id: str | None = None class UserUpdate(schemas.BaseUserUpdate): diff --git a/backend/danswer/auth/users.py b/backend/danswer/auth/users.py index a9ceb2254f0..c2e65b6f099 100644 --- a/backend/danswer/auth/users.py +++ b/backend/danswer/auth/users.py @@ -5,17 +5,23 @@ from datetime import timezone from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText +from typing import Dict +from typing import List from typing import Optional from typing import Tuple +import jwt from email_validator import EmailNotValidError +from email_validator import EmailUndeliverableError from email_validator import validate_email from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException +from fastapi import Query from fastapi import Request from fastapi import Response from fastapi import status +from fastapi.responses import RedirectResponse from fastapi.security import OAuth2PasswordRequestForm from fastapi_users import BaseUserManager from fastapi_users import exceptions @@ -25,11 +31,26 @@ from fastapi_users import UUIDIDMixin from fastapi_users.authentication import AuthenticationBackend from 
fastapi_users.authentication import CookieTransport +from fastapi_users.authentication import JWTStrategy from fastapi_users.authentication import Strategy from fastapi_users.authentication.strategy.db import AccessTokenDatabase from fastapi_users.authentication.strategy.db import DatabaseStrategy +from fastapi_users.exceptions import UserAlreadyExists +from fastapi_users.jwt import decode_jwt +from fastapi_users.jwt import generate_jwt +from fastapi_users.jwt import SecretType +from fastapi_users.manager import UserManagerDependency from fastapi_users.openapi import OpenAPIResponseType +from fastapi_users.router.common import ErrorCode +from fastapi_users.router.common import ErrorModel from fastapi_users_db_sqlalchemy import SQLAlchemyUserDatabase +from httpx_oauth.integrations.fastapi import OAuth2AuthorizeCallback +from httpx_oauth.oauth2 import BaseOAuth2 +from httpx_oauth.oauth2 import OAuth2Token +from pydantic import BaseModel +from sqlalchemy import select +from sqlalchemy import text +from sqlalchemy.orm import attributes from sqlalchemy.orm import Session from danswer.auth.invited_users import get_invited_users @@ -38,6 +59,7 @@ from danswer.auth.schemas import UserUpdate from danswer.configs.app_configs import AUTH_TYPE from danswer.configs.app_configs import DISABLE_AUTH +from danswer.configs.app_configs import DISABLE_VERIFICATION from danswer.configs.app_configs import EMAIL_FROM from danswer.configs.app_configs import REQUIRE_EMAIL_VERIFICATION from danswer.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS @@ -57,15 +79,24 @@ from danswer.db.auth import get_default_admin_user_emails from danswer.db.auth import get_user_count from danswer.db.auth import get_user_db +from danswer.db.auth import SQLAlchemyUserAdminDB +from danswer.db.engine import get_async_session_with_tenant from danswer.db.engine import get_session +from danswer.db.engine import get_session_with_tenant from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import AccessToken +from danswer.db.models import OAuthAccount from danswer.db.models import User +from danswer.db.models import UserTenantMapping from danswer.db.users import get_user_by_email from danswer.utils.logger import setup_logger from danswer.utils.telemetry import optional_telemetry from danswer.utils.telemetry import RecordType from danswer.utils.variable_functionality import fetch_versioned_implementation +from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR + logger = setup_logger() @@ -104,7 +135,9 @@ def get_display_email(email: str | None, space_less: bool = False) -> str: def user_needs_to_be_verified() -> bool: # all other auth types besides basic should require users to be # verified - return AUTH_TYPE != AuthType.BASIC or REQUIRE_EMAIL_VERIFICATION + return not DISABLE_VERIFICATION and ( + AUTH_TYPE != AuthType.BASIC or REQUIRE_EMAIL_VERIFICATION + ) def verify_email_is_invited(email: str) -> None: @@ -115,7 +148,10 @@ def verify_email_is_invited(email: str) -> None: if not email: raise PermissionError("Email must be specified") - email_info = validate_email(email) # can raise EmailNotValidError + try: + email_info = validate_email(email) + except EmailUndeliverableError: + raise PermissionError("Email is not valid") for email_whitelist in whitelist: try: @@ -133,8 +169,8 @@ def verify_email_is_invited(email: str) -> None: raise PermissionError("User not on allowed user whitelist") -def 
verify_email_in_whitelist(email: str) -> None: - with Session(get_sqlalchemy_engine()) as db_session: +def verify_email_in_whitelist(email: str, tenant_id: str | None = None) -> None: + with get_session_with_tenant(tenant_id) as db_session: if not get_user_by_email(email, db_session): verify_email_is_invited(email) @@ -154,6 +190,20 @@ def verify_email_domain(email: str) -> None: ) +def get_tenant_id_for_email(email: str) -> str: + if not MULTI_TENANT: + return POSTGRES_DEFAULT_SCHEMA + # Implement logic to get tenant_id from the mapping table + with Session(get_sqlalchemy_engine()) as db_session: + result = db_session.execute( + select(UserTenantMapping.tenant_id).where(UserTenantMapping.email == email) + ) + tenant_id = result.scalar_one_or_none() + if tenant_id is None: + raise exceptions.UserNotExists() + return tenant_id + + def send_user_verification_email( user_email: str, token: str, @@ -198,25 +248,61 @@ async def create( # user_create.role = UserRole.BASIC user = None try: - user = await super().create(user_create, safe=safe, request=request) # type: ignore - except exceptions.UserAlreadyExists: - user = await self.get_by_email(user_create.email) - # Handle case where user has used product outside of web and is now creating an account through web - if ( - not user.has_web_login - and hasattr(user_create, "has_web_login") - and user_create.has_web_login - ): - user_update = UserUpdate( - password=user_create.password, - has_web_login=True, - role=user_create.role, - is_verified=user_create.is_verified, - ) - user = await self.update(user_update, user) - else: - raise exceptions.UserAlreadyExists() - return user + tenant_id = ( + get_tenant_id_for_email(user_create.email) + if MULTI_TENANT + else POSTGRES_DEFAULT_SCHEMA + ) + except exceptions.UserNotExists: + raise HTTPException(status_code=401, detail="User not found") + + if not tenant_id: + raise HTTPException( + status_code=401, detail="User does not belong to an organization" + ) + + async with get_async_session_with_tenant(tenant_id) as db_session: + token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) + + verify_email_is_invited(user_create.email) + verify_email_domain(user_create.email) + if MULTI_TENANT: + tenant_user_db = SQLAlchemyUserAdminDB(db_session, User, OAuthAccount) + self.user_db = tenant_user_db + self.database = tenant_user_db + + if hasattr(user_create, "role"): + user_count = await get_user_count() + if ( + user_count == 0 + or user_create.email in get_default_admin_user_emails() + ): + user_create.role = UserRole.ADMIN + else: + user_create.role = UserRole.BASIC + user = None + try: + user = await super().create(user_create, safe=safe, request=request) # type: ignore + except exceptions.UserAlreadyExists: + user = await self.get_by_email(user_create.email) + # Handle case where user has used product outside of web and is now creating an account through web + if ( + not user.has_web_login + and hasattr(user_create, "has_web_login") + and user_create.has_web_login + ): + user_update = UserUpdate( + password=user_create.password, + has_web_login=True, + role=user_create.role, + is_verified=user_create.is_verified, + ) + user = await self.update(user_update, user) + else: + raise exceptions.UserAlreadyExists() + + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) + return user async def oauth_callback( self: "BaseUserManager[models.UOAP, models.ID]", @@ -231,45 +317,118 @@ async def oauth_callback( associate_by_email: bool = False, is_verified_by_default: bool = False, ) -> models.UOAP: - 
verify_email_in_whitelist(account_email) - verify_email_domain(account_email) - - user = await super().oauth_callback( # type: ignore - oauth_name=oauth_name, - access_token=access_token, - account_id=account_id, - account_email=account_email, - expires_at=expires_at, - refresh_token=refresh_token, - request=request, - associate_by_email=associate_by_email, - is_verified_by_default=is_verified_by_default, - ) - - # NOTE: Most IdPs have very short expiry times, and we don't want to force the user to - # re-authenticate that frequently, so by default this is disabled - if expires_at and TRACK_EXTERNAL_IDP_EXPIRY: - oidc_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc) - await self.user_db.update(user, update_dict={"oidc_expiry": oidc_expiry}) - - # this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false` - # otherwise, the oidc expiry will always be old, and the user will never be able to login - if user.oidc_expiry and not TRACK_EXTERNAL_IDP_EXPIRY: - await self.user_db.update(user, update_dict={"oidc_expiry": None}) - - # Handle case where user has used product outside of web and is now creating an account through web - if not user.has_web_login: - await self.user_db.update( - user, - update_dict={ - "is_verified": is_verified_by_default, - "has_web_login": True, - }, + # Get tenant_id from mapping table + try: + tenant_id = ( + get_tenant_id_for_email(account_email) + if MULTI_TENANT + else POSTGRES_DEFAULT_SCHEMA ) - user.is_verified = is_verified_by_default - user.has_web_login = True + except exceptions.UserNotExists: + raise HTTPException(status_code=401, detail="User not found") + + if not tenant_id: + raise HTTPException(status_code=401, detail="User not found") + + token = None + async with get_async_session_with_tenant(tenant_id) as db_session: + token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) + + verify_email_in_whitelist(account_email, tenant_id) + verify_email_domain(account_email) + + if MULTI_TENANT: + tenant_user_db = SQLAlchemyUserAdminDB(db_session, User, OAuthAccount) + self.user_db = tenant_user_db + self.database = tenant_user_db # type: ignore + + oauth_account_dict = { + "oauth_name": oauth_name, + "access_token": access_token, + "account_id": account_id, + "account_email": account_email, + "expires_at": expires_at, + "refresh_token": refresh_token, + } + + try: + # Attempt to get user by OAuth account + user = await self.get_by_oauth_account(oauth_name, account_id) + + except exceptions.UserNotExists: + try: + # Attempt to get user by email + user = await self.get_by_email(account_email) + if not associate_by_email: + raise exceptions.UserAlreadyExists() + + user = await self.user_db.add_oauth_account( + user, oauth_account_dict + ) + + # If user not found by OAuth account or email, create a new user + except exceptions.UserNotExists: + password = self.password_helper.generate() + user_dict = { + "email": account_email, + "hashed_password": self.password_helper.hash(password), + "is_verified": is_verified_by_default, + } + + user = await self.user_db.create(user_dict) + + # Explicitly set the Postgres schema for this session to ensure + # OAuth account creation happens in the correct tenant schema + await db_session.execute(text(f'SET search_path = "{tenant_id}"')) + user = await self.user_db.add_oauth_account( + user, oauth_account_dict + ) + await self.on_after_register(user, request) + + else: + for existing_oauth_account in user.oauth_accounts: + if ( + existing_oauth_account.account_id == account_id + and 
existing_oauth_account.oauth_name == oauth_name + ): + user = await self.user_db.update_oauth_account( + user, existing_oauth_account, oauth_account_dict + ) + + # NOTE: Most IdPs have very short expiry times, and we don't want to force the user to + # re-authenticate that frequently, so by default this is disabled + + if expires_at and TRACK_EXTERNAL_IDP_EXPIRY: + oidc_expiry = datetime.fromtimestamp(expires_at, tz=timezone.utc) + await self.user_db.update( + user, update_dict={"oidc_expiry": oidc_expiry} + ) + + # Handle case where user has used product outside of web and is now creating an account through web + if not user.has_web_login: # type: ignore + await self.user_db.update( + user, + { + "is_verified": is_verified_by_default, + "has_web_login": True, + }, + ) + user.is_verified = is_verified_by_default + user.has_web_login = True # type: ignore - return user + # this is needed if an organization goes from `TRACK_EXTERNAL_IDP_EXPIRY=true` to `false` + # otherwise, the oidc expiry will always be old, and the user will never be able to login + if ( + user.oidc_expiry is not None # type: ignore + and not TRACK_EXTERNAL_IDP_EXPIRY + ): + await self.user_db.update(user, {"oidc_expiry": None}) + user.oidc_expiry = None # type: ignore + + if token: + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) + + return user async def on_after_register( self, user: User, request: Optional[Request] = None @@ -300,28 +459,50 @@ async def on_after_request_verify( async def authenticate( self, credentials: OAuth2PasswordRequestForm ) -> Optional[User]: - try: - user = await self.get_by_email(credentials.username) - except exceptions.UserNotExists: + email = credentials.username + + # Get tenant_id from mapping table + tenant_id = get_tenant_id_for_email(email) + if not tenant_id: + # User not found in mapping self.password_helper.hash(credentials.password) return None - if not user.has_web_login: - raise HTTPException( - status_code=status.HTTP_403_FORBIDDEN, - detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD", + # Create a tenant-specific session + async with get_async_session_with_tenant(tenant_id) as tenant_session: + tenant_user_db: SQLAlchemyUserDatabase = SQLAlchemyUserDatabase( + tenant_session, User ) + self.user_db = tenant_user_db - verified, updated_password_hash = self.password_helper.verify_and_update( - credentials.password, user.hashed_password - ) - if not verified: - return None + # Proceed with authentication + try: + user = await self.get_by_email(email) - if updated_password_hash is not None: - await self.user_db.update(user, {"hashed_password": updated_password_hash}) + except exceptions.UserNotExists: + self.password_helper.hash(credentials.password) + return None - return user + has_web_login = attributes.get_attribute(user, "has_web_login") + + if not has_web_login: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="NO_WEB_LOGIN_AND_HAS_NO_PASSWORD", + ) + + verified, updated_password_hash = self.password_helper.verify_and_update( + credentials.password, user.hashed_password + ) + if not verified: + return None + + if updated_password_hash is not None: + await self.user_db.update( + user, {"hashed_password": updated_password_hash} + ) + + return user async def get_user_manager( @@ -336,21 +517,44 @@ async def get_user_manager( ) +# This strategy is used to add tenant_id to the JWT token +class TenantAwareJWTStrategy(JWTStrategy): + async def _create_token_data(self, user: User, impersonate: bool = False) -> dict: + tenant_id = get_tenant_id_for_email(user.email) + data = 
{ + "sub": str(user.id), + "aud": self.token_audience, + "tenant_id": tenant_id, + } + return data + + async def write_token(self, user: User) -> str: + data = await self._create_token_data(user) + return generate_jwt( + data, self.encode_key, self.lifetime_seconds, algorithm=self.algorithm + ) + + +def get_jwt_strategy() -> TenantAwareJWTStrategy: + return TenantAwareJWTStrategy( + secret=USER_AUTH_SECRET, + lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS, + ) + + def get_database_strategy( access_token_db: AccessTokenDatabase[AccessToken] = Depends(get_access_token_db), ) -> DatabaseStrategy: - strategy = DatabaseStrategy( + return DatabaseStrategy( access_token_db, lifetime_seconds=SESSION_EXPIRE_TIME_SECONDS # type: ignore ) - return strategy - auth_backend = AuthenticationBackend( - name="database", + name="jwt" if MULTI_TENANT else "database", transport=cookie_transport, - get_strategy=get_database_strategy, -) + get_strategy=get_jwt_strategy if MULTI_TENANT else get_database_strategy, # type: ignore +) # type: ignore class FastAPIUserWithLogoutRouter(FastAPIUsers[models.UP, models.ID]): @@ -364,9 +568,11 @@ def get_logout_router( This way the login router does not need to be included """ router = APIRouter() + get_current_user_token = self.authenticator.current_user_token( active=True, verified=requires_verification ) + logout_responses: OpenAPIResponseType = { **{ status.HTTP_401_UNAUTHORIZED: { @@ -413,8 +619,8 @@ async def optional_user_( async def optional_user( request: Request, - user: User | None = Depends(optional_fastapi_current_user), db_session: Session = Depends(get_session), + user: User | None = Depends(optional_fastapi_current_user), ) -> User | None: versioned_fetch_user = fetch_versioned_implementation( "danswer.auth.users", "optional_user_" @@ -505,3 +711,186 @@ async def current_admin_user(user: User | None = Depends(current_user)) -> User def get_default_admin_user_emails_() -> list[str]: # No default seeding available for Danswer MIT return [] + + +STATE_TOKEN_AUDIENCE = "fastapi-users:oauth-state" + + +class OAuth2AuthorizeResponse(BaseModel): + authorization_url: str + + +def generate_state_token( + data: Dict[str, str], secret: SecretType, lifetime_seconds: int = 3600 +) -> str: + data["aud"] = STATE_TOKEN_AUDIENCE + + return generate_jwt(data, secret, lifetime_seconds) + + +# refer to https://github.com/fastapi-users/fastapi-users/blob/42ddc241b965475390e2bce887b084152ae1a2cd/fastapi_users/fastapi_users.py#L91 + + +def create_danswer_oauth_router( + oauth_client: BaseOAuth2, + backend: AuthenticationBackend, + state_secret: SecretType, + redirect_url: Optional[str] = None, + associate_by_email: bool = False, + is_verified_by_default: bool = False, +) -> APIRouter: + return get_oauth_router( + oauth_client, + backend, + get_user_manager, + state_secret, + redirect_url, + associate_by_email, + is_verified_by_default, + ) + + +def get_oauth_router( + oauth_client: BaseOAuth2, + backend: AuthenticationBackend, + get_user_manager: UserManagerDependency[models.UP, models.ID], + state_secret: SecretType, + redirect_url: Optional[str] = None, + associate_by_email: bool = False, + is_verified_by_default: bool = False, +) -> APIRouter: + """Generate a router with the OAuth routes.""" + router = APIRouter() + callback_route_name = f"oauth:{oauth_client.name}.{backend.name}.callback" + + if redirect_url is not None: + oauth2_authorize_callback = OAuth2AuthorizeCallback( + oauth_client, + redirect_url=redirect_url, + ) + else: + oauth2_authorize_callback = 
OAuth2AuthorizeCallback( + oauth_client, + route_name=callback_route_name, + ) + + @router.get( + "/authorize", + name=f"oauth:{oauth_client.name}.{backend.name}.authorize", + response_model=OAuth2AuthorizeResponse, + ) + async def authorize( + request: Request, scopes: List[str] = Query(None) + ) -> OAuth2AuthorizeResponse: + if redirect_url is not None: + authorize_redirect_url = redirect_url + else: + authorize_redirect_url = str(request.url_for(callback_route_name)) + + next_url = request.query_params.get("next", "/") + state_data: Dict[str, str] = {"next_url": next_url} + state = generate_state_token(state_data, state_secret) + authorization_url = await oauth_client.get_authorization_url( + authorize_redirect_url, + state, + scopes, + ) + + return OAuth2AuthorizeResponse(authorization_url=authorization_url) + + @router.get( + "/callback", + name=callback_route_name, + description="The response varies based on the authentication backend used.", + responses={ + status.HTTP_400_BAD_REQUEST: { + "model": ErrorModel, + "content": { + "application/json": { + "examples": { + "INVALID_STATE_TOKEN": { + "summary": "Invalid state token.", + "value": None, + }, + ErrorCode.LOGIN_BAD_CREDENTIALS: { + "summary": "User is inactive.", + "value": {"detail": ErrorCode.LOGIN_BAD_CREDENTIALS}, + }, + } + } + }, + }, + }, + ) + async def callback( + request: Request, + access_token_state: Tuple[OAuth2Token, str] = Depends( + oauth2_authorize_callback + ), + user_manager: BaseUserManager[models.UP, models.ID] = Depends(get_user_manager), + strategy: Strategy[models.UP, models.ID] = Depends(backend.get_strategy), + ) -> RedirectResponse: + token, state = access_token_state + account_id, account_email = await oauth_client.get_id_email( + token["access_token"] + ) + + if account_email is None: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ErrorCode.OAUTH_NOT_AVAILABLE_EMAIL, + ) + + try: + state_data = decode_jwt(state, state_secret, [STATE_TOKEN_AUDIENCE]) + except jwt.DecodeError: + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST) + + next_url = state_data.get("next_url", "/") + + # Authenticate user + try: + user = await user_manager.oauth_callback( + oauth_client.name, + token["access_token"], + account_id, + account_email, + token.get("expires_at"), + token.get("refresh_token"), + request, + associate_by_email=associate_by_email, + is_verified_by_default=is_verified_by_default, + ) + except UserAlreadyExists: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ErrorCode.OAUTH_USER_ALREADY_EXISTS, + ) + + if not user.is_active: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=ErrorCode.LOGIN_BAD_CREDENTIALS, + ) + + # Login user + response = await backend.login(strategy, user) + await user_manager.on_after_login(user, request, response) + + # Prepare redirect response + redirect_response = RedirectResponse(next_url, status_code=302) + + # Copy headers and other attributes from 'response' to 'redirect_response' + for header_name, header_value in response.headers.items(): + redirect_response.headers[header_name] = header_value + + if hasattr(response, "body"): + redirect_response.body = response.body + if hasattr(response, "status_code"): + redirect_response.status_code = response.status_code + if hasattr(response, "media_type"): + redirect_response.media_type = response.media_type + + return redirect_response + + return router diff --git a/backend/danswer/background/celery/apps/app_base.py 
b/backend/danswer/background/celery/apps/app_base.py new file mode 100644 index 00000000000..117293f1c2f --- /dev/null +++ b/backend/danswer/background/celery/apps/app_base.py @@ -0,0 +1,289 @@ +import logging +import multiprocessing +import time +from typing import Any + +import sentry_sdk +from celery import Task +from celery.app import trace +from celery.exceptions import WorkerShutdown +from celery.states import READY_STATES +from celery.utils.log import get_task_logger +from celery.worker import strategy # type: ignore +from sentry_sdk.integrations.celery import CeleryIntegration + +from danswer.background.celery.apps.task_formatters import CeleryTaskColoredFormatter +from danswer.background.celery.apps.task_formatters import CeleryTaskPlainFormatter +from danswer.background.celery.celery_utils import celery_is_worker_primary +from danswer.configs.constants import DanswerRedisLocks +from danswer.redis.redis_connector import RedisConnector +from danswer.redis.redis_connector_credential_pair import RedisConnectorCredentialPair +from danswer.redis.redis_connector_delete import RedisConnectorDelete +from danswer.redis.redis_connector_prune import RedisConnectorPrune +from danswer.redis.redis_document_set import RedisDocumentSet +from danswer.redis.redis_pool import get_redis_client +from danswer.redis.redis_usergroup import RedisUserGroup +from danswer.utils.logger import ColoredFormatter +from danswer.utils.logger import PlainFormatter +from danswer.utils.logger import setup_logger +from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import SENTRY_DSN + + +logger = setup_logger() + +task_logger = get_task_logger(__name__) + +if SENTRY_DSN: + sentry_sdk.init( + dsn=SENTRY_DSN, + integrations=[CeleryIntegration()], + traces_sample_rate=0.1, + ) + logger.info("Sentry initialized") +else: + logger.debug("Sentry DSN not provided, skipping Sentry initialization") + + +def on_task_prerun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + **kwds: Any, +) -> None: + pass + + +def on_task_postrun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict[str, Any] | None = None, + retval: Any | None = None, + state: str | None = None, + **kwds: Any, +) -> None: + """We handle this signal in order to remove completed tasks + from their respective tasksets. This allows us to track the progress of document set + and user group syncs. + + This function runs after any task completes (both success and failure) + Note that this signal does not fire on a task that failed to complete and is going + to be retried. 
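+
+    Matching is done on the task id prefix; besides document set and user
+    group syncs, connector credential pair syncs, connector deletions, and
+    pruning subtasks are also cleared from their Redis tasksets here.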
+ + This also does not fire if a worker with acks_late=False crashes (which all of our + long running workers are) + """ + if not task: + return + + task_logger.debug(f"Task {task.name} (ID: {task_id}) completed with state: {state}") + + if state not in READY_STATES: + return + + if not task_id: + return + + # Get tenant_id directly from kwargs- each celery task has a tenant_id kwarg + if not kwargs: + logger.error(f"Task {task.name} (ID: {task_id}) is missing kwargs") + tenant_id = None + else: + tenant_id = kwargs.get("tenant_id") + + task_logger.debug( + f"Task {task.name} (ID: {task_id}) completed with state: {state} " + f"{f'for tenant_id={tenant_id}' if tenant_id else ''}" + ) + + r = get_redis_client(tenant_id=tenant_id) + + if task_id.startswith(RedisConnectorCredentialPair.PREFIX): + r.srem(RedisConnectorCredentialPair.get_taskset_key(), task_id) + return + + if task_id.startswith(RedisDocumentSet.PREFIX): + document_set_id = RedisDocumentSet.get_id_from_task_id(task_id) + if document_set_id is not None: + rds = RedisDocumentSet(tenant_id, int(document_set_id)) + r.srem(rds.taskset_key, task_id) + return + + if task_id.startswith(RedisUserGroup.PREFIX): + usergroup_id = RedisUserGroup.get_id_from_task_id(task_id) + if usergroup_id is not None: + rug = RedisUserGroup(tenant_id, int(usergroup_id)) + r.srem(rug.taskset_key, task_id) + return + + if task_id.startswith(RedisConnectorDelete.PREFIX): + cc_pair_id = RedisConnector.get_id_from_task_id(task_id) + if cc_pair_id is not None: + RedisConnectorDelete.remove_from_taskset(int(cc_pair_id), task_id, r) + return + + if task_id.startswith(RedisConnectorPrune.SUBTASK_PREFIX): + cc_pair_id = RedisConnector.get_id_from_task_id(task_id) + if cc_pair_id is not None: + RedisConnectorPrune.remove_from_taskset(int(cc_pair_id), task_id, r) + return + + +def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None: + """The first signal sent on celery worker startup""" + multiprocessing.set_start_method("spawn") # fork is unsafe, set to spawn + + +def wait_for_redis(sender: Any, **kwargs: Any) -> None: + r = get_redis_client(tenant_id=None) + + WAIT_INTERVAL = 5 + WAIT_LIMIT = 60 + + time_start = time.monotonic() + logger.info("Redis: Readiness check starting.") + while True: + try: + if r.ping(): + break + except Exception: + pass + + time_elapsed = time.monotonic() - time_start + logger.info( + f"Redis: Ping failed. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}" + ) + if time_elapsed > WAIT_LIMIT: + msg = ( + f"Redis: Readiness check did not succeed within the timeout " + f"({WAIT_LIMIT} seconds). Exiting..." + ) + logger.error(msg) + raise WorkerShutdown(msg) + + time.sleep(WAIT_INTERVAL) + + logger.info("Redis: Readiness check succeeded. Continuing...") + return + + +def on_secondary_worker_init(sender: Any, **kwargs: Any) -> None: + logger.info("Running as a secondary celery worker.") + + # Exit early if multi-tenant since primary worker check not needed + if MULTI_TENANT: + return + + # Set up variables for waiting on primary worker + WAIT_INTERVAL = 5 + WAIT_LIMIT = 60 + r = get_redis_client(tenant_id=None) + time_start = time.monotonic() + + logger.info("Waiting for primary worker to be ready...") + while True: + if r.exists(DanswerRedisLocks.PRIMARY_WORKER): + break + + time_elapsed = time.monotonic() - time_start + logger.info( + f"Primary worker is not ready yet. 
elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}" + ) + if time_elapsed > WAIT_LIMIT: + msg = ( + f"Primary worker was not ready within the timeout. " + f"({WAIT_LIMIT} seconds). Exiting..." + ) + logger.error(msg) + raise WorkerShutdown(msg) + + time.sleep(WAIT_INTERVAL) + + logger.info("Wait for primary worker completed successfully. Continuing...") + return + + +def on_worker_ready(sender: Any, **kwargs: Any) -> None: + task_logger.info("worker_ready signal received.") + + +def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: + if not celery_is_worker_primary(sender): + return + + if not sender.primary_worker_lock: + return + + logger.info("Releasing primary worker lock.") + lock = sender.primary_worker_lock + try: + if lock.owned(): + try: + lock.release() + sender.primary_worker_lock = None + except Exception as e: + logger.error(f"Failed to release primary worker lock: {e}") + except Exception as e: + logger.error(f"Failed to check if primary worker lock is owned: {e}") + + +def on_setup_logging( + loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any +) -> None: + # TODO: could unhardcode format and colorize and accept these as options from + # celery's config + + # reformats the root logger + root_logger = logging.getLogger() + + root_handler = logging.StreamHandler() # Set up a handler for the root logger + root_formatter = ColoredFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + root_handler.setFormatter(root_formatter) + root_logger.addHandler(root_handler) # Apply the handler to the root logger + + if logfile: + root_file_handler = logging.FileHandler(logfile) + root_file_formatter = PlainFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + root_file_handler.setFormatter(root_file_formatter) + root_logger.addHandler(root_file_handler) + + root_logger.setLevel(loglevel) + + # reformats celery's task logger + task_formatter = CeleryTaskColoredFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + task_handler = logging.StreamHandler() # Set up a handler for the task logger + task_handler.setFormatter(task_formatter) + task_logger.addHandler(task_handler) # Apply the handler to the task logger + + if logfile: + task_file_handler = logging.FileHandler(logfile) + task_file_formatter = CeleryTaskPlainFormatter( + "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", + datefmt="%m/%d/%Y %I:%M:%S %p", + ) + task_file_handler.setFormatter(task_file_formatter) + task_logger.addHandler(task_file_handler) + + task_logger.setLevel(loglevel) + task_logger.propagate = False + + # hide celery task received spam + # e.g. "Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] received" + strategy.logger.setLevel(logging.WARNING) + + # hide celery task succeeded/failed spam + # e.g. 
"Task check_for_pruning[a1e96171-0ba8-4e00-887b-9fbf7442eab3] succeeded in 0.03137450001668185s: None" + trace.logger.setLevel(logging.WARNING) diff --git a/backend/danswer/background/celery/apps/beat.py b/backend/danswer/background/celery/apps/beat.py new file mode 100644 index 00000000000..5ef887121dc --- /dev/null +++ b/backend/danswer/background/celery/apps/beat.py @@ -0,0 +1,156 @@ +from datetime import timedelta +from typing import Any + +from celery import Celery +from celery import signals +from celery.beat import PersistentScheduler # type: ignore +from celery.signals import beat_init + +import danswer.background.celery.apps.app_base as app_base +from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME +from danswer.db.engine import get_all_tenant_ids +from danswer.db.engine import SqlEngine +from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import fetch_versioned_implementation + +logger = setup_logger(__name__) + +celery_app = Celery(__name__) +celery_app.config_from_object("danswer.background.celery.configs.beat") + + +class DynamicTenantScheduler(PersistentScheduler): + def __init__(self, *args: Any, **kwargs: Any) -> None: + logger.info("Initializing DynamicTenantScheduler") + super().__init__(*args, **kwargs) + self._reload_interval = timedelta(minutes=2) + self._last_reload = self.app.now() - self._reload_interval + # Let the parent class handle store initialization + self.setup_schedule() + self._update_tenant_tasks() + logger.info(f"Set reload interval to {self._reload_interval}") + + def setup_schedule(self) -> None: + logger.info("Setting up initial schedule") + super().setup_schedule() + logger.info("Initial schedule setup complete") + + def tick(self) -> float: + retval = super().tick() + now = self.app.now() + if ( + self._last_reload is None + or (now - self._last_reload) > self._reload_interval + ): + logger.info("Reload interval reached, initiating tenant task update") + self._update_tenant_tasks() + self._last_reload = now + logger.info("Tenant task update completed, reset reload timer") + return retval + + def _update_tenant_tasks(self) -> None: + logger.info("Starting tenant task update process") + try: + logger.info("Fetching all tenant IDs") + tenant_ids = get_all_tenant_ids() + logger.info(f"Found {len(tenant_ids)} tenants") + + logger.info("Fetching tasks to schedule") + tasks_to_schedule = fetch_versioned_implementation( + "danswer.background.celery.tasks.beat_schedule", "get_tasks_to_schedule" + ) + + new_beat_schedule: dict[str, dict[str, Any]] = {} + + current_schedule = self.schedule.items() + + existing_tenants = set() + for task_name, _ in current_schedule: + if "-" in task_name: + existing_tenants.add(task_name.split("-")[-1]) + logger.info(f"Found {len(existing_tenants)} existing tenants in schedule") + + for tenant_id in tenant_ids: + if tenant_id not in existing_tenants: + logger.info(f"Processing new tenant: {tenant_id}") + + for task in tasks_to_schedule(): + task_name = f"{task['name']}-{tenant_id}" + logger.debug(f"Creating task configuration for {task_name}") + new_task = { + "task": task["task"], + "schedule": task["schedule"], + "kwargs": {"tenant_id": tenant_id}, + } + if options := task.get("options"): + logger.debug(f"Adding options to task {task_name}: {options}") + new_task["options"] = options + new_beat_schedule[task_name] = new_task + + if self._should_update_schedule(current_schedule, new_beat_schedule): + logger.info( + "Schedule update required", + extra={ + "new_tasks": 
len(new_beat_schedule), + "current_tasks": len(current_schedule), + }, + ) + + # Create schedule entries + entries = {} + for name, entry in new_beat_schedule.items(): + entries[name] = self.Entry( + name=name, + app=self.app, + task=entry["task"], + schedule=entry["schedule"], + options=entry.get("options", {}), + kwargs=entry.get("kwargs", {}), + ) + + # Update the schedule using the scheduler's methods + self.schedule.clear() + self.schedule.update(entries) + + # Ensure changes are persisted + self.sync() + + logger.info("Schedule update completed successfully") + else: + logger.info("Schedule is up to date, no changes needed") + + except (AttributeError, KeyError) as e: + logger.exception(f"Failed to process task configuration: {str(e)}") + except Exception as e: + logger.exception(f"Unexpected error updating tenant tasks: {str(e)}") + + def _should_update_schedule( + self, current_schedule: dict, new_schedule: dict + ) -> bool: + """Compare schedules to determine if an update is needed.""" + logger.debug("Comparing current and new schedules") + current_tasks = set(name for name, _ in current_schedule) + new_tasks = set(new_schedule.keys()) + needs_update = current_tasks != new_tasks + logger.debug(f"Schedule update needed: {needs_update}") + return needs_update + + +@beat_init.connect +def on_beat_init(sender: Any, **kwargs: Any) -> None: + logger.info("beat_init signal received.") + + # Celery beat shouldn't touch the db at all. But just setting a low minimum here. + SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME) + SqlEngine.init_engine(pool_size=2, max_overflow=0) + app_base.wait_for_redis(sender, **kwargs) + + +@signals.setup_logging.connect +def on_setup_logging( + loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any +) -> None: + app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs) + + +celery_app.conf.beat_scheduler = DynamicTenantScheduler diff --git a/backend/danswer/background/celery/apps/heavy.py b/backend/danswer/background/celery/apps/heavy.py new file mode 100644 index 00000000000..4bd95162b3a --- /dev/null +++ b/backend/danswer/background/celery/apps/heavy.py @@ -0,0 +1,88 @@ +import multiprocessing +from typing import Any + +from celery import Celery +from celery import signals +from celery import Task +from celery.signals import celeryd_init +from celery.signals import worker_init +from celery.signals import worker_ready +from celery.signals import worker_shutdown + +import danswer.background.celery.apps.app_base as app_base +from danswer.configs.constants import POSTGRES_CELERY_WORKER_HEAVY_APP_NAME +from danswer.db.engine import SqlEngine +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + +celery_app = Celery(__name__) +celery_app.config_from_object("danswer.background.celery.configs.heavy") + + +@signals.task_prerun.connect +def on_task_prerun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + **kwds: Any, +) -> None: + app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds) + + +@signals.task_postrun.connect +def on_task_postrun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + retval: Any | None = None, + state: str | None = None, + **kwds: Any, +) -> None: + app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds) + + +@celeryd_init.connect +def 
on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None: + app_base.on_celeryd_init(sender, conf, **kwargs) + + +@worker_init.connect +def on_worker_init(sender: Any, **kwargs: Any) -> None: + logger.info("worker_init signal received.") + logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}") + + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME) + SqlEngine.init_engine(pool_size=4, max_overflow=12) + + app_base.wait_for_redis(sender, **kwargs) + app_base.on_secondary_worker_init(sender, **kwargs) + + +@worker_ready.connect +def on_worker_ready(sender: Any, **kwargs: Any) -> None: + app_base.on_worker_ready(sender, **kwargs) + + +@worker_shutdown.connect +def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: + app_base.on_worker_shutdown(sender, **kwargs) + + +@signals.setup_logging.connect +def on_setup_logging( + loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any +) -> None: + app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs) + + +celery_app.autodiscover_tasks( + [ + "danswer.background.celery.tasks.pruning", + ] +) diff --git a/backend/danswer/background/celery/apps/indexing.py b/backend/danswer/background/celery/apps/indexing.py new file mode 100644 index 00000000000..a981694e12e --- /dev/null +++ b/backend/danswer/background/celery/apps/indexing.py @@ -0,0 +1,88 @@ +import multiprocessing +from typing import Any + +from celery import Celery +from celery import signals +from celery import Task +from celery.signals import celeryd_init +from celery.signals import worker_init +from celery.signals import worker_ready +from celery.signals import worker_shutdown + +import danswer.background.celery.apps.app_base as app_base +from danswer.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_APP_NAME +from danswer.db.engine import SqlEngine +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + +celery_app = Celery(__name__) +celery_app.config_from_object("danswer.background.celery.configs.indexing") + + +@signals.task_prerun.connect +def on_task_prerun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + **kwds: Any, +) -> None: + app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds) + + +@signals.task_postrun.connect +def on_task_postrun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + retval: Any | None = None, + state: str | None = None, + **kwds: Any, +) -> None: + app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds) + + +@celeryd_init.connect +def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None: + app_base.on_celeryd_init(sender, conf, **kwargs) + + +@worker_init.connect +def on_worker_init(sender: Any, **kwargs: Any) -> None: + logger.info("worker_init signal received.") + logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}") + + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_APP_NAME) + SqlEngine.init_engine(pool_size=8, max_overflow=0) + + app_base.wait_for_redis(sender, **kwargs) + app_base.on_secondary_worker_init(sender, **kwargs) + + +@worker_ready.connect +def on_worker_ready(sender: Any, **kwargs: Any) -> None: + app_base.on_worker_ready(sender, **kwargs) + + +@worker_shutdown.connect +def on_worker_shutdown(sender: Any, **kwargs: Any) -> 
None: + app_base.on_worker_shutdown(sender, **kwargs) + + +@signals.setup_logging.connect +def on_setup_logging( + loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any +) -> None: + app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs) + + +celery_app.autodiscover_tasks( + [ + "danswer.background.celery.tasks.indexing", + ] +) diff --git a/backend/danswer/background/celery/apps/light.py b/backend/danswer/background/celery/apps/light.py new file mode 100644 index 00000000000..033ba084ee8 --- /dev/null +++ b/backend/danswer/background/celery/apps/light.py @@ -0,0 +1,90 @@ +import multiprocessing +from typing import Any + +from celery import Celery +from celery import signals +from celery import Task +from celery.signals import celeryd_init +from celery.signals import worker_init +from celery.signals import worker_ready +from celery.signals import worker_shutdown + +import danswer.background.celery.apps.app_base as app_base +from danswer.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME +from danswer.db.engine import SqlEngine +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + +celery_app = Celery(__name__) +celery_app.config_from_object("danswer.background.celery.configs.light") + + +@signals.task_prerun.connect +def on_task_prerun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + **kwds: Any, +) -> None: + app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds) + + +@signals.task_postrun.connect +def on_task_postrun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + retval: Any | None = None, + state: str | None = None, + **kwds: Any, +) -> None: + app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds) + + +@celeryd_init.connect +def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None: + app_base.on_celeryd_init(sender, conf, **kwargs) + + +@worker_init.connect +def on_worker_init(sender: Any, **kwargs: Any) -> None: + logger.info("worker_init signal received.") + logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}") + + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME) + SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8) + + app_base.wait_for_redis(sender, **kwargs) + app_base.on_secondary_worker_init(sender, **kwargs) + + +@worker_ready.connect +def on_worker_ready(sender: Any, **kwargs: Any) -> None: + app_base.on_worker_ready(sender, **kwargs) + + +@worker_shutdown.connect +def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: + app_base.on_worker_shutdown(sender, **kwargs) + + +@signals.setup_logging.connect +def on_setup_logging( + loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any +) -> None: + app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs) + + +celery_app.autodiscover_tasks( + [ + "danswer.background.celery.tasks.shared", + "danswer.background.celery.tasks.vespa", + "danswer.background.celery.tasks.connector_deletion", + ] +) diff --git a/backend/danswer/background/celery/apps/primary.py b/backend/danswer/background/celery/apps/primary.py new file mode 100644 index 00000000000..b0950fc8f03 --- /dev/null +++ b/backend/danswer/background/celery/apps/primary.py @@ -0,0 +1,237 @@ +import multiprocessing +from typing import Any + +from celery 
import bootsteps # type: ignore +from celery import Celery +from celery import signals +from celery import Task +from celery.exceptions import WorkerShutdown +from celery.signals import celeryd_init +from celery.signals import worker_init +from celery.signals import worker_ready +from celery.signals import worker_shutdown + +import danswer.background.celery.apps.app_base as app_base +from danswer.background.celery.apps.app_base import task_logger +from danswer.background.celery.celery_utils import celery_is_worker_primary +from danswer.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT +from danswer.configs.constants import DanswerRedisLocks +from danswer.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME +from danswer.db.engine import SqlEngine +from danswer.redis.redis_connector_credential_pair import RedisConnectorCredentialPair +from danswer.redis.redis_connector_delete import RedisConnectorDelete +from danswer.redis.redis_connector_index import RedisConnectorIndex +from danswer.redis.redis_connector_prune import RedisConnectorPrune +from danswer.redis.redis_connector_stop import RedisConnectorStop +from danswer.redis.redis_document_set import RedisDocumentSet +from danswer.redis.redis_pool import get_redis_client +from danswer.redis.redis_usergroup import RedisUserGroup +from danswer.utils.logger import setup_logger +from shared_configs.configs import MULTI_TENANT + + +logger = setup_logger() + +celery_app = Celery(__name__) +celery_app.config_from_object("danswer.background.celery.configs.primary") + + +@signals.task_prerun.connect +def on_task_prerun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + **kwds: Any, +) -> None: + app_base.on_task_prerun(sender, task_id, task, args, kwargs, **kwds) + + +@signals.task_postrun.connect +def on_task_postrun( + sender: Any | None = None, + task_id: str | None = None, + task: Task | None = None, + args: tuple | None = None, + kwargs: dict | None = None, + retval: Any | None = None, + state: str | None = None, + **kwds: Any, +) -> None: + app_base.on_task_postrun(sender, task_id, task, args, kwargs, retval, state, **kwds) + + +@celeryd_init.connect +def on_celeryd_init(sender: Any = None, conf: Any = None, **kwargs: Any) -> None: + app_base.on_celeryd_init(sender, conf, **kwargs) + + +@worker_init.connect +def on_worker_init(sender: Any, **kwargs: Any) -> None: + logger.info("worker_init signal received.") + logger.info(f"Multiprocessing start method: {multiprocessing.get_start_method()}") + + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME) + SqlEngine.init_engine(pool_size=8, max_overflow=0) + + app_base.wait_for_redis(sender, **kwargs) + + logger.info("Running as the primary celery worker.") + + if MULTI_TENANT: + return + + # This is singleton work that should be done on startup exactly once + # by the primary worker. This is unnecessary in the multi tenant scenario + r = get_redis_client(tenant_id=None) + + # For the moment, we're assuming that we are the only primary worker + # that should be running. + # TODO: maybe check for or clean up another zombie primary worker if we detect it + r.delete(DanswerRedisLocks.PRIMARY_WORKER) + + # this process wide lock is taken to help other workers start up in order. + # it is planned to use this lock to enforce singleton behavior on the primary + # worker, since the primary worker does redis cleanup on startup, but this isn't + # implemented yet. 
+ lock = r.lock( + DanswerRedisLocks.PRIMARY_WORKER, + timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT, + ) + + logger.info("Primary worker lock: Acquire starting.") + acquired = lock.acquire(blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2) + if acquired: + logger.info("Primary worker lock: Acquire succeeded.") + else: + logger.error("Primary worker lock: Acquire failed!") + raise WorkerShutdown("Primary worker lock could not be acquired!") + + # tacking on our own user data to the sender + sender.primary_worker_lock = lock + + # As currently designed, when this worker starts as "primary", we reinitialize redis + # to a clean state (for our purposes, anyway) + r.delete(DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK) + r.delete(DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK) + + r.delete(RedisConnectorCredentialPair.get_taskset_key()) + r.delete(RedisConnectorCredentialPair.get_fence_key()) + + RedisDocumentSet.reset_all(r) + + RedisUserGroup.reset_all(r) + + RedisConnectorDelete.reset_all(r) + + RedisConnectorPrune.reset_all(r) + + RedisConnectorIndex.reset_all(r) + + RedisConnectorStop.reset_all(r) + + +@worker_ready.connect +def on_worker_ready(sender: Any, **kwargs: Any) -> None: + app_base.on_worker_ready(sender, **kwargs) + + +@worker_shutdown.connect +def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: + app_base.on_worker_shutdown(sender, **kwargs) + + +@signals.setup_logging.connect +def on_setup_logging( + loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any +) -> None: + app_base.on_setup_logging(loglevel, logfile, format, colorize, **kwargs) + + +class HubPeriodicTask(bootsteps.StartStopStep): + """Regularly reacquires the primary worker lock outside of the task queue. + Use the task_logger in this class to avoid double logging. + + This cannot be done inside a regular beat task because it must run on schedule and + a queue of existing work would starve the task from running. + """ + + # it's unclear to me whether using the hub's timer or the bootstep timer is better + requires = {"celery.worker.components:Hub"} + + def __init__(self, worker: Any, **kwargs: Any) -> None: + self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8 # Interval in seconds + self.task_tref = None + + def start(self, worker: Any) -> None: + if not celery_is_worker_primary(worker): + return + + # Access the worker's event loop (hub) + hub = worker.consumer.controller.hub + + # Schedule the periodic task + self.task_tref = hub.call_repeatedly( + self.interval, self.run_periodic_task, worker + ) + task_logger.info("Scheduled periodic task with hub.") + + def run_periodic_task(self, worker: Any) -> None: + try: + if not celery_is_worker_primary(worker): + return + + if not hasattr(worker, "primary_worker_lock"): + return + + lock = worker.primary_worker_lock + + r = get_redis_client(tenant_id=None) + + if lock.owned(): + task_logger.debug("Reacquiring primary worker lock.") + lock.reacquire() + else: + task_logger.warning( + "Full acquisition of primary worker lock. " + "Reasons could be worker restart or lock expiration." 
+ ) + lock = r.lock( + DanswerRedisLocks.PRIMARY_WORKER, + timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT, + ) + + task_logger.info("Primary worker lock: Acquire starting.") + acquired = lock.acquire( + blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2 + ) + if acquired: + task_logger.info("Primary worker lock: Acquire succeeded.") + worker.primary_worker_lock = lock + else: + task_logger.error("Primary worker lock: Acquire failed!") + raise TimeoutError("Primary worker lock could not be acquired!") + + except Exception: + task_logger.exception("Periodic task failed.") + + def stop(self, worker: Any) -> None: + # Cancel the scheduled task when the worker stops + if self.task_tref: + self.task_tref.cancel() + task_logger.info("Canceled periodic task with hub.") + + +celery_app.steps["worker"].add(HubPeriodicTask) + +celery_app.autodiscover_tasks( + [ + "danswer.background.celery.tasks.connector_deletion", + "danswer.background.celery.tasks.indexing", + "danswer.background.celery.tasks.periodic", + "danswer.background.celery.tasks.pruning", + "danswer.background.celery.tasks.shared", + "danswer.background.celery.tasks.vespa", + ] +) diff --git a/backend/danswer/background/celery/apps/scheduler.py b/backend/danswer/background/celery/apps/scheduler.py new file mode 100644 index 00000000000..3ddf1dc169c --- /dev/null +++ b/backend/danswer/background/celery/apps/scheduler.py @@ -0,0 +1,96 @@ +from datetime import timedelta +from typing import Any + +from celery.beat import PersistentScheduler # type: ignore +from celery.utils.log import get_task_logger + +from danswer.db.engine import get_all_tenant_ids +from danswer.utils.variable_functionality import fetch_versioned_implementation + +logger = get_task_logger(__name__) + + +class DynamicTenantScheduler(PersistentScheduler): + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self._reload_interval = timedelta(minutes=1) + self._last_reload = self.app.now() - self._reload_interval + + def setup_schedule(self) -> None: + super().setup_schedule() + + def tick(self) -> float: + retval = super().tick() + now = self.app.now() + if ( + self._last_reload is None + or (now - self._last_reload) > self._reload_interval + ): + logger.info("Reloading schedule to check for new tenants...") + self._update_tenant_tasks() + self._last_reload = now + return retval + + def _update_tenant_tasks(self) -> None: + logger.info("Checking for tenant task updates...") + try: + tenant_ids = get_all_tenant_ids() + tasks_to_schedule = fetch_versioned_implementation( + "danswer.background.celery.tasks.beat_schedule", "get_tasks_to_schedule" + ) + + new_beat_schedule: dict[str, dict[str, Any]] = {} + + current_schedule = getattr(self, "_store", {"entries": {}}).get( + "entries", {} + ) + + existing_tenants = set() + for task_name in current_schedule.keys(): + if "-" in task_name: + existing_tenants.add(task_name.split("-")[-1]) + + for tenant_id in tenant_ids: + if tenant_id not in existing_tenants: + logger.info(f"Found new tenant: {tenant_id}") + + for task in tasks_to_schedule(): + task_name = f"{task['name']}-{tenant_id}" + new_task = { + "task": task["task"], + "schedule": task["schedule"], + "kwargs": {"tenant_id": tenant_id}, + } + if options := task.get("options"): + new_task["options"] = options + new_beat_schedule[task_name] = new_task + + if self._should_update_schedule(current_schedule, new_beat_schedule): + logger.info( + "Updating schedule", + extra={ + "new_tasks": len(new_beat_schedule), + "current_tasks": 
len(current_schedule), + }, + ) + if not hasattr(self, "_store"): + self._store: dict[str, dict] = {"entries": {}} + self.update_from_dict(new_beat_schedule) + logger.info(f"New schedule: {new_beat_schedule}") + + logger.info("Tenant tasks updated successfully") + else: + logger.debug("No schedule updates needed") + + except (AttributeError, KeyError): + logger.exception("Failed to process task configuration") + except Exception: + logger.exception("Unexpected error updating tenant tasks") + + def _should_update_schedule( + self, current_schedule: dict, new_schedule: dict + ) -> bool: + """Compare schedules to determine if an update is needed.""" + current_tasks = set(current_schedule.keys()) + new_tasks = set(new_schedule.keys()) + return current_tasks != new_tasks diff --git a/backend/danswer/background/celery/apps/task_formatters.py b/backend/danswer/background/celery/apps/task_formatters.py new file mode 100644 index 00000000000..e82b23a5431 --- /dev/null +++ b/backend/danswer/background/celery/apps/task_formatters.py @@ -0,0 +1,26 @@ +import logging + +from celery import current_task + +from danswer.utils.logger import ColoredFormatter +from danswer.utils.logger import PlainFormatter + + +class CeleryTaskPlainFormatter(PlainFormatter): + def format(self, record: logging.LogRecord) -> str: + task = current_task + if task and task.request: + record.__dict__.update(task_id=task.request.id, task_name=task.name) + record.msg = f"[{task.name}({task.request.id})] {record.msg}" + + return super().format(record) + + +class CeleryTaskColoredFormatter(ColoredFormatter): + def format(self, record: logging.LogRecord) -> str: + task = current_task + if task and task.request: + record.__dict__.update(task_id=task.request.id, task_name=task.name) + record.msg = f"[{task.name}({task.request.id})] {record.msg}" + + return super().format(record) diff --git a/backend/danswer/background/celery/celery_app.py b/backend/danswer/background/celery/celery_app.py deleted file mode 100644 index 0440f275c36..00000000000 --- a/backend/danswer/background/celery/celery_app.py +++ /dev/null @@ -1,465 +0,0 @@ -import logging -import time -from datetime import timedelta -from typing import Any - -import redis -from celery import bootsteps # type: ignore -from celery import Celery -from celery import current_task -from celery import signals -from celery import Task -from celery.exceptions import WorkerShutdown -from celery.signals import beat_init -from celery.signals import worker_init -from celery.signals import worker_ready -from celery.signals import worker_shutdown -from celery.states import READY_STATES -from celery.utils.log import get_task_logger - -from danswer.background.celery.celery_redis import RedisConnectorCredentialPair -from danswer.background.celery.celery_redis import RedisConnectorDeletion -from danswer.background.celery.celery_redis import RedisDocumentSet -from danswer.background.celery.celery_redis import RedisUserGroup -from danswer.background.celery.celery_utils import celery_is_worker_primary -from danswer.configs.constants import CELERY_PRIMARY_WORKER_LOCK_TIMEOUT -from danswer.configs.constants import DanswerCeleryPriority -from danswer.configs.constants import DanswerRedisLocks -from danswer.configs.constants import POSTGRES_CELERY_BEAT_APP_NAME -from danswer.configs.constants import POSTGRES_CELERY_WORKER_HEAVY_APP_NAME -from danswer.configs.constants import POSTGRES_CELERY_WORKER_LIGHT_APP_NAME -from danswer.configs.constants import POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME -from 
danswer.db.engine import SqlEngine -from danswer.redis.redis_pool import RedisPool -from danswer.utils.logger import ColoredFormatter -from danswer.utils.logger import PlainFormatter -from danswer.utils.logger import setup_logger - -logger = setup_logger() - -# use this within celery tasks to get celery task specific logging -task_logger = get_task_logger(__name__) - -redis_pool = RedisPool() - -celery_app = Celery(__name__) -celery_app.config_from_object( - "danswer.background.celery.celeryconfig" -) # Load configuration from 'celeryconfig.py' - - -@signals.task_postrun.connect -def celery_task_postrun( - sender: Any | None = None, - task_id: str | None = None, - task: Task | None = None, - args: tuple | None = None, - kwargs: dict | None = None, - retval: Any | None = None, - state: str | None = None, - **kwds: Any, -) -> None: - """We handle this signal in order to remove completed tasks - from their respective tasksets. This allows us to track the progress of document set - and user group syncs. - - This function runs after any task completes (both success and failure) - Note that this signal does not fire on a task that failed to complete and is going - to be retried. - """ - if not task: - return - - task_logger.debug(f"Task {task.name} (ID: {task_id}) completed with state: {state}") - # logger.debug(f"Result: {retval}") - - if state not in READY_STATES: - return - - if not task_id: - return - - if task_id.startswith(RedisConnectorCredentialPair.PREFIX): - r = redis_pool.get_client() - r.srem(RedisConnectorCredentialPair.get_taskset_key(), task_id) - return - - if task_id.startswith(RedisDocumentSet.PREFIX): - r = redis_pool.get_client() - document_set_id = RedisDocumentSet.get_id_from_task_id(task_id) - if document_set_id is not None: - rds = RedisDocumentSet(document_set_id) - r.srem(rds.taskset_key, task_id) - return - - if task_id.startswith(RedisUserGroup.PREFIX): - r = redis_pool.get_client() - usergroup_id = RedisUserGroup.get_id_from_task_id(task_id) - if usergroup_id is not None: - rug = RedisUserGroup(usergroup_id) - r.srem(rug.taskset_key, task_id) - return - - if task_id.startswith(RedisConnectorDeletion.PREFIX): - r = redis_pool.get_client() - cc_pair_id = RedisConnectorDeletion.get_id_from_task_id(task_id) - if cc_pair_id is not None: - rcd = RedisConnectorDeletion(cc_pair_id) - r.srem(rcd.taskset_key, task_id) - return - - -@beat_init.connect -def on_beat_init(sender: Any, **kwargs: Any) -> None: - SqlEngine.set_app_name(POSTGRES_CELERY_BEAT_APP_NAME) - SqlEngine.init_engine(pool_size=2, max_overflow=0) - - -@worker_init.connect -def on_worker_init(sender: Any, **kwargs: Any) -> None: - # decide some initial startup settings based on the celery worker's hostname - # (set at the command line) - hostname = sender.hostname - if hostname.startswith("light"): - SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_LIGHT_APP_NAME) - SqlEngine.init_engine(pool_size=sender.concurrency, max_overflow=8) - elif hostname.startswith("heavy"): - SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_HEAVY_APP_NAME) - SqlEngine.init_engine(pool_size=8, max_overflow=0) - else: - SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME) - SqlEngine.init_engine(pool_size=8, max_overflow=0) - - r = redis_pool.get_client() - - WAIT_INTERVAL = 5 - WAIT_LIMIT = 60 - - time_start = time.monotonic() - logger.info("Redis: Readiness check starting.") - while True: - try: - if r.ping(): - break - except Exception: - pass - - time_elapsed = time.monotonic() - time_start - logger.info( - f"Redis: Ping 
failed. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}" - ) - if time_elapsed > WAIT_LIMIT: - msg = ( - f"Redis: Readiness check did not succeed within the timeout " - f"({WAIT_LIMIT} seconds). Exiting..." - ) - logger.error(msg) - raise WorkerShutdown(msg) - - time.sleep(WAIT_INTERVAL) - - logger.info("Redis: Readiness check succeeded. Continuing...") - - if not celery_is_worker_primary(sender): - logger.info("Running as a secondary celery worker.") - logger.info("Waiting for primary worker to be ready...") - time_start = time.monotonic() - while True: - if r.exists(DanswerRedisLocks.PRIMARY_WORKER): - break - - time.monotonic() - time_elapsed = time.monotonic() - time_start - logger.info( - f"Primary worker is not ready yet. elapsed={time_elapsed:.1f} timeout={WAIT_LIMIT:.1f}" - ) - if time_elapsed > WAIT_LIMIT: - msg = ( - f"Primary worker was not ready within the timeout. " - f"({WAIT_LIMIT} seconds). Exiting..." - ) - logger.error(msg) - raise WorkerShutdown(msg) - - time.sleep(WAIT_INTERVAL) - - logger.info("Wait for primary worker completed successfully. Continuing...") - return - - logger.info("Running as the primary celery worker.") - - # This is singleton work that should be done on startup exactly once - # by the primary worker - r = redis_pool.get_client() - - # For the moment, we're assuming that we are the only primary worker - # that should be running. - # TODO: maybe check for or clean up another zombie primary worker if we detect it - r.delete(DanswerRedisLocks.PRIMARY_WORKER) - - # this process wide lock is taken to help other workers start up in order. - # it is planned to use this lock to enforce singleton behavior on the primary - # worker, since the primary worker does redis cleanup on startup, but this isn't - # implemented yet. 
- lock = r.lock( - DanswerRedisLocks.PRIMARY_WORKER, - timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT, - ) - - logger.info("Primary worker lock: Acquire starting.") - acquired = lock.acquire(blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2) - if acquired: - logger.info("Primary worker lock: Acquire succeeded.") - else: - logger.error("Primary worker lock: Acquire failed!") - raise WorkerShutdown("Primary worker lock could not be acquired!") - - sender.primary_worker_lock = lock - - r.delete(DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK) - r.delete(DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK) - - r.delete(RedisConnectorCredentialPair.get_taskset_key()) - r.delete(RedisConnectorCredentialPair.get_fence_key()) - - for key in r.scan_iter(RedisDocumentSet.TASKSET_PREFIX + "*"): - r.delete(key) - - for key in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"): - r.delete(key) - - for key in r.scan_iter(RedisUserGroup.TASKSET_PREFIX + "*"): - r.delete(key) - - for key in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"): - r.delete(key) - - for key in r.scan_iter(RedisConnectorDeletion.TASKSET_PREFIX + "*"): - r.delete(key) - - for key in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"): - r.delete(key) - - -@worker_ready.connect -def on_worker_ready(sender: Any, **kwargs: Any) -> None: - task_logger.info("worker_ready signal received.") - - -@worker_shutdown.connect -def on_worker_shutdown(sender: Any, **kwargs: Any) -> None: - if not celery_is_worker_primary(sender): - return - - if not sender.primary_worker_lock: - return - - logger.info("Releasing primary worker lock.") - lock = sender.primary_worker_lock - if lock.owned(): - lock.release() - sender.primary_worker_lock = None - - -class CeleryTaskPlainFormatter(PlainFormatter): - def format(self, record: logging.LogRecord) -> str: - task = current_task - if task and task.request: - record.__dict__.update(task_id=task.request.id, task_name=task.name) - record.msg = f"[{task.name}({task.request.id})] {record.msg}" - - return super().format(record) - - -class CeleryTaskColoredFormatter(ColoredFormatter): - def format(self, record: logging.LogRecord) -> str: - task = current_task - if task and task.request: - record.__dict__.update(task_id=task.request.id, task_name=task.name) - record.msg = f"[{task.name}({task.request.id})] {record.msg}" - - return super().format(record) - - -@signals.setup_logging.connect -def on_setup_logging( - loglevel: Any, logfile: Any, format: Any, colorize: Any, **kwargs: Any -) -> None: - # TODO: could unhardcode format and colorize and accept these as options from - # celery's config - - # reformats celery's worker logger - root_logger = logging.getLogger() - - root_handler = logging.StreamHandler() # Set up a handler for the root logger - root_formatter = ColoredFormatter( - "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", - datefmt="%m/%d/%Y %I:%M:%S %p", - ) - root_handler.setFormatter(root_formatter) - root_logger.addHandler(root_handler) # Apply the handler to the root logger - - if logfile: - root_file_handler = logging.FileHandler(logfile) - root_file_formatter = PlainFormatter( - "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", - datefmt="%m/%d/%Y %I:%M:%S %p", - ) - root_file_handler.setFormatter(root_file_formatter) - root_logger.addHandler(root_file_handler) - - root_logger.setLevel(loglevel) - - # reformats celery's task logger - task_formatter = CeleryTaskColoredFormatter( - "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", - datefmt="%m/%d/%Y %I:%M:%S %p", - ) - task_handler = 
logging.StreamHandler() # Set up a handler for the task logger - task_handler.setFormatter(task_formatter) - task_logger.addHandler(task_handler) # Apply the handler to the task logger - - if logfile: - task_file_handler = logging.FileHandler(logfile) - task_file_formatter = CeleryTaskPlainFormatter( - "%(asctime)s %(filename)30s %(lineno)4s: %(message)s", - datefmt="%m/%d/%Y %I:%M:%S %p", - ) - task_file_handler.setFormatter(task_file_formatter) - task_logger.addHandler(task_file_handler) - - task_logger.setLevel(loglevel) - task_logger.propagate = False - - -class HubPeriodicTask(bootsteps.StartStopStep): - """Regularly reacquires the primary worker lock outside of the task queue. - Use the task_logger in this class to avoid double logging.""" - - # it's unclear to me whether using the hub's timer or the bootstep timer is better - requires = {"celery.worker.components:Hub"} - - def __init__(self, worker: Any, **kwargs: Any) -> None: - self.interval = CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 8 # Interval in seconds - self.task_tref = None - - def start(self, worker: Any) -> None: - if not celery_is_worker_primary(worker): - return - - # Access the worker's event loop (hub) - hub = worker.consumer.controller.hub - - # Schedule the periodic task - self.task_tref = hub.call_repeatedly( - self.interval, self.run_periodic_task, worker - ) - task_logger.info("Scheduled periodic task with hub.") - - def run_periodic_task(self, worker: Any) -> None: - try: - if not worker.primary_worker_lock: - return - - if not hasattr(worker, "primary_worker_lock"): - return - - r = redis_pool.get_client() - - lock: redis.lock.Lock = worker.primary_worker_lock - - task_logger.info("Reacquiring primary worker lock.") - - if lock.owned(): - task_logger.debug("Reacquiring primary worker lock.") - lock.reacquire() - else: - task_logger.warning( - "Full acquisition of primary worker lock. " - "Reasons could be computer sleep or a clock change." 
- ) - lock = r.lock( - DanswerRedisLocks.PRIMARY_WORKER, - timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT, - ) - - task_logger.info("Primary worker lock: Acquire starting.") - acquired = lock.acquire( - blocking_timeout=CELERY_PRIMARY_WORKER_LOCK_TIMEOUT / 2 - ) - if acquired: - task_logger.info("Primary worker lock: Acquire succeeded.") - else: - task_logger.error("Primary worker lock: Acquire failed!") - raise TimeoutError("Primary worker lock could not be acquired!") - - worker.primary_worker_lock = lock - except Exception: - task_logger.exception("HubPeriodicTask.run_periodic_task exceptioned.") - - def stop(self, worker: Any) -> None: - # Cancel the scheduled task when the worker stops - if self.task_tref: - self.task_tref.cancel() - task_logger.info("Canceled periodic task with hub.") - - -celery_app.steps["worker"].add(HubPeriodicTask) - -celery_app.autodiscover_tasks( - [ - "danswer.background.celery.tasks.connector_deletion", - "danswer.background.celery.tasks.periodic", - "danswer.background.celery.tasks.pruning", - "danswer.background.celery.tasks.vespa", - ] -) - -##### -# Celery Beat (Periodic Tasks) Settings -##### -celery_app.conf.beat_schedule = { - "check-for-vespa-sync": { - "task": "check_for_vespa_sync_task", - "schedule": timedelta(seconds=5), - "options": {"priority": DanswerCeleryPriority.HIGH}, - }, -} -celery_app.conf.beat_schedule.update( - { - "check-for-connector-deletion-task": { - "task": "check_for_connector_deletion_task", - # don't need to check too often, since we kick off a deletion initially - # during the API call that actually marks the CC pair for deletion - "schedule": timedelta(minutes=1), - "options": {"priority": DanswerCeleryPriority.HIGH}, - }, - } -) -celery_app.conf.beat_schedule.update( - { - "check-for-prune": { - "task": "check_for_prune_task", - "schedule": timedelta(seconds=5), - "options": {"priority": DanswerCeleryPriority.HIGH}, - }, - } -) -celery_app.conf.beat_schedule.update( - { - "kombu-message-cleanup": { - "task": "kombu_message_cleanup_task", - "schedule": timedelta(seconds=3600), - "options": {"priority": DanswerCeleryPriority.LOWEST}, - }, - } -) -celery_app.conf.beat_schedule.update( - { - "monitor-vespa-sync": { - "task": "monitor_vespa_sync", - "schedule": timedelta(seconds=5), - "options": {"priority": DanswerCeleryPriority.HIGH}, - }, - } -) diff --git a/backend/danswer/background/celery/celery_redis.py b/backend/danswer/background/celery/celery_redis.py index 1d837bd51e0..3e205d71ded 100644 --- a/backend/danswer/background/celery/celery_redis.py +++ b/backend/danswer/background/celery/celery_redis.py @@ -1,346 +1,10 @@ # These are helper objects for tracking the keys we need to write in redis -import time -from abc import ABC -from abc import abstractmethod from typing import cast -from uuid import uuid4 -import redis -from celery import Celery from redis import Redis -from sqlalchemy.orm import Session -from danswer.background.celery.celeryconfig import CELERY_SEPARATOR -from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.background.celery.configs.base import CELERY_SEPARATOR from danswer.configs.constants import DanswerCeleryPriority -from danswer.configs.constants import DanswerCeleryQueues -from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id -from danswer.db.document import construct_document_select_for_connector_credential_pair -from danswer.db.document import ( - construct_document_select_for_connector_credential_pair_by_needs_sync, -) -from 
danswer.db.document_set import construct_document_select_by_docset -from danswer.utils.variable_functionality import fetch_versioned_implementation - - -class RedisObjectHelper(ABC): - PREFIX = "base" - FENCE_PREFIX = PREFIX + "_fence" - TASKSET_PREFIX = PREFIX + "_taskset" - - def __init__(self, id: int): - self._id: int = id - - @property - def task_id_prefix(self) -> str: - return f"{self.PREFIX}_{self._id}" - - @property - def fence_key(self) -> str: - # example: documentset_fence_1 - return f"{self.FENCE_PREFIX}_{self._id}" - - @property - def taskset_key(self) -> str: - # example: documentset_taskset_1 - return f"{self.TASKSET_PREFIX}_{self._id}" - - @staticmethod - def get_id_from_fence_key(key: str) -> int | None: - """ - Extracts the object ID from a fence key in the format `PREFIX_fence_X`. - - Args: - key (str): The fence key string. - - Returns: - Optional[int]: The extracted ID if the key is in the correct format, otherwise None. - """ - parts = key.split("_") - if len(parts) != 3: - return None - - try: - object_id = int(parts[2]) - except ValueError: - return None - - return object_id - - @staticmethod - def get_id_from_task_id(task_id: str) -> int | None: - """ - Extracts the object ID from a task ID string. - - This method assumes the task ID is formatted as `prefix_objectid_suffix`, where: - - `prefix` is an arbitrary string (e.g., the name of the task or entity), - - `objectid` is the ID you want to extract, - - `suffix` is another arbitrary string (e.g., a UUID). - - Example: - If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`, - this method will return the string `"1"`. - - Args: - task_id (str): The task ID string from which to extract the object ID. - - Returns: - str | None: The extracted object ID if the task ID is in the correct format, otherwise None. - """ - # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc - parts = task_id.split("_") - if len(parts) != 3: - return None - - try: - object_id = int(parts[1]) - except ValueError: - return None - - return object_id - - @abstractmethod - def generate_tasks( - self, - celery_app: Celery, - db_session: Session, - redis_client: Redis, - lock: redis.lock.Lock, - ) -> int | None: - pass - - -class RedisDocumentSet(RedisObjectHelper): - PREFIX = "documentset" - FENCE_PREFIX = PREFIX + "_fence" - TASKSET_PREFIX = PREFIX + "_taskset" - - def generate_tasks( - self, - celery_app: Celery, - db_session: Session, - redis_client: Redis, - lock: redis.lock.Lock, - ) -> int | None: - last_lock_time = time.monotonic() - - async_results = [] - stmt = construct_document_select_by_docset(self._id, current_only=False) - for doc in db_session.scalars(stmt).yield_per(1): - current_time = time.monotonic() - if current_time - last_lock_time >= ( - CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 - ): - lock.reacquire() - last_lock_time = current_time - - # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" - # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" - # we prefix the task id so it's easier to keep track of who created the task - # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" - custom_task_id = f"{self.task_id_prefix}_{uuid4()}" - - # add to the set BEFORE creating the task. 
- redis_client.sadd(self.taskset_key, custom_task_id) - - result = celery_app.send_task( - "vespa_metadata_sync_task", - kwargs=dict(document_id=doc.id), - queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, - task_id=custom_task_id, - priority=DanswerCeleryPriority.LOW, - ) - - async_results.append(result) - - return len(async_results) - - -class RedisUserGroup(RedisObjectHelper): - PREFIX = "usergroup" - FENCE_PREFIX = PREFIX + "_fence" - TASKSET_PREFIX = PREFIX + "_taskset" - - def generate_tasks( - self, - celery_app: Celery, - db_session: Session, - redis_client: Redis, - lock: redis.lock.Lock, - ) -> int | None: - last_lock_time = time.monotonic() - - async_results = [] - - try: - construct_document_select_by_usergroup = fetch_versioned_implementation( - "danswer.db.user_group", - "construct_document_select_by_usergroup", - ) - except ModuleNotFoundError: - return 0 - - stmt = construct_document_select_by_usergroup(self._id) - for doc in db_session.scalars(stmt).yield_per(1): - current_time = time.monotonic() - if current_time - last_lock_time >= ( - CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 - ): - lock.reacquire() - last_lock_time = current_time - - # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" - # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" - # we prefix the task id so it's easier to keep track of who created the task - # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" - custom_task_id = f"{self.task_id_prefix}_{uuid4()}" - - # add to the set BEFORE creating the task. - redis_client.sadd(self.taskset_key, custom_task_id) - - result = celery_app.send_task( - "vespa_metadata_sync_task", - kwargs=dict(document_id=doc.id), - queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, - task_id=custom_task_id, - priority=DanswerCeleryPriority.LOW, - ) - - async_results.append(result) - - return len(async_results) - - -class RedisConnectorCredentialPair(RedisObjectHelper): - """This class differs from the default in that the taskset used spans - all connectors and is not per connector.""" - - PREFIX = "connectorsync" - FENCE_PREFIX = PREFIX + "_fence" - TASKSET_PREFIX = PREFIX + "_taskset" - - @classmethod - def get_fence_key(cls) -> str: - return RedisConnectorCredentialPair.FENCE_PREFIX - - @classmethod - def get_taskset_key(cls) -> str: - return RedisConnectorCredentialPair.TASKSET_PREFIX - - @property - def taskset_key(self) -> str: - """Notice that this is intentionally reusing the same taskset for all - connector syncs""" - # example: connector_taskset - return f"{self.TASKSET_PREFIX}" - - def generate_tasks( - self, - celery_app: Celery, - db_session: Session, - redis_client: Redis, - lock: redis.lock.Lock, - ) -> int | None: - last_lock_time = time.monotonic() - - async_results = [] - cc_pair = get_connector_credential_pair_from_id(self._id, db_session) - if not cc_pair: - return None - - stmt = construct_document_select_for_connector_credential_pair_by_needs_sync( - cc_pair.connector_id, cc_pair.credential_id - ) - for doc in db_session.scalars(stmt).yield_per(1): - current_time = time.monotonic() - if current_time - last_lock_time >= ( - CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 - ): - lock.reacquire() - last_lock_time = current_time - - # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" - # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" - # we prefix the task id so it's easier to keep track of who created the task - # aka 
"documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" - custom_task_id = f"{self.task_id_prefix}_{uuid4()}" - - # add to the tracking taskset in redis BEFORE creating the celery task. - # note that for the moment we are using a single taskset key, not differentiated by cc_pair id - redis_client.sadd( - RedisConnectorCredentialPair.get_taskset_key(), custom_task_id - ) - - # Priority on sync's triggered by new indexing should be medium - result = celery_app.send_task( - "vespa_metadata_sync_task", - kwargs=dict(document_id=doc.id), - queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, - task_id=custom_task_id, - priority=DanswerCeleryPriority.MEDIUM, - ) - - async_results.append(result) - - return len(async_results) - - -class RedisConnectorDeletion(RedisObjectHelper): - PREFIX = "connectordeletion" - FENCE_PREFIX = PREFIX + "_fence" - TASKSET_PREFIX = PREFIX + "_taskset" - - def generate_tasks( - self, - celery_app: Celery, - db_session: Session, - redis_client: Redis, - lock: redis.lock.Lock, - ) -> int | None: - last_lock_time = time.monotonic() - - async_results = [] - cc_pair = get_connector_credential_pair_from_id(self._id, db_session) - if not cc_pair: - return None - - stmt = construct_document_select_for_connector_credential_pair( - cc_pair.connector_id, cc_pair.credential_id - ) - for doc in db_session.scalars(stmt).yield_per(1): - current_time = time.monotonic() - if current_time - last_lock_time >= ( - CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 - ): - lock.reacquire() - last_lock_time = current_time - - # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" - # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" - # we prefix the task id so it's easier to keep track of who created the task - # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" - custom_task_id = f"{self.task_id_prefix}_{uuid4()}" - - # add to the tracking taskset in redis BEFORE creating the celery task. 
- # note that for the moment we are using a single taskset key, not differentiated by cc_pair id - redis_client.sadd(self.taskset_key, custom_task_id) - - # Priority on sync's triggered by new indexing should be medium - result = celery_app.send_task( - "document_by_cc_pair_cleanup_task", - kwargs=dict( - document_id=doc.id, - connector_id=cc_pair.connector_id, - credential_id=cc_pair.credential_id, - ), - queue=DanswerCeleryQueues.CONNECTOR_DELETION, - task_id=custom_task_id, - priority=DanswerCeleryPriority.MEDIUM, - ) - - async_results.append(result) - - return len(async_results) def celery_get_queue_length(queue: str, r: Redis) -> int: diff --git a/backend/danswer/background/celery/celery_utils.py b/backend/danswer/background/celery/celery_utils.py index 9ee282e1af3..d0df7af02d7 100644 --- a/backend/danswer/background/celery/celery_utils.py +++ b/backend/danswer/background/celery/celery_utils.py @@ -4,37 +4,32 @@ from sqlalchemy.orm import Session -from danswer.background.celery.celery_redis import RedisConnectorDeletion -from danswer.background.task_utils import name_cc_prune_task -from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING +from danswer.background.indexing.run_indexing import RunIndexingCallbackInterface from danswer.configs.app_configs import MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( rate_limit_builder, ) from danswer.connectors.interfaces import BaseConnector -from danswer.connectors.interfaces import IdConnector from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector +from danswer.connectors.interfaces import SlimConnector from danswer.connectors.models import Document from danswer.db.connector_credential_pair import get_connector_credential_pair -from danswer.db.engine import get_db_current_time from danswer.db.enums import TaskStatus -from danswer.db.models import Connector -from danswer.db.models import Credential from danswer.db.models import TaskQueueState -from danswer.db.tasks import check_task_is_live_and_not_timed_out -from danswer.db.tasks import get_latest_task -from danswer.db.tasks import get_latest_task_by_type -from danswer.redis.redis_pool import RedisPool +from danswer.redis.redis_connector import RedisConnector from danswer.server.documents.models import DeletionAttemptSnapshot from danswer.utils.logger import setup_logger + logger = setup_logger() -redis_pool = RedisPool() def _get_deletion_status( - connector_id: int, credential_id: int, db_session: Session + connector_id: int, + credential_id: int, + db_session: Session, + tenant_id: str | None = None, ) -> TaskQueueState | None: """We no longer store TaskQueueState in the DB for a deletion attempt. This function populates TaskQueueState by just checking redis. 
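The hunk below swaps the old RedisConnectorDeletion fence check for the new RedisConnector(...).delete.fenced accessor. As a rough illustration of the fence-key pattern that docstring describes (deletion status is derived purely from Redis, nothing is persisted in the DB), a minimal redis-py sketch might look like the following; the key name echoes the old "connectordeletion_fence_<id>" convention, and the TTL is an illustrative assumption, not a value taken from this PR.

    import redis

    r = redis.Redis(host="localhost", port=6379, db=0)

    def mark_deletion_started(cc_pair_id: int) -> None:
        # Setting the fence key signals that a deletion attempt is in flight.
        # Key name and TTL are illustrative only.
        r.set(f"connectordeletion_fence_{cc_pair_id}", 1, ex=3600)

    def deletion_in_progress(cc_pair_id: int) -> bool:
        # Status is inferred from key existence alone -- no DB row is consulted.
        return bool(r.exists(f"connectordeletion_fence_{cc_pair_id}"))

    def clear_deletion_fence(cc_pair_id: int) -> None:
        # Removing the fence marks the deletion attempt as finished.
        r.delete(f"connectordeletion_fence_{cc_pair_id}")

The same fence/taskset naming convention is what the surrounding Redis helper classes in this diff rely on when they map a fence key back to a TaskQueueState.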
@@ -45,21 +40,26 @@ def _get_deletion_status( if not cc_pair: return None - rcd = RedisConnectorDeletion(cc_pair.id) - - r = redis_pool.get_client() - if not r.exists(rcd.fence_key): + redis_connector = RedisConnector(tenant_id, cc_pair.id) + if not redis_connector.delete.fenced: return None return TaskQueueState( - task_id="", task_name=rcd.fence_key, status=TaskStatus.STARTED + task_id="", + task_name=redis_connector.delete.fence_key, + status=TaskStatus.STARTED, ) def get_deletion_attempt_snapshot( - connector_id: int, credential_id: int, db_session: Session + connector_id: int, + credential_id: int, + db_session: Session, + tenant_id: str | None = None, ) -> DeletionAttemptSnapshot | None: - deletion_task = _get_deletion_status(connector_id, credential_id, db_session) + deletion_task = _get_deletion_status( + connector_id, credential_id, db_session, tenant_id + ) if not deletion_task: return None @@ -70,79 +70,31 @@ def get_deletion_attempt_snapshot( ) -def skip_cc_pair_pruning_by_task( - pruning_task: TaskQueueState | None, db_session: Session -) -> bool: - """task should be the latest prune task for this cc_pair""" - if not ALLOW_SIMULTANEOUS_PRUNING: - # if only one prune is allowed at any time, then check to see if any prune - # is active - pruning_type_task_name = name_cc_prune_task() - last_pruning_type_task = get_latest_task_by_type( - pruning_type_task_name, db_session - ) - - if last_pruning_type_task and check_task_is_live_and_not_timed_out( - last_pruning_type_task, db_session - ): - return True - - if pruning_task and check_task_is_live_and_not_timed_out(pruning_task, db_session): - # if the last task is live right now, we shouldn't start a new one - return True - - return False - - -def should_prune_cc_pair( - connector: Connector, credential: Credential, db_session: Session -) -> bool: - if not connector.prune_freq: - return False - - pruning_task_name = name_cc_prune_task( - connector_id=connector.id, credential_id=credential.id - ) - last_pruning_task = get_latest_task(pruning_task_name, db_session) - - if skip_cc_pair_pruning_by_task(last_pruning_task, db_session): - return False - - current_db_time = get_db_current_time(db_session) - - if not last_pruning_task: - # If the connector has never been pruned, then compare vs when the connector - # was created - time_since_initialization = current_db_time - connector.time_created - if time_since_initialization.total_seconds() >= connector.prune_freq: - return True - return False - - if not last_pruning_task.start_time: - # if the last prune task hasn't started, we shouldn't start a new one - return False - - # if the last prune task has a start time, then compare against it to determine - # if we should start - time_since_last_pruning = current_db_time - last_pruning_task.start_time - return time_since_last_pruning.total_seconds() >= connector.prune_freq - - -def document_batch_to_ids(doc_batch: list[Document]) -> set[str]: +def document_batch_to_ids( + doc_batch: list[Document], +) -> set[str]: return {doc.id for doc in doc_batch} -def extract_ids_from_runnable_connector(runnable_connector: BaseConnector) -> set[str]: +def extract_ids_from_runnable_connector( + runnable_connector: BaseConnector, + callback: RunIndexingCallbackInterface | None = None, +) -> set[str]: """ If the PruneConnector hasnt been implemented for the given connector, just pull - all docs using the load_from_state and grab out the IDs + all docs using the load_from_state and grab out the IDs. 
+ + Optionally, a callback can be passed to handle the length of each document batch. """ all_connector_doc_ids: set[str] = set() + if isinstance(runnable_connector, SlimConnector): + for metadata_batch in runnable_connector.retrieve_all_slim_documents(): + all_connector_doc_ids.update({doc.id for doc in metadata_batch}) + doc_batch_generator = None - if isinstance(runnable_connector, IdConnector): - all_connector_doc_ids = runnable_connector.retrieve_all_source_ids() - elif isinstance(runnable_connector, LoadConnector): + + if isinstance(runnable_connector, LoadConnector): doc_batch_generator = runnable_connector.load_from_state() elif isinstance(runnable_connector, PollConnector): start = datetime(1970, 1, 1, tzinfo=timezone.utc).timestamp() @@ -151,14 +103,17 @@ def extract_ids_from_runnable_connector(runnable_connector: BaseConnector) -> se else: raise RuntimeError("Pruning job could not find a valid runnable_connector.") - if doc_batch_generator: - doc_batch_processing_func = document_batch_to_ids - if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE: - doc_batch_processing_func = rate_limit_builder( - max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60 - )(document_batch_to_ids) - for doc_batch in doc_batch_generator: - all_connector_doc_ids.update(doc_batch_processing_func(doc_batch)) + doc_batch_processing_func = document_batch_to_ids + if MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE: + doc_batch_processing_func = rate_limit_builder( + max_calls=MAX_PRUNING_DOCUMENT_RETRIEVAL_PER_MINUTE, period=60 + )(document_batch_to_ids) + for doc_batch in doc_batch_generator: + if callback: + if callback.should_stop(): + raise RuntimeError("Stop signal received") + callback.progress(len(doc_batch)) + all_connector_doc_ids.update(doc_batch_processing_func(doc_batch)) return all_connector_doc_ids @@ -177,14 +132,12 @@ def celery_is_listening_to_queue(worker: Any, name: str) -> bool: def celery_is_worker_primary(worker: Any) -> bool: - """There are multiple approaches that could be taken, but the way we do it is to - check the hostname set for the celery worker, either in celeryconfig.py or on the - command line.""" + """There are multiple approaches that could be taken to determine if a celery worker + is 'primary', as defined by us. 
But the way we do it is to check the hostname set + for the celery worker, which can be done on the + command line with '--hostname'.""" hostname = worker.hostname - if hostname.startswith("light"): - return False - - if hostname.startswith("heavy"): - return False + if hostname.startswith("primary"): + return True - return True + return False diff --git a/backend/danswer/background/celery/celeryconfig.py b/backend/danswer/background/celery/configs/base.py similarity index 81% rename from backend/danswer/background/celery/celeryconfig.py rename to backend/danswer/background/celery/configs/base.py index 1b1aa092d17..7fc6e67d7a7 100644 --- a/backend/danswer/background/celery/celeryconfig.py +++ b/backend/danswer/background/celery/configs/base.py @@ -1,4 +1,6 @@ # docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html +import urllib.parse + from danswer.configs.app_configs import CELERY_BROKER_POOL_LIMIT from danswer.configs.app_configs import CELERY_RESULT_EXPIRES from danswer.configs.app_configs import REDIS_DB_NUMBER_CELERY @@ -17,7 +19,7 @@ CELERY_PASSWORD_PART = "" if REDIS_PASSWORD: - CELERY_PASSWORD_PART = f":{REDIS_PASSWORD}@" + CELERY_PASSWORD_PART = ":" + urllib.parse.quote(REDIS_PASSWORD, safe="") + "@" REDIS_SCHEME = "redis" @@ -29,16 +31,10 @@ if REDIS_SSL_CA_CERTS: SSL_QUERY_PARAMS += f"&ssl_ca_certs={REDIS_SSL_CA_CERTS}" +# region Broker settings # example celery_broker_url: "redis://:password@localhost:6379/15" broker_url = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY}{SSL_QUERY_PARAMS}" -result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY_RESULT_BACKEND}{SSL_QUERY_PARAMS}" - -# NOTE: prefetch 4 is significantly faster than prefetch 1 for small tasks -# however, prefetching is bad when tasks are lengthy as those tasks -# can stall other tasks. -worker_prefetch_multiplier = 4 - broker_connection_retry_on_startup = True broker_pool_limit = CELERY_BROKER_POOL_LIMIT @@ -53,6 +49,16 @@ "socket_keepalive": True, "socket_keepalive_options": REDIS_SOCKET_KEEPALIVE_OPTIONS, } +# endregion + +# redis backend settings +# https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings + +# there doesn't appear to be a way to set socket_keepalive_options on the redis result backend +redis_socket_keepalive = True +redis_retry_on_timeout = True +redis_backend_health_check_interval = REDIS_HEALTH_CHECK_INTERVAL + # redis backend settings # https://docs.celeryq.dev/en/stable/userguide/configuration.html#redis-backend-settings @@ -66,10 +72,19 @@ task_default_priority = DanswerCeleryPriority.MEDIUM task_acks_late = True +# region Task result backend settings # It's possible we don't even need celery's result backend, in which case all of the optimization below # might be irrelevant +result_backend = f"{REDIS_SCHEME}://{CELERY_PASSWORD_PART}{REDIS_HOST}:{REDIS_PORT}/{REDIS_DB_NUMBER_CELERY_RESULT_BACKEND}{SSL_QUERY_PARAMS}" result_expires = CELERY_RESULT_EXPIRES # 86400 seconds is the default +# endregion + +# Leaving this to the default of True may cause double logging since both our own app +# and celery think they are controlling the logger. +# TODO: Configure celery's logger entirely manually and set this to False +# worker_hijack_root_logger = False +# region Notes on serialization performance # Option 0: Defaults (json serializer, no compression) # about 1.5 KB per queued task. 
1KB in queue, 400B for result, 100 as a child entry in generator result @@ -95,3 +110,4 @@ # task_serializer = "pickle-bzip2" # result_serializer = "pickle-bzip2" # accept_content=["pickle", "pickle-bzip2"] +# endregion diff --git a/backend/danswer/background/celery/configs/beat.py b/backend/danswer/background/celery/configs/beat.py new file mode 100644 index 00000000000..ef8b21c386f --- /dev/null +++ b/backend/danswer/background/celery/configs/beat.py @@ -0,0 +1,14 @@ +# docs: https://docs.celeryq.dev/en/stable/userguide/configuration.html +import danswer.background.celery.configs.base as shared_config + +broker_url = shared_config.broker_url +broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup +broker_pool_limit = shared_config.broker_pool_limit +broker_transport_options = shared_config.broker_transport_options + +redis_socket_keepalive = shared_config.redis_socket_keepalive +redis_retry_on_timeout = shared_config.redis_retry_on_timeout +redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval + +result_backend = shared_config.result_backend +result_expires = shared_config.result_expires # 86400 seconds is the default diff --git a/backend/danswer/background/celery/configs/heavy.py b/backend/danswer/background/celery/configs/heavy.py new file mode 100644 index 00000000000..2d1c65aa86e --- /dev/null +++ b/backend/danswer/background/celery/configs/heavy.py @@ -0,0 +1,20 @@ +import danswer.background.celery.configs.base as shared_config + +broker_url = shared_config.broker_url +broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup +broker_pool_limit = shared_config.broker_pool_limit +broker_transport_options = shared_config.broker_transport_options + +redis_socket_keepalive = shared_config.redis_socket_keepalive +redis_retry_on_timeout = shared_config.redis_retry_on_timeout +redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval + +result_backend = shared_config.result_backend +result_expires = shared_config.result_expires # 86400 seconds is the default + +task_default_priority = shared_config.task_default_priority +task_acks_late = shared_config.task_acks_late + +worker_concurrency = 4 +worker_pool = "threads" +worker_prefetch_multiplier = 1 diff --git a/backend/danswer/background/celery/configs/indexing.py b/backend/danswer/background/celery/configs/indexing.py new file mode 100644 index 00000000000..d2b1b99baa9 --- /dev/null +++ b/backend/danswer/background/celery/configs/indexing.py @@ -0,0 +1,21 @@ +import danswer.background.celery.configs.base as shared_config +from danswer.configs.app_configs import CELERY_WORKER_INDEXING_CONCURRENCY + +broker_url = shared_config.broker_url +broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup +broker_pool_limit = shared_config.broker_pool_limit +broker_transport_options = shared_config.broker_transport_options + +redis_socket_keepalive = shared_config.redis_socket_keepalive +redis_retry_on_timeout = shared_config.redis_retry_on_timeout +redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval + +result_backend = shared_config.result_backend +result_expires = shared_config.result_expires # 86400 seconds is the default + +task_default_priority = shared_config.task_default_priority +task_acks_late = shared_config.task_acks_late + +worker_concurrency = CELERY_WORKER_INDEXING_CONCURRENCY +worker_pool = "threads" +worker_prefetch_multiplier = 1 diff --git 
a/backend/danswer/background/celery/configs/light.py b/backend/danswer/background/celery/configs/light.py new file mode 100644 index 00000000000..f75ddfd0fb5 --- /dev/null +++ b/backend/danswer/background/celery/configs/light.py @@ -0,0 +1,22 @@ +import danswer.background.celery.configs.base as shared_config +from danswer.configs.app_configs import CELERY_WORKER_LIGHT_CONCURRENCY +from danswer.configs.app_configs import CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER + +broker_url = shared_config.broker_url +broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup +broker_pool_limit = shared_config.broker_pool_limit +broker_transport_options = shared_config.broker_transport_options + +redis_socket_keepalive = shared_config.redis_socket_keepalive +redis_retry_on_timeout = shared_config.redis_retry_on_timeout +redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval + +result_backend = shared_config.result_backend +result_expires = shared_config.result_expires # 86400 seconds is the default + +task_default_priority = shared_config.task_default_priority +task_acks_late = shared_config.task_acks_late + +worker_concurrency = CELERY_WORKER_LIGHT_CONCURRENCY +worker_pool = "threads" +worker_prefetch_multiplier = CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER diff --git a/backend/danswer/background/celery/configs/primary.py b/backend/danswer/background/celery/configs/primary.py new file mode 100644 index 00000000000..2d1c65aa86e --- /dev/null +++ b/backend/danswer/background/celery/configs/primary.py @@ -0,0 +1,20 @@ +import danswer.background.celery.configs.base as shared_config + +broker_url = shared_config.broker_url +broker_connection_retry_on_startup = shared_config.broker_connection_retry_on_startup +broker_pool_limit = shared_config.broker_pool_limit +broker_transport_options = shared_config.broker_transport_options + +redis_socket_keepalive = shared_config.redis_socket_keepalive +redis_retry_on_timeout = shared_config.redis_retry_on_timeout +redis_backend_health_check_interval = shared_config.redis_backend_health_check_interval + +result_backend = shared_config.result_backend +result_expires = shared_config.result_expires # 86400 seconds is the default + +task_default_priority = shared_config.task_default_priority +task_acks_late = shared_config.task_acks_late + +worker_concurrency = 4 +worker_pool = "threads" +worker_prefetch_multiplier = 1 diff --git a/backend/danswer/background/celery/tasks/beat_schedule.py b/backend/danswer/background/celery/tasks/beat_schedule.py new file mode 100644 index 00000000000..6a20c6ba5c1 --- /dev/null +++ b/backend/danswer/background/celery/tasks/beat_schedule.py @@ -0,0 +1,48 @@ +from datetime import timedelta +from typing import Any + +from danswer.configs.constants import DanswerCeleryPriority + + +tasks_to_schedule = [ + { + "name": "check-for-vespa-sync", + "task": "check_for_vespa_sync_task", + "schedule": timedelta(seconds=5), + "options": {"priority": DanswerCeleryPriority.HIGH}, + }, + { + "name": "check-for-connector-deletion", + "task": "check_for_connector_deletion_task", + "schedule": timedelta(seconds=20), + "options": {"priority": DanswerCeleryPriority.HIGH}, + }, + { + "name": "check-for-indexing", + "task": "check_for_indexing", + "schedule": timedelta(seconds=10), + "options": {"priority": DanswerCeleryPriority.HIGH}, + }, + { + "name": "check-for-prune", + "task": "check_for_pruning", + "schedule": timedelta(seconds=10), + "options": {"priority": DanswerCeleryPriority.HIGH}, + }, + { + "name": 
"kombu-message-cleanup", + "task": "kombu_message_cleanup_task", + "schedule": timedelta(seconds=3600), + "options": {"priority": DanswerCeleryPriority.LOWEST}, + }, + { + "name": "monitor-vespa-sync", + "task": "monitor_vespa_sync", + "schedule": timedelta(seconds=5), + "options": {"priority": DanswerCeleryPriority.HIGH}, + }, +] + + +def get_tasks_to_schedule() -> list[dict[str, Any]]: + return tasks_to_schedule diff --git a/backend/danswer/background/celery/tasks/connector_deletion/tasks.py b/backend/danswer/background/celery/tasks/connector_deletion/tasks.py index 655487f7168..360481015bb 100644 --- a/backend/danswer/background/celery/tasks/connector_deletion/tasks.py +++ b/backend/danswer/background/celery/tasks/connector_deletion/tasks.py @@ -1,38 +1,41 @@ +from datetime import datetime +from datetime import timezone + import redis +from celery import Celery from celery import shared_task +from celery import Task from celery.exceptions import SoftTimeLimitExceeded -from celery.utils.log import get_task_logger from redis import Redis from sqlalchemy.orm import Session -from sqlalchemy.orm.exc import ObjectDeletedError -from danswer.background.celery.celery_app import celery_app -from danswer.background.celery.celery_redis import RedisConnectorDeletion +from danswer.background.celery.apps.app_base import task_logger from danswer.configs.app_configs import JOB_TIMEOUT from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT from danswer.configs.constants import DanswerRedisLocks +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.connector_credential_pair import get_connector_credential_pairs -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.db.enums import ConnectorCredentialPairStatus -from danswer.db.enums import IndexingStatus -from danswer.db.index_attempt import get_last_attempt -from danswer.db.models import ConnectorCredentialPair -from danswer.db.search_settings import get_current_search_settings -from danswer.redis.redis_pool import RedisPool +from danswer.db.search_settings import get_all_search_settings +from danswer.redis.redis_connector import RedisConnector +from danswer.redis.redis_connector_delete import RedisConnectorDeletionFenceData +from danswer.redis.redis_pool import get_redis_client -redis_pool = RedisPool() -# use this within celery tasks to get celery task specific logging -task_logger = get_task_logger(__name__) +class TaskDependencyError(RuntimeError): + """Raised to the caller to indicate dependent tasks are running that would interfere + with connector deletion.""" @shared_task( name="check_for_connector_deletion_task", soft_time_limit=JOB_TIMEOUT, trail=False, + bind=True, ) -def check_for_connector_deletion_task() -> None: - r = redis_pool.get_client() +def check_for_connector_deletion_task(self: Task, *, tenant_id: str | None) -> None: + r = get_redis_client(tenant_id=tenant_id) lock_beat = r.lock( DanswerRedisLocks.CHECK_CONNECTOR_DELETION_BEAT_LOCK, @@ -44,90 +47,135 @@ def check_for_connector_deletion_task() -> None: if not lock_beat.acquire(blocking=False): return - with Session(get_sqlalchemy_engine()) as db_session: + # collect cc_pair_ids + cc_pair_ids: list[int] = [] + with get_session_with_tenant(tenant_id) as db_session: cc_pairs = get_connector_credential_pairs(db_session) for cc_pair in cc_pairs: - try_generate_document_cc_pair_cleanup_tasks( - cc_pair, db_session, r, lock_beat - ) + 
cc_pair_ids.append(cc_pair.id) + + # try running cleanup on the cc_pair_ids + for cc_pair_id in cc_pair_ids: + with get_session_with_tenant(tenant_id) as db_session: + redis_connector = RedisConnector(tenant_id, cc_pair_id) + try: + try_generate_document_cc_pair_cleanup_tasks( + self.app, cc_pair_id, db_session, r, lock_beat, tenant_id + ) + except TaskDependencyError as e: + # this means we wanted to start deleting but dependent tasks were running + # Leave a stop signal to clear indexing and pruning tasks more quickly + task_logger.info(str(e)) + redis_connector.stop.set_fence(True) + else: + # clear the stop signal if it exists ... no longer needed + redis_connector.stop.set_fence(False) + except SoftTimeLimitExceeded: task_logger.info( "Soft time limit exceeded, task is being terminated gracefully." ) except Exception: - task_logger.exception("Unexpected exception") + task_logger.exception(f"Unexpected exception: tenant={tenant_id}") finally: if lock_beat.owned(): lock_beat.release() def try_generate_document_cc_pair_cleanup_tasks( - cc_pair: ConnectorCredentialPair, + app: Celery, + cc_pair_id: int, db_session: Session, r: Redis, lock_beat: redis.lock.Lock, + tenant_id: str | None, ) -> int | None: """Returns an int if syncing is needed. The int represents the number of sync tasks generated. Note that syncing can still be required even if the number of sync tasks generated is zero. Returns None if no syncing is required. + + Will raise TaskDependencyError if dependent tasks such as indexing and pruning are + still running. In our case, the caller reacts by setting a stop signal in Redis to + exit those tasks as quickly as possible. """ lock_beat.reacquire() - rcd = RedisConnectorDeletion(cc_pair.id) + redis_connector = RedisConnector(tenant_id, cc_pair_id) # don't generate sync tasks if tasks are still pending - if r.exists(rcd.fence_key): + if redis_connector.delete.fenced: return None - # we need to refresh the state of the object inside the fence + # we need to load the state of the object inside the fence # to avoid a race condition with db.commit/fence deletion # at the end of this taskset - try: - db_session.refresh(cc_pair) - except ObjectDeletedError: + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if not cc_pair: return None if cc_pair.status != ConnectorCredentialPairStatus.DELETING: return None - search_settings = get_current_search_settings(db_session) - - last_indexing = get_last_attempt( - connector_id=cc_pair.connector_id, - credential_id=cc_pair.credential_id, - search_settings_id=search_settings.id, - db_session=db_session, - ) - if last_indexing: - if ( - last_indexing.status == IndexingStatus.IN_PROGRESS - or last_indexing.status == IndexingStatus.NOT_STARTED - ): - return None - - # add tasks to celery and build up the task set to monitor in redis - r.delete(rcd.taskset_key) - - # Add all documents that need to be updated into the queue - task_logger.info( - f"RedisConnectorDeletion.generate_tasks starting. 
cc_pair_id={cc_pair.id}" + # set a basic fence to start + fence_payload = RedisConnectorDeletionFenceData( + num_tasks=None, + submitted=datetime.now(timezone.utc), ) - tasks_generated = rcd.generate_tasks(celery_app, db_session, r, lock_beat) - if tasks_generated is None: + + redis_connector.delete.set_fence(fence_payload) + + try: + # do not proceed if connector indexing or connector pruning are running + search_settings_list = get_all_search_settings(db_session) + for search_settings in search_settings_list: + redis_connector_index = redis_connector.new_index(search_settings.id) + if redis_connector_index.fenced: + raise TaskDependencyError( + f"Connector deletion - Delayed (indexing in progress): " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings.id}" + ) + + if redis_connector.prune.fenced: + raise TaskDependencyError( + f"Connector deletion - Delayed (pruning in progress): " + f"cc_pair={cc_pair_id}" + ) + + # add tasks to celery and build up the task set to monitor in redis + redis_connector.delete.taskset_clear() + + # Add all documents that need to be updated into the queue + task_logger.info( + f"RedisConnectorDeletion.generate_tasks starting. cc_pair={cc_pair_id}" + ) + tasks_generated = redis_connector.delete.generate_tasks( + app, db_session, lock_beat + ) + if tasks_generated is None: + raise ValueError("RedisConnectorDeletion.generate_tasks returned None") + except TaskDependencyError: + redis_connector.delete.set_fence(None) + raise + except Exception: + task_logger.exception("Unexpected exception") + redis_connector.delete.set_fence(None) return None + else: + # Currently we are allowing the sync to proceed with 0 tasks. + # It's possible for sets/groups to be generated initially with no entries + # and they still need to be marked as up to date. + # if tasks_generated == 0: + # return 0 - # Currently we are allowing the sync to proceed with 0 tasks. - # It's possible for sets/groups to be generated initially with no entries - # and they still need to be marked as up to date. - # if tasks_generated == 0: - # return 0 + task_logger.info( + f"RedisConnectorDeletion.generate_tasks finished. " + f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}" + ) - task_logger.info( - f"RedisConnectorDeletion.generate_tasks finished. 
" - f"cc_pair_id={cc_pair.id} tasks_generated={tasks_generated}" - ) + # set this only after all tasks have been added + fence_payload.num_tasks = tasks_generated + redis_connector.delete.set_fence(fence_payload) - # set this only after all tasks have been added - r.set(rcd.fence_key, tasks_generated) return tasks_generated diff --git a/backend/danswer/background/celery/tasks/indexing/tasks.py b/backend/danswer/background/celery/tasks/indexing/tasks.py new file mode 100644 index 00000000000..cd6126223c4 --- /dev/null +++ b/backend/danswer/background/celery/tasks/indexing/tasks.py @@ -0,0 +1,637 @@ +from datetime import datetime +from datetime import timezone +from http import HTTPStatus +from time import sleep + +import redis +from celery import Celery +from celery import shared_task +from celery import Task +from celery.exceptions import SoftTimeLimitExceeded +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.background.celery.apps.app_base import task_logger +from danswer.background.indexing.job_client import SimpleJobClient +from danswer.background.indexing.run_indexing import run_indexing_entrypoint +from danswer.background.indexing.run_indexing import RunIndexingCallbackInterface +from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP +from danswer.configs.constants import CELERY_INDEXING_LOCK_TIMEOUT +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.configs.constants import DanswerRedisLocks +from danswer.configs.constants import DocumentSource +from danswer.db.connector_credential_pair import fetch_connector_credential_pairs +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id +from danswer.db.engine import get_db_current_time +from danswer.db.engine import get_session_with_tenant +from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.db.enums import IndexingStatus +from danswer.db.enums import IndexModelStatus +from danswer.db.index_attempt import create_index_attempt +from danswer.db.index_attempt import get_index_attempt +from danswer.db.index_attempt import get_last_attempt_for_cc_pair +from danswer.db.index_attempt import mark_attempt_failed +from danswer.db.models import ConnectorCredentialPair +from danswer.db.models import IndexAttempt +from danswer.db.models import SearchSettings +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings +from danswer.db.swap_index import check_index_swap +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder +from danswer.redis.redis_connector import RedisConnector +from danswer.redis.redis_connector_index import RedisConnectorIndexingFenceData +from danswer.redis.redis_pool import get_redis_client +from danswer.utils.logger import setup_logger +from danswer.utils.variable_functionality import global_version +from shared_configs.configs import INDEXING_MODEL_SERVER_HOST +from shared_configs.configs import INDEXING_MODEL_SERVER_PORT +from shared_configs.configs import MULTI_TENANT + +logger = setup_logger() + + +class RunIndexingCallback(RunIndexingCallbackInterface): + def __init__( + self, + stop_key: str, + 
generator_progress_key: str, + redis_lock: redis.lock.Lock, + redis_client: Redis, + ): + super().__init__() + self.redis_lock: redis.lock.Lock = redis_lock + self.stop_key: str = stop_key + self.generator_progress_key: str = generator_progress_key + self.redis_client = redis_client + + def should_stop(self) -> bool: + if self.redis_client.exists(self.stop_key): + return True + return False + + def progress(self, amount: int) -> None: + self.redis_lock.reacquire() + self.redis_client.incrby(self.generator_progress_key, amount) + + +@shared_task( + name="check_for_indexing", + soft_time_limit=300, + bind=True, +) +def check_for_indexing(self: Task, *, tenant_id: str | None) -> int | None: + tasks_created = 0 + + r = get_redis_client(tenant_id=tenant_id) + + lock_beat = r.lock( + DanswerRedisLocks.CHECK_INDEXING_BEAT_LOCK, + timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT, + ) + + try: + # these tasks should never overlap + if not lock_beat.acquire(blocking=False): + return None + + with get_session_with_tenant(tenant_id=tenant_id) as db_session: + old_search_settings = check_index_swap(db_session=db_session) + current_search_settings = get_current_search_settings(db_session) + # So that the first time users aren't surprised by really slow speed of first + # batch of documents indexed + if current_search_settings.provider_type is None and not MULTI_TENANT: + if old_search_settings: + embedding_model = EmbeddingModel.from_db_model( + search_settings=current_search_settings, + server_host=INDEXING_MODEL_SERVER_HOST, + server_port=INDEXING_MODEL_SERVER_PORT, + ) + + # only warm up if search settings were changed + warm_up_bi_encoder( + embedding_model=embedding_model, + ) + + cc_pair_ids: list[int] = [] + with get_session_with_tenant(tenant_id) as db_session: + cc_pairs = fetch_connector_credential_pairs(db_session) + for cc_pair_entry in cc_pairs: + cc_pair_ids.append(cc_pair_entry.id) + + for cc_pair_id in cc_pair_ids: + redis_connector = RedisConnector(tenant_id, cc_pair_id) + with get_session_with_tenant(tenant_id) as db_session: + # Get the primary search settings + primary_search_settings = get_current_search_settings(db_session) + search_settings = [primary_search_settings] + + # Check for secondary search settings + secondary_search_settings = get_secondary_search_settings(db_session) + if secondary_search_settings is not None: + # If secondary settings exist, add them to the list + search_settings.append(secondary_search_settings) + + for search_settings_instance in search_settings: + redis_connector_index = redis_connector.new_index( + search_settings_instance.id + ) + if redis_connector_index.fenced: + continue + + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id, db_session + ) + if not cc_pair: + continue + + last_attempt = get_last_attempt_for_cc_pair( + cc_pair.id, search_settings_instance.id, db_session + ) + if not _should_index( + cc_pair=cc_pair, + last_index=last_attempt, + search_settings_instance=search_settings_instance, + secondary_index_building=len(search_settings) > 1, + db_session=db_session, + ): + continue + + # using a task queue and only allowing one task per cc_pair/search_setting + # prevents us from starving out certain attempts + attempt_id = try_creating_indexing_task( + self.app, + cc_pair, + search_settings_instance, + False, + db_session, + r, + tenant_id, + ) + if attempt_id: + task_logger.info( + f"Indexing queued: index_attempt={attempt_id} " + f"cc_pair={cc_pair.id} " + f"search_settings={search_settings_instance.id} " + ) + tasks_created += 
1 + except SoftTimeLimitExceeded: + task_logger.info( + "Soft time limit exceeded, task is being terminated gracefully." + ) + except Exception: + task_logger.exception(f"Unexpected exception: tenant={tenant_id}") + finally: + if lock_beat.owned(): + lock_beat.release() + + return tasks_created + + +def _should_index( + cc_pair: ConnectorCredentialPair, + last_index: IndexAttempt | None, + search_settings_instance: SearchSettings, + secondary_index_building: bool, + db_session: Session, +) -> bool: + """Checks various global settings and past indexing attempts to determine if + we should try to start indexing the cc pair / search setting combination. + + Note that tactical checks such as preventing overlap with a currently running task + are not handled here. + + Return True if we should try to index, False if not. + """ + connector = cc_pair.connector + + # uncomment for debugging + # task_logger.info(f"_should_index: " + # f"cc_pair={cc_pair.id} " + # f"connector={cc_pair.connector_id} " + # f"refresh_freq={connector.refresh_freq}") + + # don't kick off indexing for `NOT_APPLICABLE` sources + if connector.source == DocumentSource.NOT_APPLICABLE: + return False + + # User can still manually create single indexing attempts via the UI for the + # currently in use index + if DISABLE_INDEX_UPDATE_ON_SWAP: + if ( + search_settings_instance.status == IndexModelStatus.PRESENT + and secondary_index_building + ): + return False + + # When switching over models, always index at least once + if search_settings_instance.status == IndexModelStatus.FUTURE: + if last_index: + # No new index if the last index attempt succeeded + # Once is enough. The model will never be able to swap otherwise. + if last_index.status == IndexingStatus.SUCCESS: + return False + + # No new index if the last index attempt is waiting to start + if last_index.status == IndexingStatus.NOT_STARTED: + return False + + # No new index if the last index attempt is running + if last_index.status == IndexingStatus.IN_PROGRESS: + return False + else: + if ( + connector.id == 0 or connector.source == DocumentSource.INGESTION_API + ): # Ingestion API + return False + return True + + # If the connector is paused or is the ingestion API, don't index + # NOTE: during an embedding model switch over, the following logic + # is bypassed by the above check for a future model + if ( + not cc_pair.status.is_active() + or connector.id == 0 + or connector.source == DocumentSource.INGESTION_API + ): + return False + + # if no attempt has ever occurred, we should index regardless of refresh_freq + if not last_index: + return True + + if connector.refresh_freq is None: + return False + + current_db_time = get_db_current_time(db_session) + time_since_index = current_db_time - last_index.time_updated + if time_since_index.total_seconds() < connector.refresh_freq: + return False + + return True + + +def try_creating_indexing_task( + celery_app: Celery, + cc_pair: ConnectorCredentialPair, + search_settings: SearchSettings, + reindex: bool, + db_session: Session, + r: Redis, + tenant_id: str | None, +) -> int | None: + """Checks for any conditions that should block the indexing task from being + created, then creates the task. + + Does not check for scheduling related conditions as this function + is used to trigger indexing immediately. 
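The pattern used here (serialize creation behind a short-lived Redis lock, raise a fence before dispatching, then fill the fence in afterwards) can be sketched in isolation roughly as follows; the helper names and the string fence value are invented for illustration, and only the stock redis-py lock API is used:

```python
from typing import Callable

from redis import Redis


def dispatch_once(
    r: Redis, lock_name: str, fence_key: str, dispatch: Callable[[], str]
) -> str | None:
    """Returns the dispatched task id, or None if another caller holds the lock
    or a fence is already up for this work."""
    lock = r.lock(lock_name, timeout=30)        # auto-expires if the holder dies
    if not lock.acquire(blocking_timeout=15):   # give up rather than queue behind a slow caller
        return None
    try:
        if r.exists(fence_key):                 # someone already queued this work
            return None
        r.set(fence_key, "submitted")           # fence goes up before the task is sent
        task_id = dispatch()                    # e.g. celery_app.send_task(...)
        r.set(fence_key, task_id)               # fill in the rest of the fence data
        return task_id
    finally:
        if lock.owned():
            lock.release()
```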
+ """ + + LOCK_TIMEOUT = 30 + + # we need to serialize any attempt to trigger indexing since it can be triggered + # either via celery beat or manually (API call) + lock = r.lock( + DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_creating_indexing_task", + timeout=LOCK_TIMEOUT, + ) + + acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2) + if not acquired: + return None + + try: + redis_connector = RedisConnector(tenant_id, cc_pair.id) + redis_connector_index = redis_connector.new_index(search_settings.id) + + # skip if already indexing + if redis_connector_index.fenced: + return None + + # skip indexing if the cc_pair is deleting + if redis_connector.delete.fenced: + return None + + db_session.refresh(cc_pair) + if cc_pair.status == ConnectorCredentialPairStatus.DELETING: + return None + + # add a long running generator task to the queue + redis_connector_index.generator_clear() + + # set a basic fence to start + payload = RedisConnectorIndexingFenceData( + index_attempt_id=None, + started=None, + submitted=datetime.now(timezone.utc), + celery_task_id=None, + ) + + redis_connector_index.set_fence(payload) + + # create the index attempt for tracking purposes + # code elsewhere checks for index attempts without an associated redis key + # and cleans them up + # therefore we must create the attempt and the task after the fence goes up + index_attempt_id = create_index_attempt( + cc_pair.id, + search_settings.id, + from_beginning=reindex, + db_session=db_session, + ) + + custom_task_id = redis_connector_index.generate_generator_task_id() + + result = celery_app.send_task( + "connector_indexing_proxy_task", + kwargs=dict( + index_attempt_id=index_attempt_id, + cc_pair_id=cc_pair.id, + search_settings_id=search_settings.id, + tenant_id=tenant_id, + ), + queue=DanswerCeleryQueues.CONNECTOR_INDEXING, + task_id=custom_task_id, + priority=DanswerCeleryPriority.MEDIUM, + ) + if not result: + raise RuntimeError("send_task for connector_indexing_proxy_task failed.") + + # now fill out the fence with the rest of the data + payload.index_attempt_id = index_attempt_id + payload.celery_task_id = result.id + redis_connector_index.set_fence(payload) + + except Exception: + redis_connector_index.set_fence(payload) + task_logger.exception( + f"Unexpected exception: " + f"tenant={tenant_id} " + f"cc_pair={cc_pair.id} " + f"search_settings={search_settings.id}" + ) + return None + finally: + if lock.owned(): + lock.release() + + return index_attempt_id + + +@shared_task(name="connector_indexing_proxy_task", acks_late=False, track_started=True) +def connector_indexing_proxy_task( + index_attempt_id: int, + cc_pair_id: int, + search_settings_id: int, + tenant_id: str | None, +) -> None: + """celery tasks are forked, but forking is unstable. 
This proxies work to a spawned task.""" + task_logger.info( + f"Indexing proxy - starting: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + client = SimpleJobClient() + + job = client.submit( + connector_indexing_task, + index_attempt_id, + cc_pair_id, + search_settings_id, + tenant_id, + global_version.is_ee_version(), + pure=False, + ) + + if not job: + task_logger.info( + f"Indexing proxy - spawn failed: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + return + + task_logger.info( + f"Indexing proxy - spawn succeeded: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + + while True: + sleep(10) + + # do nothing for ongoing jobs that haven't been stopped + if not job.done(): + with get_session_with_tenant(tenant_id) as db_session: + index_attempt = get_index_attempt( + db_session=db_session, index_attempt_id=index_attempt_id + ) + + if not index_attempt: + continue + + if not index_attempt.is_finished(): + continue + + if job.status == "error": + task_logger.error( + f"Indexing proxy - spawned task exceptioned: " + f"attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id} " + f"error={job.exception()}" + ) + + job.release() + break + + task_logger.info( + f"Indexing proxy - finished: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + return + + +def connector_indexing_task( + index_attempt_id: int, + cc_pair_id: int, + search_settings_id: int, + tenant_id: str | None, + is_ee: bool, +) -> int | None: + """Indexing task. For a cc pair and a search settings instance, this task runs the + full indexing pipeline via run_indexing_entrypoint and tracks its progress through + the per-attempt Redis fence and progress keys. + + acks_late must be set to False. Otherwise, celery's visibility timeout will + cause any task that runs longer than the timeout to be redispatched by the broker. + There appears to be no good workaround for this, so we need to handle redispatching + manually. + + Returns None if the task did not run (possibly due to a conflict). + Otherwise, returns an int >= 0 representing the number of indexed docs. + + NOTE: if an exception is raised out of this task, the primary worker will detect + that the task transitioned to a "READY" state but the generator_complete_key doesn't exist. + This will cause the primary worker to abort the indexing attempt and clean up. 
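The fence handshake described in the NOTE above, where the spawned task refuses to do real work until the dispatcher has finished populating the fence payload, looks roughly like this in isolation (the field names mirror RedisConnectorIndexingFenceData from this change; the JSON serialization and the helper name are assumptions):

```python
import json
import time

from redis import Redis


def wait_for_fence(
    r: Redis, fence_key: str, expected_attempt_id: int, timeout: float = 60.0
) -> dict:
    """Poll until the dispatcher has finished writing the fence payload."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        raw = r.get(fence_key)
        if raw is None:
            raise ValueError(f"fence not found: {fence_key}")
        payload = json.loads(raw)
        if payload.get("index_attempt_id") is None or payload.get("celery_task_id") is None:
            time.sleep(1)  # dispatcher hasn't filled in the ids yet
            continue
        if payload["index_attempt_id"] != expected_attempt_id:
            raise ValueError("fence belongs to a different indexing attempt")
        return payload
    raise TimeoutError(f"gave up waiting on fence: {fence_key}")
```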
+ """ + logger.info( + f"Indexing spawned task starting: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + + attempt_found = False + n_final_progress: int | None = None + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + redis_connector_index = redis_connector.new_index(search_settings_id) + + r = get_redis_client(tenant_id=tenant_id) + + if redis_connector.delete.fenced: + raise RuntimeError( + f"Indexing will not start because connector deletion is in progress: " + f"cc_pair={cc_pair_id} " + f"fence={redis_connector.delete.fence_key}" + ) + + if redis_connector.stop.fenced: + raise RuntimeError( + f"Indexing will not start because a connector stop signal was detected: " + f"cc_pair={cc_pair_id} " + f"fence={redis_connector.stop.fence_key}" + ) + + while True: + # wait for the fence to come up + if not redis_connector_index.fenced: + raise ValueError( + f"connector_indexing_task - fence not found: fence={redis_connector_index.fence_key}" + ) + + payload = redis_connector_index.payload + if not payload: + raise ValueError("connector_indexing_task: payload invalid or not found") + + if payload.index_attempt_id is None or payload.celery_task_id is None: + logger.info( + f"connector_indexing_task - Waiting for fence: fence={redis_connector_index.fence_key}" + ) + sleep(1) + continue + + if payload.index_attempt_id != index_attempt_id: + raise ValueError( + f"connector_indexing_task - id mismatch. Task may be left over from previous run.: " + f"task_index_attempt={index_attempt_id} " + f"payload_index_attempt={payload.index_attempt_id}" + ) + + logger.info( + f"connector_indexing_task - Fence found, continuing...: fence={redis_connector_index.fence_key}" + ) + break + + lock = r.lock( + redis_connector_index.generator_lock_key, + timeout=CELERY_INDEXING_LOCK_TIMEOUT, + ) + + acquired = lock.acquire(blocking=False) + if not acquired: + logger.warning( + f"Indexing task already running, exiting...: " + f"cc_pair={cc_pair_id} search_settings={search_settings_id}" + ) + return None + + payload.started = datetime.now(timezone.utc) + redis_connector_index.set_fence(payload) + + try: + with get_session_with_tenant(tenant_id) as db_session: + attempt = get_index_attempt(db_session, index_attempt_id) + if not attempt: + raise ValueError( + f"Index attempt not found: index_attempt={index_attempt_id}" + ) + attempt_found = True + + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + ) + + if not cc_pair: + raise ValueError(f"cc_pair not found: cc_pair={cc_pair_id}") + + if not cc_pair.connector: + raise ValueError( + f"Connector not found: cc_pair={cc_pair_id} connector={cc_pair.connector_id}" + ) + + if not cc_pair.credential: + raise ValueError( + f"Credential not found: cc_pair={cc_pair_id} credential={cc_pair.credential_id}" + ) + + # define a callback class + callback = RunIndexingCallback( + redis_connector.stop.fence_key, + redis_connector_index.generator_progress_key, + lock, + r, + ) + + logger.info( + f"Indexing spawned task running entrypoint: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + + run_indexing_entrypoint( + index_attempt_id, + tenant_id, + cc_pair_id, + is_ee, + callback=callback, + ) + + # get back the total number of indexed docs and return it + n_final_progress = redis_connector_index.get_progress() + 
redis_connector_index.set_generator_complete(HTTPStatus.OK.value) + except Exception as e: + logger.exception( + f"Indexing spawned task failed: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + if attempt_found: + with get_session_with_tenant(tenant_id) as db_session: + mark_attempt_failed(index_attempt_id, db_session, failure_reason=str(e)) + + raise e + finally: + if lock.owned(): + lock.release() + + logger.info( + f"Indexing spawned task finished: attempt={index_attempt_id} " + f"tenant={tenant_id} " + f"cc_pair={cc_pair_id} " + f"search_settings={search_settings_id}" + ) + return n_final_progress diff --git a/backend/danswer/background/celery/tasks/periodic/tasks.py b/backend/danswer/background/celery/tasks/periodic/tasks.py index bd3b082aeb8..20baa7c52fa 100644 --- a/backend/danswer/background/celery/tasks/periodic/tasks.py +++ b/backend/danswer/background/celery/tasks/periodic/tasks.py @@ -7,17 +7,14 @@ from celery import shared_task from celery.contrib.abortable import AbortableTask # type: ignore from celery.exceptions import TaskRevokedError -from celery.utils.log import get_task_logger from sqlalchemy import inspect from sqlalchemy import text from sqlalchemy.orm import Session +from danswer.background.celery.apps.app_base import task_logger from danswer.configs.app_configs import JOB_TIMEOUT from danswer.configs.constants import PostgresAdvisoryLocks -from danswer.db.engine import get_sqlalchemy_engine # type: ignore - -# use this within celery tasks to get celery task specific logging -task_logger = get_task_logger(__name__) +from danswer.db.engine import get_session_with_tenant @shared_task( @@ -26,7 +23,7 @@ bind=True, base=AbortableTask, ) -def kombu_message_cleanup_task(self: Any) -> int: +def kombu_message_cleanup_task(self: Any, tenant_id: str | None) -> int: """Runs periodically to clean up the kombu_message table""" # we will select messages older than this amount to clean up @@ -38,7 +35,7 @@ def kombu_message_cleanup_task(self: Any) -> int: ctx["deleted"] = 0 ctx["cleanup_age"] = KOMBU_MESSAGE_CLEANUP_AGE ctx["page_limit"] = KOMBU_MESSAGE_CLEANUP_PAGE_LIMIT - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: # Exit the task if we can't take the advisory lock result = db_session.execute( text("SELECT pg_try_advisory_lock(:id)"), diff --git a/backend/danswer/background/celery/tasks/pruning/tasks.py b/backend/danswer/background/celery/tasks/pruning/tasks.py index 2f840e430ae..af80e6b886c 100644 --- a/backend/danswer/background/celery/tasks/pruning/tasks.py +++ b/backend/danswer/background/celery/tasks/pruning/tasks.py @@ -1,63 +1,254 @@ +from datetime import datetime +from datetime import timedelta +from datetime import timezone +from uuid import uuid4 + +from celery import Celery from celery import shared_task -from celery.utils.log import get_task_logger +from celery import Task +from celery.exceptions import SoftTimeLimitExceeded +from redis import Redis from sqlalchemy.orm import Session -from danswer.background.celery.celery_app import celery_app +from danswer.background.celery.apps.app_base import task_logger from danswer.background.celery.celery_utils import extract_ids_from_runnable_connector -from danswer.background.celery.celery_utils import should_prune_cc_pair -from danswer.background.connector_deletion import delete_connector_credential_pair_batch -from danswer.background.task_utils import build_celery_task_wrapper 
-from danswer.background.task_utils import name_cc_prune_task +from danswer.background.celery.tasks.indexing.tasks import RunIndexingCallback +from danswer.configs.app_configs import ALLOW_SIMULTANEOUS_PRUNING from danswer.configs.app_configs import JOB_TIMEOUT +from danswer.configs.constants import CELERY_PRUNING_LOCK_TIMEOUT +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DANSWER_REDIS_FUNCTION_LOCK_PREFIX +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.configs.constants import DanswerRedisLocks from danswer.connectors.factory import instantiate_connector from danswer.connectors.models import InputType from danswer.db.connector_credential_pair import get_connector_credential_pair +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.connector_credential_pair import get_connector_credential_pairs from danswer.db.document import get_documents_for_connector_credential_pair -from danswer.db.engine import get_sqlalchemy_engine -from danswer.document_index.document_index_utils import get_both_index_names -from danswer.document_index.factory import get_default_document_index - +from danswer.db.engine import get_session_with_tenant +from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.db.models import ConnectorCredentialPair +from danswer.redis.redis_connector import RedisConnector +from danswer.redis.redis_pool import get_redis_client +from danswer.utils.logger import pruning_ctx +from danswer.utils.logger import setup_logger -# use this within celery tasks to get celery task specific logging -task_logger = get_task_logger(__name__) +logger = setup_logger() @shared_task( - name="check_for_prune_task", + name="check_for_pruning", soft_time_limit=JOB_TIMEOUT, + bind=True, ) -def check_for_prune_task() -> None: - """Runs periodically to check if any prune tasks should be run and adds them - to the queue""" +def check_for_pruning(self: Task, *, tenant_id: str | None) -> None: + r = get_redis_client(tenant_id=tenant_id) - with Session(get_sqlalchemy_engine()) as db_session: - all_cc_pairs = get_connector_credential_pairs(db_session) + lock_beat = r.lock( + DanswerRedisLocks.CHECK_PRUNE_BEAT_LOCK, + timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT, + ) - for cc_pair in all_cc_pairs: - if should_prune_cc_pair( - connector=cc_pair.connector, - credential=cc_pair.credential, - db_session=db_session, - ): - task_logger.info(f"Pruning the {cc_pair.connector.name} connector") - - prune_documents_task.apply_async( - kwargs=dict( - connector_id=cc_pair.connector.id, - credential_id=cc_pair.credential.id, - ) + try: + # these tasks should never overlap + if not lock_beat.acquire(blocking=False): + return + + cc_pair_ids: list[int] = [] + with get_session_with_tenant(tenant_id) as db_session: + cc_pairs = get_connector_credential_pairs(db_session) + for cc_pair_entry in cc_pairs: + cc_pair_ids.append(cc_pair_entry.id) + + for cc_pair_id in cc_pair_ids: + lock_beat.reacquire() + with get_session_with_tenant(tenant_id) as db_session: + cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) + if not cc_pair: + continue + + if not is_pruning_due(cc_pair, db_session, r): + continue + + tasks_created = try_creating_prune_generator_task( + self.app, cc_pair, db_session, r, tenant_id ) + if not tasks_created: + continue + + task_logger.info(f"Pruning queued: cc_pair={cc_pair.id}") + 
except SoftTimeLimitExceeded: + task_logger.info( + "Soft time limit exceeded, task is being terminated gracefully." + ) + except Exception: + task_logger.exception(f"Unexpected exception: tenant={tenant_id}") + finally: + if lock_beat.owned(): + lock_beat.release() + + +def is_pruning_due( + cc_pair: ConnectorCredentialPair, + db_session: Session, + r: Redis, +) -> bool: + """Returns True if pruning is due for this cc_pair, False otherwise. + + Only scheduling related conditions are checked here (the prune frequency and the time + of the last prune or last successful index); the remaining checks, such as the + simultaneous-pruning restriction, are handled by try_creating_prune_generator_task. + """ + + # skip pruning if no prune frequency is set + # pruning can still be forced via the API which will run a pruning task directly + if not cc_pair.connector.prune_freq: + return False + + # skip pruning if not active + if cc_pair.status != ConnectorCredentialPairStatus.ACTIVE: + return False + + # skip pruning if the next scheduled prune time hasn't been reached yet + last_pruned = cc_pair.last_pruned + if not last_pruned: + if not cc_pair.last_successful_index_time: + # if we've never indexed, we can't prune + return False + + # if never pruned, use the last time the connector indexed successfully + last_pruned = cc_pair.last_successful_index_time + + next_prune = last_pruned + timedelta(seconds=cc_pair.connector.prune_freq) + if datetime.now(timezone.utc) < next_prune: + return False + + return True + + +def try_creating_prune_generator_task( + celery_app: Celery, + cc_pair: ConnectorCredentialPair, + db_session: Session, + r: Redis, + tenant_id: str | None, +) -> int | None: + """Checks for any conditions that should block the pruning generator task from being + created, then creates the task. + + Does not check for scheduling related conditions as this function + is used to trigger prunes immediately, e.g. via the web UI. 
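For reference, the scheduling decision that is_pruning_due makes above comes down to a little datetime arithmetic; a stand-alone sketch with ad-hoc argument names:

```python
from datetime import datetime, timedelta, timezone


def prune_is_due(
    prune_freq_seconds: int | None,
    last_pruned: datetime | None,
    last_successful_index_time: datetime | None,
) -> bool:
    if not prune_freq_seconds:
        return False  # pruning disabled for this connector
    anchor = last_pruned or last_successful_index_time
    if anchor is None:
        return False  # never indexed, so nothing to prune yet
    next_prune = anchor + timedelta(seconds=prune_freq_seconds)
    return datetime.now(timezone.utc) >= next_prune
```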
+ """ + + redis_connector = RedisConnector(tenant_id, cc_pair.id) + + if not ALLOW_SIMULTANEOUS_PRUNING: + count = redis_connector.prune.get_active_task_count() + if count > 0: + return None + + LOCK_TIMEOUT = 30 + + # we need to serialize starting pruning since it can be triggered either via + # celery beat or manually (API call) + lock = r.lock( + DANSWER_REDIS_FUNCTION_LOCK_PREFIX + "try_creating_prune_generator_task", + timeout=LOCK_TIMEOUT, + ) + + acquired = lock.acquire(blocking_timeout=LOCK_TIMEOUT / 2) + if not acquired: + return None + + try: + if redis_connector.prune.fenced: # skip pruning if already pruning + return None + + if redis_connector.delete.fenced: # skip pruning if the cc_pair is deleting + return None + db_session.refresh(cc_pair) + if cc_pair.status == ConnectorCredentialPairStatus.DELETING: + return None -@build_celery_task_wrapper(name_cc_prune_task) -@celery_app.task(name="prune_documents_task", soft_time_limit=JOB_TIMEOUT) -def prune_documents_task(connector_id: int, credential_id: int) -> None: + # add a long running generator task to the queue + redis_connector.prune.generator_clear() + redis_connector.prune.taskset_clear() + + custom_task_id = f"{redis_connector.prune.generator_task_key}_{uuid4()}" + + celery_app.send_task( + "connector_pruning_generator_task", + kwargs=dict( + cc_pair_id=cc_pair.id, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + tenant_id=tenant_id, + ), + queue=DanswerCeleryQueues.CONNECTOR_PRUNING, + task_id=custom_task_id, + priority=DanswerCeleryPriority.LOW, + ) + + # set this only after all tasks have been added + redis_connector.prune.set_fence(True) + except Exception: + task_logger.exception(f"Unexpected exception: cc_pair={cc_pair.id}") + return None + finally: + if lock.owned(): + lock.release() + + return 1 + + +@shared_task( + name="connector_pruning_generator_task", + acks_late=False, + soft_time_limit=JOB_TIMEOUT, + track_started=True, + trail=False, + bind=True, +) +def connector_pruning_generator_task( + self: Task, + cc_pair_id: int, + connector_id: int, + credential_id: int, + tenant_id: str | None, +) -> None: """connector pruning task. 
For a cc pair, this task pulls all document IDs from the source and compares those IDs to locally stored documents and deletes all locally stored IDs missing from the most recently pulled document ID list""" - with Session(get_sqlalchemy_engine()) as db_session: - try: + + pruning_ctx_dict = pruning_ctx.get() + pruning_ctx_dict["cc_pair_id"] = cc_pair_id + pruning_ctx_dict["request_id"] = self.request.id + pruning_ctx.set(pruning_ctx_dict) + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + + r = get_redis_client(tenant_id=tenant_id) + + lock = r.lock( + DanswerRedisLocks.PRUNING_LOCK_PREFIX + f"_{redis_connector.id}", + timeout=CELERY_PRUNING_LOCK_TIMEOUT, + ) + + acquired = lock.acquire(blocking=False) + if not acquired: + task_logger.warning( + f"Pruning task already running, exiting...: cc_pair={cc_pair_id}" + ) + return None + + try: + with get_session_with_tenant(tenant_id) as db_session: cc_pair = get_connector_credential_pair( db_session=db_session, connector_id=connector_id, @@ -66,22 +257,30 @@ def prune_documents_task(connector_id: int, credential_id: int) -> None: if not cc_pair: task_logger.warning( - f"ccpair not found for {connector_id} {credential_id}" + f"cc_pair not found for {connector_id} {credential_id}" ) return runnable_connector = instantiate_connector( db_session, cc_pair.connector.source, - InputType.PRUNE, + InputType.SLIM_RETRIEVAL, cc_pair.connector.connector_specific_config, cc_pair.credential, ) + callback = RunIndexingCallback( + redis_connector.stop.fence_key, + redis_connector.prune.generator_progress_key, + lock, + r, + ) + # a list of docs in the source all_connector_doc_ids: set[str] = extract_ids_from_runnable_connector( - runnable_connector + runnable_connector, callback ) + # a list of docs in our local index all_indexed_document_ids = { doc.id for doc in get_documents_for_connector_credential_pair( @@ -91,30 +290,40 @@ def prune_documents_task(connector_id: int, credential_id: int) -> None: ) } + # generate list of docs to remove (no longer in the source) doc_ids_to_remove = list(all_indexed_document_ids - all_connector_doc_ids) - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - document_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name + task_logger.info( + f"Pruning set collected: " + f"cc_pair={cc_pair_id} " + f"docs_to_remove={len(doc_ids_to_remove)} " + f"doc_source={cc_pair.connector.source}" ) - if len(doc_ids_to_remove) == 0: - task_logger.info( - f"No docs to prune from {cc_pair.connector.source} connector" - ) - return - task_logger.info( - f"pruning {len(doc_ids_to_remove)} doc(s) from {cc_pair.connector.source} connector" + f"RedisConnector.prune.generate_tasks starting. cc_pair={cc_pair_id}" ) - delete_connector_credential_pair_batch( - document_ids=doc_ids_to_remove, - connector_id=connector_id, - credential_id=credential_id, - document_index=document_index, + tasks_generated = redis_connector.prune.generate_tasks( + set(doc_ids_to_remove), self.app, db_session, None ) - except Exception as e: - task_logger.exception( - f"Failed to run pruning for connector id {connector_id}." + if tasks_generated is None: + return None + + task_logger.info( + f"RedisConnector.prune.generate_tasks finished. 
" + f"cc_pair={cc_pair_id} tasks_generated={tasks_generated}" ) - raise e + + redis_connector.prune.generator_complete = tasks_generated + except Exception as e: + task_logger.exception( + f"Failed to run pruning: cc_pair={cc_pair_id} connector={connector_id}" + ) + + redis_connector.prune.generator_clear() + redis_connector.prune.taskset_clear() + redis_connector.prune.set_fence(False) + raise e + finally: + if lock.owned(): + lock.release() diff --git a/backend/danswer/background/celery/tasks/shared/RetryDocumentIndex.py b/backend/danswer/background/celery/tasks/shared/RetryDocumentIndex.py new file mode 100644 index 00000000000..bdaca0d811e --- /dev/null +++ b/backend/danswer/background/celery/tasks/shared/RetryDocumentIndex.py @@ -0,0 +1,40 @@ +import httpx +from tenacity import retry +from tenacity import retry_if_exception_type +from tenacity import stop_after_delay +from tenacity import wait_random_exponential + +from danswer.document_index.interfaces import DocumentIndex +from danswer.document_index.interfaces import VespaDocumentFields + + +class RetryDocumentIndex: + """A wrapper class to help with specific retries against Vespa involving + read timeouts. + + wait_random_exponential implements full jitter as per this article: + https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/""" + + MAX_WAIT = 30 + + # STOP_AFTER + MAX_WAIT should be slightly less (5?) than the celery soft_time_limit + STOP_AFTER = 70 + + def __init__(self, index: DocumentIndex): + self.index: DocumentIndex = index + + @retry( + retry=retry_if_exception_type(httpx.ReadTimeout), + wait=wait_random_exponential(multiplier=1, max=MAX_WAIT), + stop=stop_after_delay(STOP_AFTER), + ) + def delete_single(self, doc_id: str) -> int: + return self.index.delete_single(doc_id) + + @retry( + retry=retry_if_exception_type(httpx.ReadTimeout), + wait=wait_random_exponential(multiplier=1, max=MAX_WAIT), + stop=stop_after_delay(STOP_AFTER), + ) + def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int: + return self.index.update_single(doc_id, fields) diff --git a/backend/danswer/background/celery/tasks/shared/tasks.py b/backend/danswer/background/celery/tasks/shared/tasks.py new file mode 100644 index 00000000000..116e7e1ff7e --- /dev/null +++ b/backend/danswer/background/celery/tasks/shared/tasks.py @@ -0,0 +1,182 @@ +from http import HTTPStatus + +import httpx +from celery import shared_task +from celery import Task +from celery.exceptions import SoftTimeLimitExceeded +from tenacity import RetryError + +from danswer.access.access import get_access_for_document +from danswer.background.celery.apps.app_base import task_logger +from danswer.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex +from danswer.db.document import delete_document_by_connector_credential_pair__no_commit +from danswer.db.document import delete_documents_complete__no_commit +from danswer.db.document import get_document +from danswer.db.document import get_document_connector_count +from danswer.db.document import mark_document_as_modified +from danswer.db.document import mark_document_as_synced +from danswer.db.document_set import fetch_document_sets_for_document +from danswer.db.engine import get_session_with_tenant +from danswer.document_index.document_index_utils import get_both_index_names +from danswer.document_index.factory import get_default_document_index +from danswer.document_index.interfaces import VespaDocumentFields +from danswer.server.documents.models import 
ConnectorCredentialPairIdentifier + +DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES = 3 + + +# 5 seconds more than RetryDocumentIndex STOP_AFTER+MAX_WAIT +LIGHT_SOFT_TIME_LIMIT = 105 +LIGHT_TIME_LIMIT = LIGHT_SOFT_TIME_LIMIT + 15 + + +@shared_task( + name="document_by_cc_pair_cleanup_task", + soft_time_limit=LIGHT_SOFT_TIME_LIMIT, + time_limit=LIGHT_TIME_LIMIT, + max_retries=DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES, + bind=True, +) +def document_by_cc_pair_cleanup_task( + self: Task, + document_id: str, + connector_id: int, + credential_id: int, + tenant_id: str | None, +) -> bool: + """A lightweight subtask used to clean up document to cc pair relationships. + Created by connection deletion and connector pruning parent tasks.""" + + """ + To delete a connector / credential pair: + (1) find all documents associated with connector / credential pair where there + this the is only connector / credential pair that has indexed it + (2) delete all documents from document stores + (3) delete all entries from postgres + (4) find all documents associated with connector / credential pair where there + are multiple connector / credential pairs that have indexed it + (5) update document store entries to remove access associated with the + connector / credential pair from the access list + (6) delete all relevant entries from postgres + """ + task_logger.info(f"tenant={tenant_id} doc={document_id}") + + try: + with get_session_with_tenant(tenant_id) as db_session: + action = "skip" + chunks_affected = 0 + + curr_ind_name, sec_ind_name = get_both_index_names(db_session) + doc_index = get_default_document_index( + primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name + ) + + retry_index = RetryDocumentIndex(doc_index) + + count = get_document_connector_count(db_session, document_id) + if count == 1: + # count == 1 means this is the only remaining cc_pair reference to the doc + # delete it from vespa and the db + action = "delete" + + chunks_affected = retry_index.delete_single(document_id) + delete_documents_complete__no_commit( + db_session=db_session, + document_ids=[document_id], + ) + elif count > 1: + action = "update" + + # count > 1 means the document still has cc_pair references + doc = get_document(document_id, db_session) + if not doc: + return False + + # the below functions do not include cc_pairs being deleted. + # i.e. they will correctly omit access for the current cc_pair + doc_access = get_access_for_document( + document_id=document_id, db_session=db_session + ) + + doc_sets = fetch_document_sets_for_document(document_id, db_session) + update_doc_sets: set[str] = set(doc_sets) + + fields = VespaDocumentFields( + document_sets=update_doc_sets, + access=doc_access, + boost=doc.boost, + hidden=doc.hidden, + ) + + # update Vespa. OK if doc doesn't exist. Raises exception otherwise. 
+ chunks_affected = retry_index.update_single(document_id, fields=fields) + + # there are still other cc_pair references to the doc, so just resync to Vespa + delete_document_by_connector_credential_pair__no_commit( + db_session=db_session, + document_id=document_id, + connector_credential_pair_identifier=ConnectorCredentialPairIdentifier( + connector_id=connector_id, + credential_id=credential_id, + ), + ) + + mark_document_as_synced(document_id, db_session) + else: + pass + + db_session.commit() + + task_logger.info( + f"tenant={tenant_id} " + f"doc={document_id} " + f"action={action} " + f"refcount={count} " + f"chunks={chunks_affected}" + ) + except SoftTimeLimitExceeded: + task_logger.info( + f"SoftTimeLimitExceeded exception. tenant={tenant_id} doc={document_id}" + ) + return False + except Exception as ex: + if isinstance(ex, RetryError): + task_logger.info(f"Retry failed: {ex.last_attempt.attempt_number}") + + # only set the inner exception if it is of type Exception + e_temp = ex.last_attempt.exception() + if isinstance(e_temp, Exception): + e = e_temp + else: + e = ex + + if isinstance(e, httpx.HTTPStatusError): + if e.response.status_code == HTTPStatus.BAD_REQUEST: + task_logger.exception( + f"Non-retryable HTTPStatusError: " + f"tenant={tenant_id} " + f"doc={document_id} " + f"status={e.response.status_code}" + ) + return False + + task_logger.exception( + f"Unexpected exception: tenant={tenant_id} doc={document_id}" + ) + + if self.request.retries < DOCUMENT_BY_CC_PAIR_CLEANUP_MAX_RETRIES: + # Still retrying. Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64 + countdown = 2 ** (self.request.retries + 4) + self.retry(exc=e, countdown=countdown) + else: + # This is the last attempt! mark the document as dirty in the db so that it + # eventually gets fixed out of band via stale document reconciliation + task_logger.info( + f"Max retries reached. 
Marking doc as dirty for reconciliation: " + f"tenant={tenant_id} doc={document_id}" + ) + with get_session_with_tenant(tenant_id) as db_session: + mark_document_as_modified(document_id, db_session) + return False + + return True diff --git a/backend/danswer/background/celery/tasks/vespa/tasks.py b/backend/danswer/background/celery/tasks/vespa/tasks.py index d11d317d0b1..b01a0eac815 100644 --- a/backend/danswer/background/celery/tasks/vespa/tasks.py +++ b/backend/danswer/background/celery/tasks/vespa/tasks.py @@ -1,24 +1,33 @@ import traceback +from datetime import datetime +from datetime import timezone +from http import HTTPStatus from typing import cast +import httpx import redis +from celery import Celery from celery import shared_task from celery import Task from celery.exceptions import SoftTimeLimitExceeded -from celery.utils.log import get_task_logger +from celery.result import AsyncResult +from celery.states import READY_STATES from redis import Redis from sqlalchemy.orm import Session +from tenacity import RetryError from danswer.access.access import get_access_for_document -from danswer.background.celery.celery_app import celery_app -from danswer.background.celery.celery_redis import RedisConnectorCredentialPair -from danswer.background.celery.celery_redis import RedisConnectorDeletion -from danswer.background.celery.celery_redis import RedisDocumentSet -from danswer.background.celery.celery_redis import RedisUserGroup +from danswer.background.celery.apps.app_base import task_logger +from danswer.background.celery.celery_redis import celery_get_queue_length +from danswer.background.celery.tasks.shared.RetryDocumentIndex import RetryDocumentIndex +from danswer.background.celery.tasks.shared.tasks import LIGHT_SOFT_TIME_LIMIT +from danswer.background.celery.tasks.shared.tasks import LIGHT_TIME_LIMIT from danswer.configs.app_configs import JOB_TIMEOUT from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryQueues from danswer.configs.constants import DanswerRedisLocks from danswer.db.connector import fetch_connector_by_id +from danswer.db.connector import mark_ccpair_as_pruned from danswer.db.connector_credential_pair import add_deletion_failure_message from danswer.db.connector_credential_pair import ( delete_connector_credential_pair__no_commit, @@ -27,6 +36,7 @@ from danswer.db.connector_credential_pair import get_connector_credential_pairs from danswer.db.document import count_documents_by_needs_sync from danswer.db.document import get_document +from danswer.db.document import get_document_ids_for_connector_credential_pair from danswer.db.document import mark_document_as_synced from danswer.db.document_set import delete_document_set from danswer.db.document_set import delete_document_set_cc_pair_relationship__no_commit @@ -34,24 +44,34 @@ from danswer.db.document_set import fetch_document_sets_for_document from danswer.db.document_set import get_document_set_by_id from danswer.db.document_set import mark_document_set_as_synced -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant +from danswer.db.enums import IndexingStatus from danswer.db.index_attempt import delete_index_attempts +from danswer.db.index_attempt import get_all_index_attempts_by_status +from danswer.db.index_attempt import get_index_attempt +from danswer.db.index_attempt import mark_attempt_failed from danswer.db.models import DocumentSet -from danswer.db.models import UserGroup +from danswer.db.models 
import IndexAttempt from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index -from danswer.document_index.interfaces import UpdateRequest -from danswer.redis.redis_pool import RedisPool +from danswer.document_index.interfaces import VespaDocumentFields +from danswer.redis.redis_connector import RedisConnector +from danswer.redis.redis_connector_credential_pair import RedisConnectorCredentialPair +from danswer.redis.redis_connector_delete import RedisConnectorDelete +from danswer.redis.redis_connector_index import RedisConnectorIndex +from danswer.redis.redis_connector_prune import RedisConnectorPrune +from danswer.redis.redis_document_set import RedisDocumentSet +from danswer.redis.redis_pool import get_redis_client +from danswer.redis.redis_usergroup import RedisUserGroup +from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import fetch_versioned_implementation from danswer.utils.variable_functionality import ( fetch_versioned_implementation_with_fallback, ) +from danswer.utils.variable_functionality import global_version from danswer.utils.variable_functionality import noop_fallback -redis_pool = RedisPool() - -# use this within celery tasks to get celery task specific logging -task_logger = get_task_logger(__name__) +logger = setup_logger() # celery auto associates tasks created inside another task, @@ -60,12 +80,13 @@ name="check_for_vespa_sync_task", soft_time_limit=JOB_TIMEOUT, trail=False, + bind=True, ) -def check_for_vespa_sync_task() -> None: +def check_for_vespa_sync_task(self: Task, *, tenant_id: str | None) -> None: """Runs periodically to check if any document needs syncing. Generates sets of tasks for Celery if syncing is needed.""" - r = redis_pool.get_client() + r = get_redis_client(tenant_id=tenant_id) lock_beat = r.lock( DanswerRedisLocks.CHECK_VESPA_SYNC_BEAT_LOCK, @@ -77,47 +98,72 @@ def check_for_vespa_sync_task() -> None: if not lock_beat.acquire(blocking=False): return - with Session(get_sqlalchemy_engine()) as db_session: - try_generate_stale_document_sync_tasks(db_session, r, lock_beat) + with get_session_with_tenant(tenant_id) as db_session: + try_generate_stale_document_sync_tasks( + self.app, db_session, r, lock_beat, tenant_id + ) + # region document set scan + document_set_ids: list[int] = [] + with get_session_with_tenant(tenant_id) as db_session: # check if any document sets are not synced document_set_info = fetch_document_sets( user_id=None, db_session=db_session, include_outdated=True ) + for document_set, _ in document_set_info: + document_set_ids.append(document_set.id) + + for document_set_id in document_set_ids: + with get_session_with_tenant(tenant_id) as db_session: try_generate_document_set_sync_tasks( - document_set, db_session, r, lock_beat + self.app, document_set_id, db_session, r, lock_beat, tenant_id ) + # endregion - # check if any user groups are not synced + # check if any user groups are not synced + if global_version.is_ee_version(): try: fetch_user_groups = fetch_versioned_implementation( "danswer.db.user_group", "fetch_user_groups" ) - - user_groups = fetch_user_groups( - db_session=db_session, only_up_to_date=False - ) - for usergroup in user_groups: - try_generate_user_group_sync_tasks( - usergroup, db_session, r, lock_beat - ) except ModuleNotFoundError: # Always exceptions on the MIT version, which is expected + # We shouldn't actually get here if the ee version check works pass + else: + usergroup_ids: 
list[int] = [] + with get_session_with_tenant(tenant_id) as db_session: + user_groups = fetch_user_groups( + db_session=db_session, only_up_to_date=False + ) + + for usergroup in user_groups: + usergroup_ids.append(usergroup.id) + + for usergroup_id in usergroup_ids: + with get_session_with_tenant(tenant_id) as db_session: + try_generate_user_group_sync_tasks( + self.app, usergroup_id, db_session, r, lock_beat, tenant_id + ) + except SoftTimeLimitExceeded: task_logger.info( "Soft time limit exceeded, task is being terminated gracefully." ) except Exception: - task_logger.exception("Unexpected exception") + task_logger.exception(f"Unexpected exception: tenant={tenant_id}") finally: if lock_beat.owned(): lock_beat.release() def try_generate_stale_document_sync_tasks( - db_session: Session, r: Redis, lock_beat: redis.lock.Lock + celery_app: Celery, + db_session: Session, + r: Redis, + lock_beat: redis.lock.Lock, + tenant_id: str | None, ) -> int | None: # the fence is up, do nothing if r.exists(RedisConnectorCredentialPair.get_fence_key()): @@ -141,8 +187,10 @@ def try_generate_stale_document_sync_tasks( total_tasks_generated = 0 cc_pairs = get_connector_credential_pairs(db_session) for cc_pair in cc_pairs: - rc = RedisConnectorCredentialPair(cc_pair.id) - tasks_generated = rc.generate_tasks(celery_app, db_session, r, lock_beat) + rc = RedisConnectorCredentialPair(tenant_id, cc_pair.id) + tasks_generated = rc.generate_tasks( + celery_app, db_session, r, lock_beat, tenant_id + ) if tasks_generated is None: continue @@ -166,19 +214,27 @@ def try_generate_stale_document_sync_tasks( def try_generate_document_set_sync_tasks( - document_set: DocumentSet, db_session: Session, r: Redis, lock_beat: redis.lock.Lock + celery_app: Celery, + document_set_id: int, + db_session: Session, + r: Redis, + lock_beat: redis.lock.Lock, + tenant_id: str | None, ) -> int | None: lock_beat.reacquire() - rds = RedisDocumentSet(document_set.id) + rds = RedisDocumentSet(tenant_id, document_set_id) # don't generate document set sync tasks if tasks are still pending - if r.exists(rds.fence_key): + if rds.fenced: return None # don't generate sync tasks if we're up to date # race condition with the monitor/cleanup function if we use a cached result! 
- db_session.refresh(document_set) + document_set = get_document_set_by_id(db_session, document_set_id) + if not document_set: + return None + if document_set.is_up_to_date: return None @@ -190,7 +246,9 @@ def try_generate_document_set_sync_tasks( ) # Add all documents that need to be updated into the queue - tasks_generated = rds.generate_tasks(celery_app, db_session, r, lock_beat) + tasks_generated = rds.generate_tasks( + celery_app, db_session, r, lock_beat, tenant_id + ) if tasks_generated is None: return None @@ -206,23 +264,34 @@ def try_generate_document_set_sync_tasks( ) # set this only after all tasks have been added - r.set(rds.fence_key, tasks_generated) + rds.set_fence(tasks_generated) return tasks_generated def try_generate_user_group_sync_tasks( - usergroup: UserGroup, db_session: Session, r: Redis, lock_beat: redis.lock.Lock + celery_app: Celery, + usergroup_id: int, + db_session: Session, + r: Redis, + lock_beat: redis.lock.Lock, + tenant_id: str | None, ) -> int | None: lock_beat.reacquire() - rug = RedisUserGroup(usergroup.id) - - # don't generate sync tasks if tasks are still pending - if r.exists(rug.fence_key): + rug = RedisUserGroup(tenant_id, usergroup_id) + if rug.fenced: + # don't generate sync tasks if tasks are still pending return None # race condition with the monitor/cleanup function if we use a cached result! - db_session.refresh(usergroup) + fetch_user_group = fetch_versioned_implementation( + "danswer.db.user_group", "fetch_user_group" + ) + + usergroup = fetch_user_group(db_session, usergroup_id) + if not usergroup: + return None + if usergroup.is_up_to_date: return None @@ -233,7 +302,9 @@ def try_generate_user_group_sync_tasks( task_logger.info( f"RedisUserGroup.generate_tasks starting. usergroup_id={usergroup.id}" ) - tasks_generated = rug.generate_tasks(celery_app, db_session, r, lock_beat) + tasks_generated = rug.generate_tasks( + celery_app, db_session, r, lock_beat, tenant_id + ) if tasks_generated is None: return None @@ -249,7 +320,7 @@ def try_generate_user_group_sync_tasks( ) # set this only after all tasks have been added - r.set(rug.fence_key, tasks_generated) + rug.set_fence(tasks_generated) return tasks_generated @@ -275,29 +346,28 @@ def monitor_connector_taskset(r: Redis) -> None: def monitor_document_set_taskset( - key_bytes: bytes, r: Redis, db_session: Session + tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session ) -> None: fence_key = key_bytes.decode("utf-8") - document_set_id = RedisDocumentSet.get_id_from_fence_key(fence_key) - if document_set_id is None: - task_logger.warning("could not parse document set id from {key}") + document_set_id_str = RedisDocumentSet.get_id_from_fence_key(fence_key) + if document_set_id_str is None: + task_logger.warning(f"could not parse document set id from {fence_key}") return - rds = RedisDocumentSet(document_set_id) + document_set_id = int(document_set_id_str) - fence_value = r.get(rds.fence_key) - if fence_value is None: + rds = RedisDocumentSet(tenant_id, document_set_id) + if not rds.fenced: return - try: - initial_count = int(cast(int, fence_value)) - except ValueError: - task_logger.error("The value is not an integer.") + initial_count = rds.payload + if initial_count is None: return count = cast(int, r.scard(rds.taskset_key)) task_logger.info( - f"Document set sync progress: document_set_id={document_set_id} remaining={count} initial={initial_count}" + f"Document set sync progress: document_set={document_set_id} " + f"remaining={count} initial={initial_count}" ) if count > 
0: return @@ -311,55 +381,73 @@ def monitor_document_set_taskset( # if there are no connectors, then delete the document set. delete_document_set(document_set_row=document_set, db_session=db_session) task_logger.info( - f"Successfully deleted document set with ID: '{document_set_id}'!" + f"Successfully deleted document set: document_set={document_set_id}" ) else: mark_document_set_as_synced(document_set_id, db_session) task_logger.info( - f"Successfully synced document set with ID: '{document_set_id}'!" + f"Successfully synced document set: document_set={document_set_id}" ) - r.delete(rds.taskset_key) - r.delete(rds.fence_key) + rds.reset() -def monitor_connector_deletion_taskset(key_bytes: bytes, r: Redis) -> None: +def monitor_connector_deletion_taskset( + tenant_id: str | None, key_bytes: bytes, r: Redis +) -> None: fence_key = key_bytes.decode("utf-8") - cc_pair_id = RedisConnectorDeletion.get_id_from_fence_key(fence_key) - if cc_pair_id is None: - task_logger.warning("could not parse document set id from {key}") + cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key) + if cc_pair_id_str is None: + task_logger.warning(f"could not parse cc_pair_id from {fence_key}") return - rcd = RedisConnectorDeletion(cc_pair_id) + cc_pair_id = int(cc_pair_id_str) - fence_value = r.get(rcd.fence_key) - if fence_value is None: + redis_connector = RedisConnector(tenant_id, cc_pair_id) + + fence_data = redis_connector.delete.payload + if not fence_data: + task_logger.warning( + f"Connector deletion - fence payload invalid: cc_pair={cc_pair_id}" + ) return - try: - initial_count = int(cast(int, fence_value)) - except ValueError: - task_logger.error("The value is not an integer.") + if fence_data.num_tasks is None: + # the fence is setting up but isn't ready yet return - count = cast(int, r.scard(rcd.taskset_key)) + remaining = redis_connector.delete.get_remaining() task_logger.info( - f"Connector deletion progress: cc_pair_id={cc_pair_id} remaining={count} initial={initial_count}" + f"Connector deletion progress: cc_pair={cc_pair_id} remaining={remaining} initial={fence_data.num_tasks}" ) - if count > 0: + if remaining > 0: return - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: cc_pair = get_connector_credential_pair_from_id(cc_pair_id, db_session) if not cc_pair: + task_logger.warning( + f"Connector deletion - cc_pair not found: cc_pair={cc_pair_id}" + ) return try: + doc_ids = get_document_ids_for_connector_credential_pair( + db_session, cc_pair.connector_id, cc_pair.credential_id + ) + if len(doc_ids) > 0: + # if this happens, documents somehow got added while deletion was in progress. 
Likely a bug + # gating off pruning and indexing work before deletion starts + task_logger.warning( + f"Connector deletion - documents still found after taskset completion: " + f"cc_pair={cc_pair_id} num={len(doc_ids)}" + ) + # clean up the rest of the related Postgres entities # index attempts delete_index_attempts( db_session=db_session, - cc_pair_id=cc_pair.id, + cc_pair_id=cc_pair_id, ) # document sets @@ -376,7 +464,7 @@ def monitor_connector_deletion_taskset(key_bytes: bytes, r: Redis) -> None: noop_fallback, ) cleanup_user_groups( - cc_pair_id=cc_pair.id, + cc_pair_id=cc_pair_id, db_session=db_session, ) @@ -393,42 +481,169 @@ def monitor_connector_deletion_taskset(key_bytes: bytes, r: Redis) -> None: ) if not connector or not len(connector.credentials): task_logger.info( - "Found no credentials left for connector, deleting connector" + "Connector deletion - Found no credentials left for connector, deleting connector" ) db_session.delete(connector) db_session.commit() except Exception as e: + db_session.rollback() stack_trace = traceback.format_exc() error_message = f"Error: {str(e)}\n\nStack Trace:\n{stack_trace}" - add_deletion_failure_message(db_session, cc_pair.id, error_message) + add_deletion_failure_message(db_session, cc_pair_id, error_message) task_logger.exception( - f"Failed to run connector_deletion. " - f"connector_id={cc_pair.connector_id} credential_id={cc_pair.credential_id}" + f"Connector deletion exceptioned: " + f"cc_pair={cc_pair_id} connector={cc_pair.connector_id} credential={cc_pair.credential_id}" ) raise e task_logger.info( - f"Successfully deleted connector_credential_pair with connector_id: '{cc_pair.connector_id}' " - f"and credential_id: '{cc_pair.credential_id}'. " - f"Deleted {initial_count} docs." + f"Connector deletion succeeded: " + f"cc_pair={cc_pair_id} " + f"connector={cc_pair.connector_id} " + f"credential={cc_pair.credential_id} " + f"docs_deleted={fence_data.num_tasks}" + ) + + redis_connector.delete.taskset_clear() + redis_connector.delete.set_fence(None) + + +def monitor_ccpair_pruning_taskset( + tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session +) -> None: + fence_key = key_bytes.decode("utf-8") + cc_pair_id_str = RedisConnector.get_id_from_fence_key(fence_key) + if cc_pair_id_str is None: + task_logger.warning( + f"monitor_ccpair_pruning_taskset: could not parse cc_pair_id from {fence_key}" + ) + return + + cc_pair_id = int(cc_pair_id_str) + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + if not redis_connector.prune.fenced: + return + + initial = redis_connector.prune.generator_complete + if initial is None: + return + + remaining = redis_connector.prune.get_remaining() + task_logger.info( + f"Connector pruning progress: cc_pair={cc_pair_id} remaining={remaining} initial={initial}" + ) + if remaining > 0: + return + + mark_ccpair_as_pruned(int(cc_pair_id), db_session) + task_logger.info( + f"Successfully pruned connector credential pair. 
cc_pair={cc_pair_id}" ) - r.delete(rcd.taskset_key) - r.delete(rcd.fence_key) + redis_connector.prune.taskset_clear() + redis_connector.prune.generator_clear() + redis_connector.prune.set_fence(False) -@shared_task(name="monitor_vespa_sync", soft_time_limit=300) -def monitor_vespa_sync() -> None: +def monitor_ccpair_indexing_taskset( + tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session +) -> None: + # if the fence doesn't exist, there's nothing to do + fence_key = key_bytes.decode("utf-8") + composite_id = RedisConnector.get_id_from_fence_key(fence_key) + if composite_id is None: + task_logger.warning( + f"monitor_ccpair_indexing_taskset: could not parse composite_id from {fence_key}" + ) + return + + # parse out metadata and initialize the helper class with it + parts = composite_id.split("/") + if len(parts) != 2: + return + + cc_pair_id = int(parts[0]) + search_settings_id = int(parts[1]) + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + redis_connector_index = redis_connector.new_index(search_settings_id) + if not redis_connector_index.fenced: + return + + payload = redis_connector_index.payload + if not payload: + return + + elapsed_submitted = datetime.now(timezone.utc) - payload.submitted + + progress = redis_connector_index.get_progress() + if progress is not None: + task_logger.info( + f"Connector indexing progress: cc_pair_id={cc_pair_id} " + f"search_settings_id={search_settings_id} " + f"progress={progress} " + f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}" + ) + + if payload.index_attempt_id is None or payload.celery_task_id is None: + # the task is still setting up + return + + # Read result state BEFORE generator_complete_key to avoid a race condition + # never use any blocking methods on the result from inside a task! + result: AsyncResult = AsyncResult(payload.celery_task_id) + result_state = result.state + + status_int = redis_connector_index.get_completion() + if status_int is None: + if result_state in READY_STATES: + # IF the task state is READY, THEN generator_complete should be set + # if it isn't, then the worker crashed + task_logger.info( + f"Connector indexing aborted: " + f"cc_pair_id={cc_pair_id} " + f"search_settings_id={search_settings_id} " + f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}" + ) + + index_attempt = get_index_attempt(db_session, payload.index_attempt_id) + if index_attempt: + mark_attempt_failed( + index_attempt_id=payload.index_attempt_id, + db_session=db_session, + failure_reason="Connector indexing aborted or exceptioned.", + ) + + redis_connector_index.reset() + return + + status_enum = HTTPStatus(status_int) + + task_logger.info( + f"Connector indexing finished: cc_pair_id={cc_pair_id} " + f"search_settings_id={search_settings_id} " + f"status={status_enum.name} " + f"elapsed_submitted={elapsed_submitted.total_seconds():.2f}" + ) + + redis_connector_index.reset() + + +@shared_task(name="monitor_vespa_sync", soft_time_limit=300, bind=True) +def monitor_vespa_sync(self: Task, tenant_id: str | None) -> bool: """This is a celery beat task that monitors and finalizes metadata sync tasksets. It scans for fence values and then gets the counts of any associated tasksets. If the count is 0, that means all tasks finished and we should clean up. This task lock timeout is CELERY_METADATA_SYNC_BEAT_LOCK_TIMEOUT seconds, so don't do anything too expensive in this function! 
+ + Returns True if the task actually did work, False if it exited early to prevent overlap """ - r = redis_pool.get_client() + r = get_redis_client(tenant_id=tenant_id) - lock_beat = r.lock( + lock_beat: redis.lock.Lock = r.lock( DanswerRedisLocks.MONITOR_VESPA_SYNC_BEAT_LOCK, timeout=CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT, ) @@ -436,27 +651,95 @@ def monitor_vespa_sync() -> None: try: # prevent overlapping tasks if not lock_beat.acquire(blocking=False): - return + return False + # print current queue lengths + r_celery = self.app.broker_connection().channel().client # type: ignore + n_celery = celery_get_queue_length("celery", r) + n_indexing = celery_get_queue_length( + DanswerCeleryQueues.CONNECTOR_INDEXING, r_celery + ) + n_sync = celery_get_queue_length( + DanswerCeleryQueues.VESPA_METADATA_SYNC, r_celery + ) + n_deletion = celery_get_queue_length( + DanswerCeleryQueues.CONNECTOR_DELETION, r_celery + ) + n_pruning = celery_get_queue_length( + DanswerCeleryQueues.CONNECTOR_PRUNING, r_celery + ) + + task_logger.info( + f"Queue lengths: celery={n_celery} " + f"indexing={n_indexing} " + f"sync={n_sync} " + f"deletion={n_deletion} " + f"pruning={n_pruning}" + ) + + # do some cleanup before clearing fences + # check the db for any outstanding index attempts + with get_session_with_tenant(tenant_id) as db_session: + attempts: list[IndexAttempt] = [] + attempts.extend( + get_all_index_attempts_by_status(IndexingStatus.NOT_STARTED, db_session) + ) + attempts.extend( + get_all_index_attempts_by_status(IndexingStatus.IN_PROGRESS, db_session) + ) + + for a in attempts: + # if attempts exist in the db but we don't detect them in redis, mark them as failed + fence_key = RedisConnectorIndex.fence_key_with_ids( + a.connector_credential_pair_id, a.search_settings_id + ) + if not r.exists(fence_key): + failure_reason = ( + f"Unknown index attempt. 
Might be left over from a process restart: " + f"index_attempt={a.id} " + f"cc_pair={a.connector_credential_pair_id} " + f"search_settings={a.search_settings_id}" + ) + task_logger.warning(failure_reason) + mark_attempt_failed(a.id, db_session, failure_reason=failure_reason) + + lock_beat.reacquire() if r.exists(RedisConnectorCredentialPair.get_fence_key()): monitor_connector_taskset(r) - for key_bytes in r.scan_iter(RedisConnectorDeletion.FENCE_PREFIX + "*"): - monitor_connector_deletion_taskset(key_bytes, r) + lock_beat.reacquire() + for key_bytes in r.scan_iter(RedisConnectorDelete.FENCE_PREFIX + "*"): + lock_beat.reacquire() + monitor_connector_deletion_taskset(tenant_id, key_bytes, r) + + lock_beat.reacquire() + for key_bytes in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"): + lock_beat.reacquire() + with get_session_with_tenant(tenant_id) as db_session: + monitor_document_set_taskset(tenant_id, key_bytes, r, db_session) + + lock_beat.reacquire() + for key_bytes in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"): + lock_beat.reacquire() + monitor_usergroup_taskset = fetch_versioned_implementation_with_fallback( + "danswer.background.celery.tasks.vespa.tasks", + "monitor_usergroup_taskset", + noop_fallback, + ) + with get_session_with_tenant(tenant_id) as db_session: + monitor_usergroup_taskset(tenant_id, key_bytes, r, db_session) - with Session(get_sqlalchemy_engine()) as db_session: - for key_bytes in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"): - monitor_document_set_taskset(key_bytes, r, db_session) + lock_beat.reacquire() + for key_bytes in r.scan_iter(RedisConnectorPrune.FENCE_PREFIX + "*"): + lock_beat.reacquire() + with get_session_with_tenant(tenant_id) as db_session: + monitor_ccpair_pruning_taskset(tenant_id, key_bytes, r, db_session) - for key_bytes in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"): - monitor_usergroup_taskset = ( - fetch_versioned_implementation_with_fallback( - "danswer.background.celery.tasks.vespa.tasks", - "monitor_usergroup_taskset", - noop_fallback, - ) - ) - monitor_usergroup_taskset(key_bytes, r, db_session) + lock_beat.reacquire() + for key_bytes in r.scan_iter(RedisConnectorIndex.FENCE_PREFIX + "*"): + lock_beat.reacquire() + with get_session_with_tenant(tenant_id) as db_session: + monitor_ccpair_indexing_taskset(tenant_id, key_bytes, r, db_session) # uncomment for debugging if needed # r_celery = celery_app.broker_connection().channel().client @@ -470,24 +753,28 @@ def monitor_vespa_sync() -> None: if lock_beat.owned(): lock_beat.release() + return True + @shared_task( name="vespa_metadata_sync_task", bind=True, - soft_time_limit=45, - time_limit=60, + soft_time_limit=LIGHT_SOFT_TIME_LIMIT, + time_limit=LIGHT_TIME_LIMIT, max_retries=3, ) -def vespa_metadata_sync_task(self: Task, document_id: str) -> bool: - task_logger.info(f"document_id={document_id}") - +def vespa_metadata_sync_task( + self: Task, document_id: str, tenant_id: str | None +) -> bool: try: - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: curr_ind_name, sec_ind_name = get_both_index_names(db_session) - document_index = get_default_document_index( + doc_index = get_default_document_index( primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name ) + retry_index = RetryDocumentIndex(doc_index) + doc = get_document(document_id, db_session) if not doc: return False @@ -500,24 +787,52 @@ def vespa_metadata_sync_task(self: Task, document_id: str) -> bool: doc_access = get_access_for_document( 
document_id=document_id, db_session=db_session ) - update_request = UpdateRequest( - document_ids=[document_id], + + fields = VespaDocumentFields( document_sets=update_doc_sets, access=doc_access, boost=doc.boost, hidden=doc.hidden, ) - # update Vespa - document_index.update(update_requests=[update_request]) + # update Vespa. OK if doc doesn't exist. Raises exception otherwise. + chunks_affected = retry_index.update_single(document_id, fields) # update db last. Worst case = we crash right before this and # the sync might repeat again later mark_document_as_synced(document_id, db_session) + + task_logger.info( + f"tenant={tenant_id} doc={document_id} action=sync chunks={chunks_affected}" + ) except SoftTimeLimitExceeded: - task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}") - except Exception as e: - task_logger.exception("Unexpected exception") + task_logger.info( + f"SoftTimeLimitExceeded exception. tenant={tenant_id} doc={document_id}" + ) + except Exception as ex: + if isinstance(ex, RetryError): + task_logger.warning(f"Retry failed: {ex.last_attempt.attempt_number}") + + # only set the inner exception if it is of type Exception + e_temp = ex.last_attempt.exception() + if isinstance(e_temp, Exception): + e = e_temp + else: + e = ex + + if isinstance(e, httpx.HTTPStatusError): + if e.response.status_code == HTTPStatus.BAD_REQUEST: + task_logger.exception( + f"Non-retryable HTTPStatusError: " + f"tenant={tenant_id} " + f"doc={document_id} " + f"status={e.response.status_code}" + ) + return False + + task_logger.exception( + f"Unexpected exception: tenant={tenant_id} doc={document_id}" + ) # Exponential backoff from 2^4 to 2^6 ... i.e. 16, 32, 64 countdown = 2 ** (self.request.retries + 4) diff --git a/backend/danswer/background/celery/versioned_apps/beat.py b/backend/danswer/background/celery/versioned_apps/beat.py new file mode 100644 index 00000000000..af407f93c64 --- /dev/null +++ b/backend/danswer/background/celery/versioned_apps/beat.py @@ -0,0 +1,6 @@ +"""Factory stub for running celery worker / celery beat.""" +from danswer.background.celery.apps.beat import celery_app +from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable + +set_is_ee_based_on_env_variable() +app = celery_app diff --git a/backend/danswer/background/celery/versioned_apps/heavy.py b/backend/danswer/background/celery/versioned_apps/heavy.py new file mode 100644 index 00000000000..c2b58a53bfc --- /dev/null +++ b/backend/danswer/background/celery/versioned_apps/heavy.py @@ -0,0 +1,17 @@ +"""Factory stub for running celery worker / celery beat. +This code is different from the primary/beat stubs because there is no EE version to +fetch. Port over the code in those files if we add an EE version of this worker.""" +from celery import Celery + +from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable + +set_is_ee_based_on_env_variable() + + +def get_app() -> Celery: + from danswer.background.celery.apps.heavy import celery_app + + return celery_app + + +app = get_app() diff --git a/backend/danswer/background/celery/versioned_apps/indexing.py b/backend/danswer/background/celery/versioned_apps/indexing.py new file mode 100644 index 00000000000..ed26fc548bc --- /dev/null +++ b/backend/danswer/background/celery/versioned_apps/indexing.py @@ -0,0 +1,17 @@ +"""Factory stub for running celery worker / celery beat. +This code is different from the primary/beat stubs because there is no EE version to +fetch. 
Port over the code in those files if we add an EE version of this worker.""" +from celery import Celery + +from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable + +set_is_ee_based_on_env_variable() + + +def get_app() -> Celery: + from danswer.background.celery.apps.indexing import celery_app + + return celery_app + + +app = get_app() diff --git a/backend/danswer/background/celery/versioned_apps/light.py b/backend/danswer/background/celery/versioned_apps/light.py new file mode 100644 index 00000000000..3d229431ce5 --- /dev/null +++ b/backend/danswer/background/celery/versioned_apps/light.py @@ -0,0 +1,17 @@ +"""Factory stub for running celery worker / celery beat. +This code is different from the primary/beat stubs because there is no EE version to +fetch. Port over the code in those files if we add an EE version of this worker.""" +from celery import Celery + +from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable + +set_is_ee_based_on_env_variable() + + +def get_app() -> Celery: + from danswer.background.celery.apps.light import celery_app + + return celery_app + + +app = get_app() diff --git a/backend/danswer/background/celery/celery_run.py b/backend/danswer/background/celery/versioned_apps/primary.py similarity index 55% rename from backend/danswer/background/celery/celery_run.py rename to backend/danswer/background/celery/versioned_apps/primary.py index 0fdb2f044a8..2d97caa3da5 100644 --- a/backend/danswer/background/celery/celery_run.py +++ b/backend/danswer/background/celery/versioned_apps/primary.py @@ -1,9 +1,8 @@ -"""Entry point for running celery worker / celery beat.""" +"""Factory stub for running celery worker / celery beat.""" from danswer.utils.variable_functionality import fetch_versioned_implementation from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable - set_is_ee_based_on_env_variable() -celery_app = fetch_versioned_implementation( - "danswer.background.celery.celery_app", "celery_app" +app = fetch_versioned_implementation( + "danswer.background.celery.apps.primary", "celery_app" ) diff --git a/backend/danswer/background/connector_deletion.py b/backend/danswer/background/connector_deletion.py deleted file mode 100644 index 84b696dd8e4..00000000000 --- a/backend/danswer/background/connector_deletion.py +++ /dev/null @@ -1,211 +0,0 @@ -""" -To delete a connector / credential pair: -(1) find all documents associated with connector / credential pair where there -this the is only connector / credential pair that has indexed it -(2) delete all documents from document stores -(3) delete all entries from postgres -(4) find all documents associated with connector / credential pair where there -are multiple connector / credential pairs that have indexed it -(5) update document store entries to remove access associated with the -connector / credential pair from the access list -(6) delete all relevant entries from postgres -""" -from celery import shared_task -from celery import Task -from celery.exceptions import SoftTimeLimitExceeded -from celery.utils.log import get_task_logger -from sqlalchemy.orm import Session - -from danswer.access.access import get_access_for_document -from danswer.access.access import get_access_for_documents -from danswer.db.document import delete_document_by_connector_credential_pair__no_commit -from danswer.db.document import delete_documents_by_connector_credential_pair__no_commit -from danswer.db.document import delete_documents_complete__no_commit -from 
danswer.db.document import get_document -from danswer.db.document import get_document_connector_count -from danswer.db.document import get_document_connector_counts -from danswer.db.document import mark_document_as_synced -from danswer.db.document import prepare_to_modify_documents -from danswer.db.document_set import fetch_document_sets_for_document -from danswer.db.document_set import fetch_document_sets_for_documents -from danswer.db.engine import get_sqlalchemy_engine -from danswer.document_index.document_index_utils import get_both_index_names -from danswer.document_index.factory import get_default_document_index -from danswer.document_index.interfaces import DocumentIndex -from danswer.document_index.interfaces import UpdateRequest -from danswer.server.documents.models import ConnectorCredentialPairIdentifier -from danswer.utils.logger import setup_logger - -logger = setup_logger() - -# use this within celery tasks to get celery task specific logging -task_logger = get_task_logger(__name__) - -_DELETION_BATCH_SIZE = 1000 - - -def delete_connector_credential_pair_batch( - document_ids: list[str], - connector_id: int, - credential_id: int, - document_index: DocumentIndex, -) -> None: - """ - Removes a batch of documents ids from a cc-pair. If no other cc-pair uses a document anymore - it gets permanently deleted. - """ - with Session(get_sqlalchemy_engine()) as db_session: - # acquire lock for all documents in this batch so that indexing can't - # override the deletion - with prepare_to_modify_documents( - db_session=db_session, document_ids=document_ids - ): - document_connector_counts = get_document_connector_counts( - db_session=db_session, document_ids=document_ids - ) - - # figure out which docs need to be completely deleted - document_ids_to_delete = [ - document_id - for document_id, cnt in document_connector_counts - if cnt == 1 - ] - logger.debug(f"Deleting documents: {document_ids_to_delete}") - - document_index.delete(doc_ids=document_ids_to_delete) - - delete_documents_complete__no_commit( - db_session=db_session, - document_ids=document_ids_to_delete, - ) - - # figure out which docs need to be updated - document_ids_to_update = [ - document_id for document_id, cnt in document_connector_counts if cnt > 1 - ] - - # maps document id to list of document set names - new_doc_sets_for_documents: dict[str, set[str]] = { - document_id_and_document_set_names_tuple[0]: set( - document_id_and_document_set_names_tuple[1] - ) - for document_id_and_document_set_names_tuple in fetch_document_sets_for_documents( - db_session=db_session, - document_ids=document_ids_to_update, - ) - } - - # determine future ACLs for documents in batch - access_for_documents = get_access_for_documents( - document_ids=document_ids_to_update, - db_session=db_session, - ) - - # update Vespa - logger.debug(f"Updating documents: {document_ids_to_update}") - update_requests = [ - UpdateRequest( - document_ids=[document_id], - access=access, - document_sets=new_doc_sets_for_documents[document_id], - ) - for document_id, access in access_for_documents.items() - ] - document_index.update(update_requests=update_requests) - - # clean up Postgres - delete_documents_by_connector_credential_pair__no_commit( - db_session=db_session, - document_ids=document_ids_to_update, - connector_credential_pair_identifier=ConnectorCredentialPairIdentifier( - connector_id=connector_id, - credential_id=credential_id, - ), - ) - db_session.commit() - - -@shared_task( - name="document_by_cc_pair_cleanup_task", - bind=True, - 
soft_time_limit=45, - time_limit=60, - max_retries=3, -) -def document_by_cc_pair_cleanup_task( - self: Task, document_id: str, connector_id: int, credential_id: int -) -> bool: - task_logger.info(f"document_id={document_id}") - - try: - with Session(get_sqlalchemy_engine()) as db_session: - curr_ind_name, sec_ind_name = get_both_index_names(db_session) - document_index = get_default_document_index( - primary_index_name=curr_ind_name, secondary_index_name=sec_ind_name - ) - - count = get_document_connector_count(db_session, document_id) - if count == 1: - # count == 1 means this is the only remaining cc_pair reference to the doc - # delete it from vespa and the db - document_index.delete_single(doc_id=document_id) - delete_documents_complete__no_commit( - db_session=db_session, - document_ids=[document_id], - ) - elif count > 1: - # count > 1 means the document still has cc_pair references - doc = get_document(document_id, db_session) - if not doc: - return False - - # the below functions do not include cc_pairs being deleted. - # i.e. they will correctly omit access for the current cc_pair - doc_access = get_access_for_document( - document_id=document_id, db_session=db_session - ) - - doc_sets = fetch_document_sets_for_document(document_id, db_session) - update_doc_sets: set[str] = set(doc_sets) - - update_request = UpdateRequest( - document_ids=[document_id], - document_sets=update_doc_sets, - access=doc_access, - boost=doc.boost, - hidden=doc.hidden, - ) - - # update Vespa. OK if doc doesn't exist. Raises exception otherwise. - document_index.update_single(update_request=update_request) - - # there are still other cc_pair references to the doc, so just resync to Vespa - delete_document_by_connector_credential_pair__no_commit( - db_session=db_session, - document_id=document_id, - connector_credential_pair_identifier=ConnectorCredentialPairIdentifier( - connector_id=connector_id, - credential_id=credential_id, - ), - ) - - mark_document_as_synced(document_id, db_session) - else: - pass - - # update_docs_last_modified__no_commit( - # db_session=db_session, - # document_ids=[document_id], - # ) - - db_session.commit() - except SoftTimeLimitExceeded: - task_logger.info(f"SoftTimeLimitExceeded exception. doc_id={document_id}") - except Exception as e: - task_logger.exception("Unexpected exception") - - # Exponential backoff from 2^4 to 2^6 ... i.e. 
16, 32, 64 - countdown = 2 ** (self.request.retries + 4) - self.retry(exc=e, countdown=countdown) - - return True diff --git a/backend/danswer/background/indexing/job_client.py b/backend/danswer/background/indexing/job_client.py index 68d706895fd..6808a52c5ca 100644 --- a/backend/danswer/background/indexing/job_client.py +++ b/backend/danswer/background/indexing/job_client.py @@ -11,7 +11,8 @@ from typing import Literal from typing import Optional -from danswer.db.engine import get_sqlalchemy_engine +from danswer.configs.constants import POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME +from danswer.db.engine import SqlEngine from danswer.utils.logger import setup_logger logger = setup_logger() @@ -37,7 +38,9 @@ def _initializer( if kwargs is None: kwargs = {} - get_sqlalchemy_engine().dispose(close=False) + logger.info("Initializing spawned worker child process.") + SqlEngine.set_app_name(POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME) + SqlEngine.init_engine(pool_size=4, max_overflow=12, pool_recycle=60) return func(*args, **kwargs) diff --git a/backend/danswer/background/indexing/run_indexing.py b/backend/danswer/background/indexing/run_indexing.py index 499899ac225..35cb080b903 100644 --- a/backend/danswer/background/indexing/run_indexing.py +++ b/backend/danswer/background/indexing/run_indexing.py @@ -1,5 +1,7 @@ import time import traceback +from abc import ABC +from abc import abstractmethod from datetime import datetime from datetime import timedelta from datetime import timezone @@ -14,15 +16,15 @@ from danswer.connectors.connector_runner import ConnectorRunner from danswer.connectors.factory import instantiate_connector from danswer.connectors.models import IndexAttemptMetadata +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.connector_credential_pair import get_last_successful_attempt_time from danswer.db.connector_credential_pair import update_connector_credential_pair -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.db.enums import ConnectorCredentialPairStatus -from danswer.db.index_attempt import get_index_attempt from danswer.db.index_attempt import mark_attempt_failed -from danswer.db.index_attempt import mark_attempt_in_progress from danswer.db.index_attempt import mark_attempt_partially_succeeded from danswer.db.index_attempt import mark_attempt_succeeded +from danswer.db.index_attempt import transition_attempt_to_in_progress from danswer.db.index_attempt import update_docs_indexed from danswer.db.models import IndexAttempt from danswer.db.models import IndexingStatus @@ -40,16 +42,30 @@ INDEXING_TRACER_NUM_PRINT_ENTRIES = 5 +class RunIndexingCallbackInterface(ABC): + """Defines a callback interface to be passed to + to run_indexing_entrypoint.""" + + @abstractmethod + def should_stop(self) -> bool: + """Signal to stop the looping function in flight.""" + + @abstractmethod + def progress(self, amount: int) -> None: + """Send progress updates to the caller.""" + + def _get_connector_runner( db_session: Session, attempt: IndexAttempt, start_time: datetime, end_time: datetime, + tenant_id: str | None, ) -> ConnectorRunner: """ NOTE: `start_time` and `end_time` are only used for poll connectors - Returns an interator of document batches and whether the returned documents + Returns an iterator of document batches and whether the returned documents are the complete list of existing documents of the connector. 
If the task of type LOAD_STATE, the list will be considered complete and otherwise incomplete. """ @@ -62,17 +78,23 @@ def _get_connector_runner( input_type=task, connector_specific_config=attempt.connector_credential_pair.connector.connector_specific_config, credential=attempt.connector_credential_pair.credential, + tenant_id=tenant_id, ) except Exception as e: logger.exception(f"Unable to instantiate connector due to {e}") # since we failed to even instantiate the connector, we pause the CCPair since # it will never succeed - update_connector_credential_pair( - db_session=db_session, - connector_id=attempt.connector_credential_pair.connector.id, - credential_id=attempt.connector_credential_pair.credential.id, - status=ConnectorCredentialPairStatus.PAUSED, + + cc_pair = get_connector_credential_pair_from_id( + attempt.connector_credential_pair.id, db_session ) + if cc_pair and cc_pair.status == ConnectorCredentialPairStatus.ACTIVE: + update_connector_credential_pair( + db_session=db_session, + connector_id=attempt.connector_credential_pair.connector.id, + credential_id=attempt.connector_credential_pair.credential.id, + status=ConnectorCredentialPairStatus.PAUSED, + ) raise e return ConnectorRunner( @@ -83,15 +105,26 @@ def _get_connector_runner( def _run_indexing( db_session: Session, index_attempt: IndexAttempt, + tenant_id: str | None, + callback: RunIndexingCallbackInterface | None = None, ) -> None: """ 1. Get documents which are either new or updated from specified application 2. Embed and index these documents into the chosen datastore (vespa) 3. Updates Postgres to record the indexed documents + the outcome of this run + + TODO: do not change index attempt statuses here ... instead, set signals in redis + and allow the monitor function to clean them up """ start_time = time.time() + if index_attempt.search_settings is None: + raise ValueError( + "Search settings must be set for indexing. This should not be possible." + ) + search_settings = index_attempt.search_settings + index_name = search_settings.index_name # Only update cc-pair status for primary index jobs @@ -123,6 +156,7 @@ def _run_indexing( or (search_settings.status == IndexModelStatus.FUTURE) ), db_session=db_session, + tenant_id=tenant_id, ) db_cc_pair = index_attempt.connector_credential_pair @@ -179,6 +213,7 @@ def _run_indexing( attempt=index_attempt, start_time=window_start, end_time=window_end, + tenant_id=tenant_id, ) all_connector_doc_ids: set[str] = set() @@ -191,7 +226,12 @@ def _run_indexing( # index being built. We want to populate it even for paused connectors # Often paused connectors are sources that aren't updated frequently but the # contents still need to be initially pulled. - db_session.refresh(db_connector) + if callback: + if callback.should_stop(): + raise RuntimeError("Connector stop signal detected") + + # TODO: should we move this into the above callback instead? 
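+ # refreshing db_cc_pair re-reads its status from Postgres on every batch, so a status
+ # change (e.g. the user pausing the connector) made while this run is in flight is
+ # noticed before the next batch is indexed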
+ db_session.refresh(db_cc_pair) if ( ( db_cc_pair.status == ConnectorCredentialPairStatus.PAUSED @@ -206,7 +246,9 @@ def _run_indexing( db_session.refresh(index_attempt) if index_attempt.status != IndexingStatus.IN_PROGRESS: # Likely due to user manually disabling it or model swap - raise RuntimeError("Index Attempt was canceled") + raise RuntimeError( + f"Index Attempt was canceled, status is {index_attempt.status}" + ) batch_description = [] for doc in doc_batch: @@ -226,6 +268,8 @@ def _run_indexing( logger.debug(f"Indexing batch of documents: {batch_description}") index_attempt_md.batch_num = batch_num + 1 # use 1-index for this + + # real work happens here! new_docs, total_batch_chunks = indexing_pipeline( document_batch=doc_batch, index_attempt_metadata=index_attempt_md, @@ -244,6 +288,9 @@ def _run_indexing( # be inaccurate db_session.commit() + if callback: + callback.progress(len(doc_batch)) + # This new value is updated every batch, so UI can refresh per batch update update_docs_indexed( db_session=db_session, @@ -290,7 +337,7 @@ def _run_indexing( or index_attempt.status != IndexingStatus.IN_PROGRESS ): mark_attempt_failed( - index_attempt, + index_attempt.id, db_session, failure_reason=str(e), full_exception_trace=traceback.format_exc(), @@ -325,7 +372,7 @@ def _run_indexing( and index_attempt_md.num_exceptions >= batch_num ): mark_attempt_failed( - index_attempt, + index_attempt.id, db_session, failure_reason="All batches exceptioned.", ) @@ -367,40 +414,13 @@ def _run_indexing( ) -def _prepare_index_attempt(db_session: Session, index_attempt_id: int) -> IndexAttempt: - # make sure that the index attempt can't change in between checking the - # status and marking it as in_progress. This setting will be discarded - # after the next commit: - # https://docs.sqlalchemy.org/en/20/orm/session_transaction.html#setting-isolation-for-individual-transactions - db_session.connection(execution_options={"isolation_level": "SERIALIZABLE"}) # type: ignore - - attempt = get_index_attempt( - db_session=db_session, - index_attempt_id=index_attempt_id, - ) - - if attempt is None: - raise RuntimeError(f"Unable to find IndexAttempt for ID '{index_attempt_id}'") - - if attempt.status != IndexingStatus.NOT_STARTED: - raise RuntimeError( - f"Indexing attempt with ID '{index_attempt_id}' is not in NOT_STARTED status. " - f"Current status is '{attempt.status}'." - ) - - # only commit once, to make sure this all happens in a single transaction - mark_attempt_in_progress(attempt, db_session) - - return attempt - - def run_indexing_entrypoint( - index_attempt_id: int, connector_credential_pair_id: int, is_ee: bool = False + index_attempt_id: int, + tenant_id: str | None, + connector_credential_pair_id: int, + is_ee: bool = False, + callback: RunIndexingCallbackInterface | None = None, ) -> None: - """Entrypoint for indexing run when using dask distributed. 
- Wraps the actual logic in a `try` block so that we can catch any exceptions - and mark the attempt as failed.""" - try: if is_ee: global_version.set_ee() @@ -410,26 +430,29 @@ def run_indexing_entrypoint( IndexAttemptSingleton.set_cc_and_index_id( index_attempt_id, connector_credential_pair_id ) - - with Session(get_sqlalchemy_engine()) as db_session: - # make sure that it is valid to run this indexing attempt + mark it - # as in progress - attempt = _prepare_index_attempt(db_session, index_attempt_id) + with get_session_with_tenant(tenant_id) as db_session: + attempt = transition_attempt_to_in_progress(index_attempt_id, db_session) logger.info( - f"Indexing starting: " - f"connector='{attempt.connector_credential_pair.connector.name}' " + f"Indexing starting for tenant {tenant_id}: " + if tenant_id is not None + else "" + + f"connector='{attempt.connector_credential_pair.connector.name}' " f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' " f"credentials='{attempt.connector_credential_pair.connector_id}'" ) - _run_indexing(db_session, attempt) + _run_indexing(db_session, attempt, tenant_id, callback) logger.info( - f"Indexing finished: " - f"connector='{attempt.connector_credential_pair.connector.name}' " + f"Indexing finished for tenant {tenant_id}: " + if tenant_id is not None + else "" + + f"connector='{attempt.connector_credential_pair.connector.name}' " f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' " f"credentials='{attempt.connector_credential_pair.connector_id}'" ) except Exception as e: - logger.exception(f"Indexing job with ID '{index_attempt_id}' failed due to {e}") + logger.exception( + f"Indexing job with ID '{index_attempt_id}' for tenant {tenant_id} failed due to {e}" + ) diff --git a/backend/danswer/background/update.py b/backend/danswer/background/update.py deleted file mode 100755 index 94e703635ee..00000000000 --- a/backend/danswer/background/update.py +++ /dev/null @@ -1,487 +0,0 @@ -import logging -import time -from datetime import datetime - -import dask -from dask.distributed import Client -from dask.distributed import Future -from distributed import LocalCluster -from sqlalchemy.orm import Session - -from danswer.background.indexing.dask_utils import ResourceLogger -from danswer.background.indexing.job_client import SimpleJob -from danswer.background.indexing.job_client import SimpleJobClient -from danswer.background.indexing.run_indexing import run_indexing_entrypoint -from danswer.configs.app_configs import CLEANUP_INDEXING_JOBS_TIMEOUT -from danswer.configs.app_configs import DASK_JOB_CLIENT_ENABLED -from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP -from danswer.configs.app_configs import NUM_INDEXING_WORKERS -from danswer.configs.app_configs import NUM_SECONDARY_INDEXING_WORKERS -from danswer.configs.constants import DocumentSource -from danswer.configs.constants import POSTGRES_INDEXER_APP_NAME -from danswer.db.connector import fetch_connectors -from danswer.db.connector_credential_pair import fetch_connector_credential_pairs -from danswer.db.engine import get_db_current_time -from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.engine import init_sqlalchemy_engine -from danswer.db.index_attempt import create_index_attempt -from danswer.db.index_attempt import get_index_attempt -from danswer.db.index_attempt import get_inprogress_index_attempts -from danswer.db.index_attempt import get_last_attempt_for_cc_pair -from danswer.db.index_attempt import 
get_not_started_index_attempts -from danswer.db.index_attempt import mark_attempt_failed -from danswer.db.models import ConnectorCredentialPair -from danswer.db.models import IndexAttempt -from danswer.db.models import IndexingStatus -from danswer.db.models import IndexModelStatus -from danswer.db.models import SearchSettings -from danswer.db.search_settings import get_current_search_settings -from danswer.db.search_settings import get_secondary_search_settings -from danswer.db.swap_index import check_index_swap -from danswer.natural_language_processing.search_nlp_models import EmbeddingModel -from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder -from danswer.utils.logger import setup_logger -from danswer.utils.variable_functionality import global_version -from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable -from shared_configs.configs import INDEXING_MODEL_SERVER_HOST -from shared_configs.configs import LOG_LEVEL -from shared_configs.configs import MODEL_SERVER_PORT - -logger = setup_logger() - -# If the indexing dies, it's most likely due to resource constraints, -# restarting just delays the eventual failure, not useful to the user -dask.config.set({"distributed.scheduler.allowed-failures": 0}) - -_UNEXPECTED_STATE_FAILURE_REASON = ( - "Stopped mid run, likely due to the background process being killed" -) - - -def _should_create_new_indexing( - cc_pair: ConnectorCredentialPair, - last_index: IndexAttempt | None, - search_settings_instance: SearchSettings, - secondary_index_building: bool, - db_session: Session, -) -> bool: - connector = cc_pair.connector - - # don't kick off indexing for `NOT_APPLICABLE` sources - if connector.source == DocumentSource.NOT_APPLICABLE: - return False - - # User can still manually create single indexing attempts via the UI for the - # currently in use index - if DISABLE_INDEX_UPDATE_ON_SWAP: - if ( - search_settings_instance.status == IndexModelStatus.PRESENT - and secondary_index_building - ): - return False - - # When switching over models, always index at least once - if search_settings_instance.status == IndexModelStatus.FUTURE: - if last_index: - # No new index if the last index attempt succeeded - # Once is enough. The model will never be able to swap otherwise. 
- if last_index.status == IndexingStatus.SUCCESS: - return False - - # No new index if the last index attempt is waiting to start - if last_index.status == IndexingStatus.NOT_STARTED: - return False - - # No new index if the last index attempt is running - if last_index.status == IndexingStatus.IN_PROGRESS: - return False - else: - if connector.id == 0: # Ingestion API - return False - return True - - # If the connector is paused or is the ingestion API, don't index - # NOTE: during an embedding model switch over, the following logic - # is bypassed by the above check for a future model - if not cc_pair.status.is_active() or connector.id == 0: - return False - - if not last_index: - return True - - if connector.refresh_freq is None: - return False - - # Only one scheduled/ongoing job per connector at a time - # this prevents cases where - # (1) the "latest" index_attempt is scheduled so we show - # that in the UI despite another index_attempt being in-progress - # (2) multiple scheduled index_attempts at a time - if ( - last_index.status == IndexingStatus.NOT_STARTED - or last_index.status == IndexingStatus.IN_PROGRESS - ): - return False - - current_db_time = get_db_current_time(db_session) - time_since_index = current_db_time - last_index.time_updated - return time_since_index.total_seconds() >= connector.refresh_freq - - -def _mark_run_failed( - db_session: Session, index_attempt: IndexAttempt, failure_reason: str -) -> None: - """Marks the `index_attempt` row as failed + updates the ` - connector_credential_pair` to reflect that the run failed""" - logger.warning( - f"Marking in-progress attempt 'connector: {index_attempt.connector_credential_pair.connector_id}, " - f"credential: {index_attempt.connector_credential_pair.credential_id}' as failed due to {failure_reason}" - ) - mark_attempt_failed( - index_attempt=index_attempt, - db_session=db_session, - failure_reason=failure_reason, - ) - - -"""Main funcs""" - - -def create_indexing_jobs(existing_jobs: dict[int, Future | SimpleJob]) -> None: - """Creates new indexing jobs for each connector / credential pair which is: - 1. Enabled - 2. `refresh_frequency` time has passed since the last indexing run for this pair - 3. 
There is not already an ongoing indexing attempt for this pair - """ - with Session(get_sqlalchemy_engine()) as db_session: - ongoing: set[tuple[int | None, int]] = set() - for attempt_id in existing_jobs: - attempt = get_index_attempt( - db_session=db_session, index_attempt_id=attempt_id - ) - if attempt is None: - logger.error( - f"Unable to find IndexAttempt for ID '{attempt_id}' when creating " - "indexing jobs" - ) - continue - ongoing.add( - ( - attempt.connector_credential_pair_id, - attempt.search_settings_id, - ) - ) - - # Get the primary search settings - primary_search_settings = get_current_search_settings(db_session) - search_settings = [primary_search_settings] - - # Check for secondary search settings - secondary_search_settings = get_secondary_search_settings(db_session) - if secondary_search_settings is not None: - # If secondary settings exist, add them to the list - search_settings.append(secondary_search_settings) - - all_connector_credential_pairs = fetch_connector_credential_pairs(db_session) - for cc_pair in all_connector_credential_pairs: - for search_settings_instance in search_settings: - # Check if there is an ongoing indexing attempt for this connector credential pair - if (cc_pair.id, search_settings_instance.id) in ongoing: - continue - - last_attempt = get_last_attempt_for_cc_pair( - cc_pair.id, search_settings_instance.id, db_session - ) - if not _should_create_new_indexing( - cc_pair=cc_pair, - last_index=last_attempt, - search_settings_instance=search_settings_instance, - secondary_index_building=len(search_settings) > 1, - db_session=db_session, - ): - continue - - create_index_attempt( - cc_pair.id, search_settings_instance.id, db_session - ) - - -def cleanup_indexing_jobs( - existing_jobs: dict[int, Future | SimpleJob], - timeout_hours: int = CLEANUP_INDEXING_JOBS_TIMEOUT, -) -> dict[int, Future | SimpleJob]: - existing_jobs_copy = existing_jobs.copy() - # clean up completed jobs - with Session(get_sqlalchemy_engine()) as db_session: - for attempt_id, job in existing_jobs.items(): - index_attempt = get_index_attempt( - db_session=db_session, index_attempt_id=attempt_id - ) - - # do nothing for ongoing jobs that haven't been stopped - if not job.done(): - if not index_attempt: - continue - - if not index_attempt.is_finished(): - continue - - if job.status == "error": - logger.error(job.exception()) - - job.release() - del existing_jobs_copy[attempt_id] - - if not index_attempt: - logger.error( - f"Unable to find IndexAttempt for ID '{attempt_id}' when cleaning " - "up indexing jobs" - ) - continue - - if ( - index_attempt.status == IndexingStatus.IN_PROGRESS - or job.status == "error" - ): - _mark_run_failed( - db_session=db_session, - index_attempt=index_attempt, - failure_reason=_UNEXPECTED_STATE_FAILURE_REASON, - ) - - # clean up in-progress jobs that were never completed - connectors = fetch_connectors(db_session) - for connector in connectors: - in_progress_indexing_attempts = get_inprogress_index_attempts( - connector.id, db_session - ) - for index_attempt in in_progress_indexing_attempts: - if index_attempt.id in existing_jobs: - # If index attempt is canceled, stop the run - if index_attempt.status == IndexingStatus.FAILED: - existing_jobs[index_attempt.id].cancel() - # check to see if the job has been updated in last `timeout_hours` hours, if not - # assume it to frozen in some bad state and just mark it as failed. 
Note: this relies - # on the fact that the `time_updated` field is constantly updated every - # batch of documents indexed - current_db_time = get_db_current_time(db_session=db_session) - time_since_update = current_db_time - index_attempt.time_updated - if time_since_update.total_seconds() > 60 * 60 * timeout_hours: - existing_jobs[index_attempt.id].cancel() - _mark_run_failed( - db_session=db_session, - index_attempt=index_attempt, - failure_reason="Indexing run frozen - no updates in the last three hours. " - "The run will be re-attempted at next scheduled indexing time.", - ) - else: - # If job isn't known, simply mark it as failed - _mark_run_failed( - db_session=db_session, - index_attempt=index_attempt, - failure_reason=_UNEXPECTED_STATE_FAILURE_REASON, - ) - - return existing_jobs_copy - - -def kickoff_indexing_jobs( - existing_jobs: dict[int, Future | SimpleJob], - client: Client | SimpleJobClient, - secondary_client: Client | SimpleJobClient, -) -> dict[int, Future | SimpleJob]: - existing_jobs_copy = existing_jobs.copy() - engine = get_sqlalchemy_engine() - - # Don't include jobs waiting in the Dask queue that just haven't started running - # Also (rarely) don't include for jobs that started but haven't updated the indexing tables yet - with Session(engine) as db_session: - # get_not_started_index_attempts orders its returned results from oldest to newest - # we must process attempts in a FIFO manner to prevent connector starvation - new_indexing_attempts = [ - (attempt, attempt.search_settings) - for attempt in get_not_started_index_attempts(db_session) - if attempt.id not in existing_jobs - ] - - logger.debug(f"Found {len(new_indexing_attempts)} new indexing task(s).") - - if not new_indexing_attempts: - return existing_jobs - - indexing_attempt_count = 0 - - primary_client_full = False - secondary_client_full = False - for attempt, search_settings in new_indexing_attempts: - if primary_client_full and secondary_client_full: - break - - use_secondary_index = ( - search_settings.status == IndexModelStatus.FUTURE - if search_settings is not None - else False - ) - if attempt.connector_credential_pair.connector is None: - logger.warning( - f"Skipping index attempt as Connector has been deleted: {attempt}" - ) - with Session(engine) as db_session: - mark_attempt_failed( - attempt, db_session, failure_reason="Connector is null" - ) - continue - if attempt.connector_credential_pair.credential is None: - logger.warning( - f"Skipping index attempt as Credential has been deleted: {attempt}" - ) - with Session(engine) as db_session: - mark_attempt_failed( - attempt, db_session, failure_reason="Credential is null" - ) - continue - - if not use_secondary_index: - if not primary_client_full: - run = client.submit( - run_indexing_entrypoint, - attempt.id, - attempt.connector_credential_pair_id, - global_version.get_is_ee_version(), - pure=False, - ) - if not run: - primary_client_full = True - else: - if not secondary_client_full: - run = secondary_client.submit( - run_indexing_entrypoint, - attempt.id, - attempt.connector_credential_pair_id, - global_version.get_is_ee_version(), - pure=False, - ) - if not run: - secondary_client_full = True - - if run: - if indexing_attempt_count == 0: - logger.info( - f"Indexing dispatch starts: pending={len(new_indexing_attempts)}" - ) - - indexing_attempt_count += 1 - secondary_str = " (secondary index)" if use_secondary_index else "" - logger.info( - f"Indexing dispatched{secondary_str}: " - f"attempt_id={attempt.id} " - 
f"connector='{attempt.connector_credential_pair.connector.name}' " - f"config='{attempt.connector_credential_pair.connector.connector_specific_config}' " - f"credentials='{attempt.connector_credential_pair.credential_id}'" - ) - existing_jobs_copy[attempt.id] = run - - if indexing_attempt_count > 0: - logger.info( - f"Indexing dispatch results: " - f"initial_pending={len(new_indexing_attempts)} " - f"started={indexing_attempt_count} " - f"remaining={len(new_indexing_attempts) - indexing_attempt_count}" - ) - - return existing_jobs_copy - - -def update_loop( - delay: int = 10, - num_workers: int = NUM_INDEXING_WORKERS, - num_secondary_workers: int = NUM_SECONDARY_INDEXING_WORKERS, -) -> None: - engine = get_sqlalchemy_engine() - with Session(engine) as db_session: - check_index_swap(db_session=db_session) - search_settings = get_current_search_settings(db_session) - - # So that the first time users aren't surprised by really slow speed of first - # batch of documents indexed - - if search_settings.provider_type is None: - logger.notice("Running a first inference to warm up embedding model") - embedding_model = EmbeddingModel.from_db_model( - search_settings=search_settings, - server_host=INDEXING_MODEL_SERVER_HOST, - server_port=MODEL_SERVER_PORT, - ) - - warm_up_bi_encoder( - embedding_model=embedding_model, - ) - logger.notice("First inference complete.") - - client_primary: Client | SimpleJobClient - client_secondary: Client | SimpleJobClient - if DASK_JOB_CLIENT_ENABLED: - cluster_primary = LocalCluster( - n_workers=num_workers, - threads_per_worker=1, - # there are warning about high memory usage + "Event loop unresponsive" - # which are not relevant to us since our workers are expected to use a - # lot of memory + involve CPU intensive tasks that will not relinquish - # the event loop - silence_logs=logging.ERROR, - ) - cluster_secondary = LocalCluster( - n_workers=num_secondary_workers, - threads_per_worker=1, - silence_logs=logging.ERROR, - ) - client_primary = Client(cluster_primary) - client_secondary = Client(cluster_secondary) - if LOG_LEVEL.lower() == "debug": - client_primary.register_worker_plugin(ResourceLogger()) - else: - client_primary = SimpleJobClient(n_workers=num_workers) - client_secondary = SimpleJobClient(n_workers=num_secondary_workers) - - existing_jobs: dict[int, Future | SimpleJob] = {} - - logger.notice("Startup complete. 
Waiting for indexing jobs...") - while True: - start = time.time() - start_time_utc = datetime.utcfromtimestamp(start).strftime("%Y-%m-%d %H:%M:%S") - logger.debug(f"Running update, current UTC time: {start_time_utc}") - - if existing_jobs: - # TODO: make this debug level once the "no jobs are being scheduled" issue is resolved - logger.debug( - "Found existing indexing jobs: " - f"{[(attempt_id, job.status) for attempt_id, job in existing_jobs.items()]}" - ) - - try: - with Session(get_sqlalchemy_engine()) as db_session: - check_index_swap(db_session) - existing_jobs = cleanup_indexing_jobs(existing_jobs=existing_jobs) - create_indexing_jobs(existing_jobs=existing_jobs) - existing_jobs = kickoff_indexing_jobs( - existing_jobs=existing_jobs, - client=client_primary, - secondary_client=client_secondary, - ) - except Exception as e: - logger.exception(f"Failed to run update due to {e}") - sleep_time = delay - (time.time() - start) - if sleep_time > 0: - time.sleep(sleep_time) - - -def update__main() -> None: - set_is_ee_based_on_env_variable() - init_sqlalchemy_engine(POSTGRES_INDEXER_APP_NAME) - - logger.notice("Starting indexing service") - update_loop() - - -if __name__ == "__main__": - update__main() diff --git a/backend/danswer/chat/chat_utils.py b/backend/danswer/chat/chat_utils.py index b1e4132779b..f5961010ba5 100644 --- a/backend/danswer/chat/chat_utils.py +++ b/backend/danswer/chat/chat_utils.py @@ -1,6 +1,8 @@ import re from typing import cast +from uuid import UUID +from fastapi.datastructures import Headers from sqlalchemy.orm import Session from danswer.chat.models import CitationInfo @@ -33,7 +35,7 @@ def llm_doc_from_inference_section(inference_section: InferenceSection) -> LlmDo def create_chat_chain( - chat_session_id: int, + chat_session_id: UUID, db_session: Session, prefetch_tool_calls: bool = True, # Optional id at which we finish processing @@ -166,3 +168,31 @@ def slack_link_format(match: re.Match) -> str: new_citation_info[citation.citation_num] = citation return new_answer, list(new_citation_info.values()) + + +def extract_headers( + headers: dict[str, str] | Headers, pass_through_headers: list[str] | None +) -> dict[str, str]: + """ + Extract headers specified in pass_through_headers from input headers. + Handles both dict and FastAPI Headers objects, accounting for lowercase keys. + + Args: + headers: Input headers as dict or Headers object. + + Returns: + dict: Filtered headers based on pass_through_headers. 
+ """ + if not pass_through_headers: + return {} + + extracted_headers: dict[str, str] = {} + for key in pass_through_headers: + if key in headers: + extracted_headers[key] = headers[key] + else: + # fastapi makes all header keys lowercase, handling that here + lowercase_key = key.lower() + if lowercase_key in headers: + extracted_headers[lowercase_key] = headers[lowercase_key] + return extracted_headers diff --git a/backend/danswer/chat/load_yamls.py b/backend/danswer/chat/load_yamls.py index 8d0fd34d8da..e8a19c158b2 100644 --- a/backend/danswer/chat/load_yamls.py +++ b/backend/danswer/chat/load_yamls.py @@ -6,7 +6,6 @@ from danswer.configs.chat_configs import PERSONAS_YAML from danswer.configs.chat_configs import PROMPTS_YAML from danswer.db.document_set import get_or_create_document_set_by_name -from danswer.db.engine import get_sqlalchemy_engine from danswer.db.input_prompt import insert_input_prompt_if_not_exists from danswer.db.models import DocumentSet as DocumentSetDBModel from danswer.db.models import Persona @@ -18,30 +17,32 @@ from danswer.search.enums import RecencyBiasSetting -def load_prompts_from_yaml(prompts_yaml: str = PROMPTS_YAML) -> None: +def load_prompts_from_yaml( + db_session: Session, prompts_yaml: str = PROMPTS_YAML +) -> None: with open(prompts_yaml, "r") as file: data = yaml.safe_load(file) all_prompts = data.get("prompts", []) - with Session(get_sqlalchemy_engine()) as db_session: - for prompt in all_prompts: - upsert_prompt( - user=None, - prompt_id=prompt.get("id"), - name=prompt["name"], - description=prompt["description"].strip(), - system_prompt=prompt["system"].strip(), - task_prompt=prompt["task"].strip(), - include_citations=prompt["include_citations"], - datetime_aware=prompt.get("datetime_aware", True), - default_prompt=True, - personas=None, - db_session=db_session, - commit=True, - ) + for prompt in all_prompts: + upsert_prompt( + user=None, + prompt_id=prompt.get("id"), + name=prompt["name"], + description=prompt["description"].strip(), + system_prompt=prompt["system"].strip(), + task_prompt=prompt["task"].strip(), + include_citations=prompt["include_citations"], + datetime_aware=prompt.get("datetime_aware", True), + default_prompt=True, + personas=None, + db_session=db_session, + commit=True, + ) def load_personas_from_yaml( + db_session: Session, personas_yaml: str = PERSONAS_YAML, default_chunks: float = MAX_CHUNKS_FED_TO_CHAT, ) -> None: @@ -49,117 +50,117 @@ def load_personas_from_yaml( data = yaml.safe_load(file) all_personas = data.get("personas", []) - with Session(get_sqlalchemy_engine()) as db_session: - for persona in all_personas: - doc_set_names = persona["document_sets"] - doc_sets: list[DocumentSetDBModel] = [ - get_or_create_document_set_by_name(db_session, name) - for name in doc_set_names + for persona in all_personas: + doc_set_names = persona["document_sets"] + doc_sets: list[DocumentSetDBModel] = [ + get_or_create_document_set_by_name(db_session, name) + for name in doc_set_names + ] + + # Assume if user hasn't set any document sets for the persona, the user may want + # to later attach document sets to the persona manually, therefore, don't overwrite/reset + # the document sets for the persona + doc_set_ids: list[int] | None = None + if doc_sets: + doc_set_ids = [doc_set.id for doc_set in doc_sets] + else: + doc_set_ids = None + + prompt_ids: list[int] | None = None + prompt_set_names = persona["prompts"] + if prompt_set_names: + prompts: list[PromptDBModel | None] = [ + get_prompt_by_name(prompt_name, user=None, 
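As a quick illustration of the `extract_headers` helper added to `chat_utils.py` above (a sketch only, not part of the patch; the header names are invented):

from danswer.chat.chat_utils import extract_headers

incoming = {"authorization": "Bearer abc123", "x-trace-id": "42", "accept": "*/*"}

# FastAPI lowercases header keys, so a mixed-case allowlist still matches
filtered = extract_headers(incoming, ["Authorization", "X-Trace-Id"])
# filtered == {"authorization": "Bearer abc123", "x-trace-id": "42"}

# with no allowlist configured, nothing is passed through
assert extract_headers(incoming, None) == {}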
db_session=db_session) + for prompt_name in prompt_set_names ] - - # Assume if user hasn't set any document sets for the persona, the user may want - # to later attach document sets to the persona manually, therefore, don't overwrite/reset - # the document sets for the persona - doc_set_ids: list[int] | None = None - if doc_sets: - doc_set_ids = [doc_set.id for doc_set in doc_sets] - else: - doc_set_ids = None - - prompt_ids: list[int] | None = None - prompt_set_names = persona["prompts"] - if prompt_set_names: - prompts: list[PromptDBModel | None] = [ - get_prompt_by_name(prompt_name, user=None, db_session=db_session) - for prompt_name in prompt_set_names - ] - if any([prompt is None for prompt in prompts]): - raise ValueError("Invalid Persona configs, not all prompts exist") - - if prompts: - prompt_ids = [prompt.id for prompt in prompts if prompt is not None] - - p_id = persona.get("id") - tool_ids = [] - if persona.get("image_generation"): - image_gen_tool = ( - db_session.query(ToolDBModel) - .filter(ToolDBModel.name == "ImageGenerationTool") - .first() - ) - if image_gen_tool: - tool_ids.append(image_gen_tool.id) - - llm_model_provider_override = persona.get("llm_model_provider_override") - llm_model_version_override = persona.get("llm_model_version_override") - - # Set specific overrides for image generation persona - if persona.get("image_generation"): - llm_model_version_override = "gpt-4o" - - existing_persona = ( - db_session.query(Persona) - .filter(Persona.name == persona["name"]) + if any([prompt is None for prompt in prompts]): + raise ValueError("Invalid Persona configs, not all prompts exist") + + if prompts: + prompt_ids = [prompt.id for prompt in prompts if prompt is not None] + + p_id = persona.get("id") + tool_ids = [] + if persona.get("image_generation"): + image_gen_tool = ( + db_session.query(ToolDBModel) + .filter(ToolDBModel.name == "ImageGenerationTool") .first() ) - - upsert_persona( - user=None, - persona_id=(-1 * p_id) if p_id is not None else None, - name=persona["name"], - description=persona["description"], - num_chunks=persona.get("num_chunks") - if persona.get("num_chunks") is not None - else default_chunks, - llm_relevance_filter=persona.get("llm_relevance_filter"), - starter_messages=persona.get("starter_messages"), - llm_filter_extraction=persona.get("llm_filter_extraction"), - icon_shape=persona.get("icon_shape"), - icon_color=persona.get("icon_color"), - llm_model_provider_override=llm_model_provider_override, - llm_model_version_override=llm_model_version_override, - recency_bias=RecencyBiasSetting(persona["recency_bias"]), - prompt_ids=prompt_ids, - document_set_ids=doc_set_ids, - tool_ids=tool_ids, - builtin_persona=True, - is_public=True, - display_priority=existing_persona.display_priority - if existing_persona is not None - else persona.get("display_priority"), - is_visible=existing_persona.is_visible - if existing_persona is not None - else persona.get("is_visible"), - db_session=db_session, - ) - - -def load_input_prompts_from_yaml(input_prompts_yaml: str = INPUT_PROMPT_YAML) -> None: + if image_gen_tool: + tool_ids.append(image_gen_tool.id) + + llm_model_provider_override = persona.get("llm_model_provider_override") + llm_model_version_override = persona.get("llm_model_version_override") + + # Set specific overrides for image generation persona + if persona.get("image_generation"): + llm_model_version_override = "gpt-4o" + + existing_persona = ( + db_session.query(Persona).filter(Persona.name == persona["name"]).first() + ) + + 
upsert_persona( + user=None, + persona_id=(-1 * p_id) if p_id is not None else None, + name=persona["name"], + description=persona["description"], + num_chunks=persona.get("num_chunks") + if persona.get("num_chunks") is not None + else default_chunks, + llm_relevance_filter=persona.get("llm_relevance_filter"), + starter_messages=persona.get("starter_messages"), + llm_filter_extraction=persona.get("llm_filter_extraction"), + icon_shape=persona.get("icon_shape"), + icon_color=persona.get("icon_color"), + llm_model_provider_override=llm_model_provider_override, + llm_model_version_override=llm_model_version_override, + recency_bias=RecencyBiasSetting(persona["recency_bias"]), + prompt_ids=prompt_ids, + document_set_ids=doc_set_ids, + tool_ids=tool_ids, + builtin_persona=True, + is_public=True, + display_priority=existing_persona.display_priority + if existing_persona is not None + else persona.get("display_priority"), + is_visible=existing_persona.is_visible + if existing_persona is not None + else persona.get("is_visible"), + db_session=db_session, + ) + + +def load_input_prompts_from_yaml( + db_session: Session, input_prompts_yaml: str = INPUT_PROMPT_YAML +) -> None: with open(input_prompts_yaml, "r") as file: data = yaml.safe_load(file) all_input_prompts = data.get("input_prompts", []) - with Session(get_sqlalchemy_engine()) as db_session: - for input_prompt in all_input_prompts: - # If these prompts are deleted (which is a hard delete in the DB), on server startup - # they will be recreated, but the user can always just deactivate them, just a light inconvenience - insert_input_prompt_if_not_exists( - user=None, - input_prompt_id=input_prompt.get("id"), - prompt=input_prompt["prompt"], - content=input_prompt["content"], - is_public=input_prompt["is_public"], - active=input_prompt.get("active", True), - db_session=db_session, - commit=True, - ) + for input_prompt in all_input_prompts: + # If these prompts are deleted (which is a hard delete in the DB), on server startup + # they will be recreated, but the user can always just deactivate them, just a light inconvenience + + insert_input_prompt_if_not_exists( + user=None, + input_prompt_id=input_prompt.get("id"), + prompt=input_prompt["prompt"], + content=input_prompt["content"], + is_public=input_prompt["is_public"], + active=input_prompt.get("active", True), + db_session=db_session, + commit=True, + ) def load_chat_yamls( + db_session: Session, prompt_yaml: str = PROMPTS_YAML, personas_yaml: str = PERSONAS_YAML, input_prompts_yaml: str = INPUT_PROMPT_YAML, ) -> None: - load_prompts_from_yaml(prompt_yaml) - load_personas_from_yaml(personas_yaml) - load_input_prompts_from_yaml(input_prompts_yaml) + load_prompts_from_yaml(db_session, prompt_yaml) + load_personas_from_yaml(db_session, personas_yaml) + load_input_prompts_from_yaml(db_session, input_prompts_yaml) diff --git a/backend/danswer/chat/models.py b/backend/danswer/chat/models.py index 97d5b9e7275..159506c0734 100644 --- a/backend/danswer/chat/models.py +++ b/backend/danswer/chat/models.py @@ -10,7 +10,7 @@ from danswer.search.enums import SearchType from danswer.search.models import RetrievalDocs from danswer.search.models import SearchResponse -from danswer.tools.custom.base_tool_types import ToolResultType +from danswer.tools.tool_implementations.custom.base_tool_types import ToolResultType class LlmDoc(BaseModel): @@ -156,7 +156,7 @@ class QAResponse(SearchResponse, DanswerAnswer): error_msg: str | None = None -class ImageGenerationDisplay(BaseModel): +class 
FileChatDisplay(BaseModel): file_ids: list[str] @@ -170,7 +170,7 @@ class CustomToolResponse(BaseModel): | DanswerQuotes | CitationInfo | DanswerContexts - | ImageGenerationDisplay + | FileChatDisplay | CustomToolResponse | StreamingError | StreamStopInfo diff --git a/backend/danswer/chat/personas.yaml b/backend/danswer/chat/personas.yaml index 0aececcee6c..d7b16aad99e 100644 --- a/backend/danswer/chat/personas.yaml +++ b/backend/danswer/chat/personas.yaml @@ -41,6 +41,19 @@ personas: icon_color: "#6FB1FF" display_priority: 1 is_visible: true + starter_messages: + - name: "General Information" + description: "Ask about available information" + message: "Hello! I'm interested in learning more about the information available here. Could you give me an overview of the types of data or documents that might be accessible?" + - name: "Specific Topic Search" + description: "Search for specific information" + message: "Hi! I'd like to learn more about a specific topic. Could you help me find relevant documents and information?" + - name: "Recent Updates" + description: "Inquire about latest additions" + message: "Hello! I'm curious about any recent updates or additions to the knowledge base. Can you tell me what new information has been added lately?" + - name: "Cross-referencing Information" + description: "Connect information from different sources" + message: "Hi! I'm working on a project that requires connecting information from multiple sources. How can I effectively cross-reference data across different documents or categories?" - id: 1 name: "General" @@ -57,6 +70,19 @@ personas: icon_color: "#FF6F6F" display_priority: 0 is_visible: true + starter_messages: + - name: "Open Discussion" + description: "Start an open-ended conversation" + message: "Hi! Can you help me write a professional email?" + - name: "Problem Solving" + description: "Get help with a challenge" + message: "Hello! I need help managing my daily tasks better. Do you have any simple tips?" + - name: "Learn Something New" + description: "Explore a new topic" + message: "Hi! Could you explain what project management is in simple terms?" + - name: "Creative Brainstorming" + description: "Generate creative ideas" + message: "Hello! I need to brainstorm some team building activities. Do you have any fun suggestions?" - id: 2 name: "Paraphrase" @@ -73,7 +99,19 @@ personas: icon_color: "#6FFF8D" display_priority: 2 is_visible: false - + starter_messages: + - name: "Document Search" + description: "Find exact information" + message: "Hi! Could you help me find information about our team structure and reporting lines from our internal documents?" + - name: "Process Verification" + description: "Find exact quotes" + message: "Hello! I need to understand our project approval process. Could you find the exact steps from our documentation?" + - name: "Technical Documentation" + description: "Search technical details" + message: "Hi there! I'm looking for information about our deployment procedures. Can you find the specific steps from our technical guides?" + - name: "Policy Reference" + description: "Check official policies" + message: "Hello! Could you help me find our official guidelines about client communication? I need the exact wording from our documentation." 
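The `starter_messages` blocks added above (and repeated for the other personas below) all share the same three string fields. A minimal sketch of how such a block parses with PyYAML, using shortened example text:

import yaml

snippet = """
starter_messages:
  - name: "General Information"
    description: "Ask about available information"
    message: "Hello! Could you give me an overview of the available documents?"
"""

for sm in yaml.safe_load(snippet)["starter_messages"]:
    # each entry is a plain dict with exactly these three string fields
    print(f"{sm['name']}: {sm['message']}")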
- id: 3 name: "Art" @@ -86,8 +124,21 @@ personas: llm_filter_extraction: false recency_bias: "no_decay" document_sets: [] - icon_shape: 234124 + icon_shape: 234124 icon_color: "#9B59B6" - image_generation: true + image_generation: true display_priority: 3 is_visible: true + starter_messages: + - name: "Landscape" + description: "Generate a landscape image" + message: "Create an image of a serene mountain lake at sunset, with snow-capped peaks reflected in the calm water and a small wooden cabin on the shore." + - name: "Character" + description: "Generate a character image" + message: "Generate an image of a futuristic robot with glowing blue eyes, sleek metallic body, and intricate circuitry visible through transparent panels on its chest and arms." + - name: "Abstract" + description: "Create an abstract image" + message: "Create an abstract image representing the concept of time, using swirling clock hands, fragmented hourglasses, and streaks of light to convey the passage of moments and eras." + - name: "Urban Scene" + description: "Generate an urban landscape" + message: "Generate an image of a bustling futuristic cityscape at night, with towering skyscrapers, flying vehicles, holographic advertisements, and a mix of neon and bioluminescent lighting." diff --git a/backend/danswer/chat/process_message.py b/backend/danswer/chat/process_message.py index f09ac18f32a..314e432b86a 100644 --- a/backend/danswer/chat/process_message.py +++ b/backend/danswer/chat/process_message.py @@ -11,13 +11,18 @@ from danswer.chat.models import CitationInfo from danswer.chat.models import CustomToolResponse from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import FileChatDisplay from danswer.chat.models import FinalUsedContextDocsResponse -from danswer.chat.models import ImageGenerationDisplay from danswer.chat.models import LLMRelevanceFilterResponse from danswer.chat.models import MessageResponseIDInfo from danswer.chat.models import MessageSpecificCitations from danswer.chat.models import QADocsResponse from danswer.chat.models import StreamingError +from danswer.chat.models import StreamStopInfo +from danswer.configs.app_configs import AZURE_DALLE_API_BASE +from danswer.configs.app_configs import AZURE_DALLE_API_KEY +from danswer.configs.app_configs import AZURE_DALLE_API_VERSION +from danswer.configs.app_configs import AZURE_DALLE_DEPLOYMENT_NAME from danswer.configs.chat_configs import BING_API_KEY from danswer.configs.chat_configs import CHAT_TARGET_CHUNK_PERCENTAGE from danswer.configs.chat_configs import DISABLE_LLM_CHOOSE_SEARCH @@ -73,34 +78,53 @@ from danswer.server.query_and_chat.models import CreateChatMessageRequest from danswer.server.utils import get_json_line from danswer.tools.built_in_tools import get_built_in_tool_by_id -from danswer.tools.custom.custom_tool import ( +from danswer.tools.force import ForceUseTool +from danswer.tools.models import DynamicSchemaInfo +from danswer.tools.models import ToolResponse +from danswer.tools.tool import Tool +from danswer.tools.tool_implementations.custom.custom_tool import ( build_custom_tools_from_openapi_schema_and_headers, ) -from danswer.tools.custom.custom_tool import CUSTOM_TOOL_RESPONSE_ID -from danswer.tools.custom.custom_tool import CustomToolCallSummary -from danswer.tools.force import ForceUseTool -from danswer.tools.images.image_generation_tool import IMAGE_GENERATION_RESPONSE_ID -from danswer.tools.images.image_generation_tool import ImageGenerationResponse -from danswer.tools.images.image_generation_tool 
import ImageGenerationTool -from danswer.tools.internet_search.internet_search_tool import ( +from danswer.tools.tool_implementations.custom.custom_tool import ( + CUSTOM_TOOL_RESPONSE_ID, +) +from danswer.tools.tool_implementations.custom.custom_tool import CustomToolCallSummary +from danswer.tools.tool_implementations.images.image_generation_tool import ( + IMAGE_GENERATION_RESPONSE_ID, +) +from danswer.tools.tool_implementations.images.image_generation_tool import ( + ImageGenerationResponse, +) +from danswer.tools.tool_implementations.images.image_generation_tool import ( + ImageGenerationTool, +) +from danswer.tools.tool_implementations.internet_search.internet_search_tool import ( INTERNET_SEARCH_RESPONSE_ID, ) -from danswer.tools.internet_search.internet_search_tool import ( +from danswer.tools.tool_implementations.internet_search.internet_search_tool import ( internet_search_response_to_search_docs, ) -from danswer.tools.internet_search.internet_search_tool import InternetSearchResponse -from danswer.tools.internet_search.internet_search_tool import InternetSearchTool -from danswer.tools.models import DynamicSchemaInfo -from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID -from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID -from danswer.tools.search.search_tool import SearchResponseSummary -from danswer.tools.search.search_tool import SearchTool -from danswer.tools.search.search_tool import SECTION_RELEVANCE_LIST_ID -from danswer.tools.tool import Tool -from danswer.tools.tool import ToolResponse +from danswer.tools.tool_implementations.internet_search.internet_search_tool import ( + InternetSearchResponse, +) +from danswer.tools.tool_implementations.internet_search.internet_search_tool import ( + InternetSearchTool, +) +from danswer.tools.tool_implementations.search.search_tool import ( + FINAL_CONTEXT_DOCUMENTS_ID, +) +from danswer.tools.tool_implementations.search.search_tool import ( + SEARCH_RESPONSE_SUMMARY_ID, +) +from danswer.tools.tool_implementations.search.search_tool import SearchResponseSummary +from danswer.tools.tool_implementations.search.search_tool import SearchTool +from danswer.tools.tool_implementations.search.search_tool import ( + SECTION_RELEVANCE_LIST_ID, +) from danswer.tools.tool_runner import ToolCallFinalResult from danswer.tools.utils import compute_all_tool_tokens from danswer.tools.utils import explicit_tool_calling_supported +from danswer.utils.headers import header_dict_to_header_list from danswer.utils.logger import setup_logger from danswer.utils.timing import log_generator_function_time @@ -251,10 +275,11 @@ def _get_force_search_settings( | DanswerAnswerPiece | AllCitations | CitationInfo - | ImageGenerationDisplay + | FileChatDisplay | CustomToolResponse | MessageSpecificCitations | MessageResponseIDInfo + | StreamStopInfo ) ChatPacketStream = Iterator[ChatPacket] @@ -272,6 +297,7 @@ def stream_chat_message_objects( # on the `new_msg_req.message`. 
Currently, requires a state where the last message is a use_existing_user_message: bool = False, litellm_additional_headers: dict[str, str] | None = None, + custom_tool_additional_headers: dict[str, str] | None = None, is_connected: Callable[[], bool] | None = None, enforce_chat_session_id_for_search_docs: bool = True, ) -> ChatPacketStream: @@ -526,6 +552,13 @@ def stream_chat_message_objects( if not persona else PromptConfig.from_model(persona.prompts[0]) ) + answer_style_config = AnswerStyleConfig( + citation_config=CitationConfig( + all_docs_useful=selected_db_search_docs is not None + ), + document_pruning_config=document_pruning_config, + structured_response_format=new_msg_req.structured_response_format, + ) # find out what tools to use search_tool: SearchTool | None = None @@ -544,13 +577,16 @@ def stream_chat_message_objects( llm=llm, fast_llm=fast_llm, pruning_config=document_pruning_config, + answer_style_config=answer_style_config, selected_sections=selected_sections, chunks_above=new_msg_req.chunks_above, chunks_below=new_msg_req.chunks_below, full_doc=new_msg_req.full_doc, - evaluation_type=LLMEvaluationType.BASIC - if persona.llm_relevance_filter - else LLMEvaluationType.SKIP, + evaluation_type=( + LLMEvaluationType.BASIC + if persona.llm_relevance_filter + else LLMEvaluationType.SKIP + ), ) tool_dict[db_tool_model.id] = [search_tool] elif tool_cls.__name__ == ImageGenerationTool.__name__: @@ -560,7 +596,26 @@ def stream_chat_message_objects( and llm.config.api_key and llm.config.model_provider == "openai" ): - img_generation_llm_config = llm.config + img_generation_llm_config = LLMConfig( + model_provider=llm.config.model_provider, + model_name="dall-e-3", + temperature=GEN_AI_TEMPERATURE, + api_key=llm.config.api_key, + api_base=llm.config.api_base, + api_version=llm.config.api_version, + ) + elif ( + llm.config.model_provider == "azure" + and AZURE_DALLE_API_KEY is not None + ): + img_generation_llm_config = LLMConfig( + model_provider="azure", + model_name=f"azure/{AZURE_DALLE_DEPLOYMENT_NAME}", + temperature=GEN_AI_TEMPERATURE, + api_key=AZURE_DALLE_API_KEY, + api_base=AZURE_DALLE_API_BASE, + api_version=AZURE_DALLE_API_VERSION, + ) else: llm_providers = fetch_existing_llm_providers(db_session) openai_provider = next( @@ -579,7 +634,7 @@ def stream_chat_message_objects( ) img_generation_llm_config = LLMConfig( model_provider=openai_provider.provider, - model_name=openai_provider.default_model_name, + model_name="dall-e-3", temperature=GEN_AI_TEMPERATURE, api_key=openai_provider.api_key, api_base=openai_provider.api_base, @@ -591,6 +646,7 @@ def stream_chat_message_objects( api_base=img_generation_llm_config.api_base, api_version=img_generation_llm_config.api_version, additional_headers=litellm_additional_headers, + model=img_generation_llm_config.model_name, ) ] elif tool_cls.__name__ == InternetSearchTool.__name__: @@ -600,7 +656,11 @@ def stream_chat_message_objects( "Internet search tool requires a Bing API key, please contact your Danswer admin to get it added!" 
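The Azure image-generation branch above is gated on the `AZURE_DALLE_*` settings introduced in `danswer/configs/app_configs.py` later in this patch. A sketch of the environment an operator might set to enable it (the values are placeholders, not defaults from the patch):

import os

# placeholder values; these must be set before the API server starts
os.environ["AZURE_DALLE_API_KEY"] = "<azure-openai-api-key>"
os.environ["AZURE_DALLE_API_BASE"] = "https://my-resource.openai.azure.com"
os.environ["AZURE_DALLE_API_VERSION"] = "2024-02-01"  # example version string
os.environ["AZURE_DALLE_DEPLOYMENT_NAME"] = "my-dalle-3-deployment"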
) tool_dict[db_tool_model.id] = [ - InternetSearchTool(api_key=bing_api_key) + InternetSearchTool( + api_key=bing_api_key, + answer_style_config=answer_style_config, + prompt_config=prompt_config, + ) ] continue @@ -615,7 +675,12 @@ def stream_chat_message_objects( chat_session_id=chat_session_id, message_id=user_message.id if user_message else None, ), - custom_headers=db_tool_model.custom_headers, + custom_headers=(db_tool_model.custom_headers or []) + + ( + header_dict_to_header_list( + custom_tool_additional_headers or {} + ) + ), ), ) @@ -636,12 +701,7 @@ def stream_chat_message_objects( is_connected=is_connected, question=final_msg.message, latest_query_files=latest_query_files, - answer_style_config=AnswerStyleConfig( - citation_config=CitationConfig( - all_docs_useful=selected_db_search_docs is not None - ), - document_pruning_config=document_pruning_config, - ), + answer_style_config=answer_style_config, prompt_config=prompt_config, llm=( llm @@ -709,7 +769,6 @@ def stream_chat_message_objects( yield LLMRelevanceFilterResponse( llm_selected_doc_indices=llm_indices ) - elif packet.id == FINAL_CONTEXT_DOCUMENTS_ID: yield FinalUsedContextDocsResponse( final_context_docs=packet.response @@ -727,7 +786,7 @@ def stream_chat_message_objects( FileDescriptor(id=str(file_id), type=ChatFileType.IMAGE) for file_id in file_ids ] - yield ImageGenerationDisplay( + yield FileChatDisplay( file_ids=[str(file_id) for file_id in file_ids] ) elif packet.id == INTERNET_SEARCH_RESPONSE_ID: @@ -741,11 +800,32 @@ def stream_chat_message_objects( yield qa_docs_response elif packet.id == CUSTOM_TOOL_RESPONSE_ID: custom_tool_response = cast(CustomToolCallSummary, packet.response) - yield CustomToolResponse( - response=custom_tool_response.tool_result, - tool_name=custom_tool_response.tool_name, - ) + if ( + custom_tool_response.response_type == "image" + or custom_tool_response.response_type == "csv" + ): + file_ids = custom_tool_response.tool_result.file_ids + ai_message_files = [ + FileDescriptor( + id=str(file_id), + type=ChatFileType.IMAGE + if custom_tool_response.response_type == "image" + else ChatFileType.CSV, + ) + for file_id in file_ids + ] + yield FileChatDisplay( + file_ids=[str(file_id) for file_id in file_ids] + ) + else: + yield CustomToolResponse( + response=custom_tool_response.tool_result, + tool_name=custom_tool_response.tool_name, + ) + + elif isinstance(packet, StreamStopInfo): + pass else: if isinstance(packet, ToolCallFinalResult): tool_result = packet @@ -775,6 +855,7 @@ def stream_chat_message_objects( # Post-LLM answer processing try: + logger.debug("Post-LLM answer processing") message_specific_citations: MessageSpecificCitations | None = None if reference_db_search_docs: message_specific_citations = _translate_citations( @@ -802,17 +883,15 @@ def stream_chat_message_objects( if message_specific_citations else None, error=None, - tool_calls=( - [ - ToolCall( - tool_id=tool_name_to_tool_id[tool_result.tool_name], - tool_name=tool_result.tool_name, - tool_arguments=tool_result.tool_args, - tool_result=tool_result.tool_result, - ) - ] + tool_call=( + ToolCall( + tool_id=tool_name_to_tool_id[tool_result.tool_name], + tool_name=tool_result.tool_name, + tool_arguments=tool_result.tool_args, + tool_result=tool_result.tool_result, + ) if tool_result - else [] + else None ), ) @@ -838,6 +917,7 @@ def stream_chat_message( user: User | None, use_existing_user_message: bool = False, litellm_additional_headers: dict[str, str] | None = None, + custom_tool_additional_headers: dict[str, str] | 
None = None, is_connected: Callable[[], bool] | None = None, ) -> Iterator[str]: with get_session_context_manager() as db_session: @@ -847,6 +927,7 @@ def stream_chat_message( db_session=db_session, use_existing_user_message=use_existing_user_message, litellm_additional_headers=litellm_additional_headers, + custom_tool_additional_headers=custom_tool_additional_headers, is_connected=is_connected, ) for obj in objects: diff --git a/backend/danswer/chat/prompts.yaml b/backend/danswer/chat/prompts.yaml index d83d4ede4b5..85752b4d20d 100644 --- a/backend/danswer/chat/prompts.yaml +++ b/backend/danswer/chat/prompts.yaml @@ -9,19 +9,19 @@ prompts: system: > You are a question answering system that is constantly learning and improving. The current date is DANSWER_DATETIME_REPLACEMENT. - + You can process and comprehend vast amounts of text and utilize this knowledge to provide grounded, accurate, and concise answers to diverse queries. - + You always clearly communicate ANY UNCERTAINTY in your answer. # Task Prompt (as shown in UI) task: > Answer my query based on the documents provided. The documents may not all be relevant, ignore any documents that are not directly relevant to the most recent user query. - + I have not read or seen any of the documents and do not want to read them. - + If there are no relevant documents, refer to the chat history and your internal knowledge. # Inject a statement at the end of system prompt to inform the LLM of the current date/time # If the DANSWER_DATETIME_REPLACEMENT is set, the date/time is inserted there instead @@ -30,21 +30,21 @@ prompts: # Prompts the LLM to include citations in the for [1], [2] etc. # which get parsed to match the passed in sources include_citations: true - + - name: "ImageGeneration" - description: "Generates images based on user prompts!" + description: "Generates images from user descriptions!" system: > - You are an advanced image generation system capable of creating diverse and detailed images. - - You can interpret user prompts and generate high-quality, creative images that match their descriptions. - - You always strive to create safe and appropriate content, avoiding any harmful or offensive imagery. + You are an AI image generation assistant. Your role is to create high-quality images based on user descriptions. + + For appropriate requests, you will generate an image that matches the user's requirements. + For inappropriate or unsafe requests, you will politely decline and explain why the request cannot be fulfilled. + + You aim to be helpful while maintaining appropriate content standards. task: > - Generate an image based on the user's description. - - Provide a detailed description of the generated image, including key elements, colors, and composition. - - If the request is not possible or appropriate, explain why and suggest alternatives. + Based on the user's description, create a high-quality image that accurately reflects their request. + Pay close attention to the specified details, styles, and desired elements. + + If the request is not appropriate or cannot be fulfilled, explain why and suggest alternatives. datetime_aware: true include_citations: false @@ -64,14 +64,13 @@ prompts: datetime_aware: true include_citations: true - - name: "Summarize" description: "Summarize relevant information from retrieved context!" system: > You are a text summarizing assistant that highlights the most important knowledge from the context provided, prioritizing the information that relates to the user query. 
The current date is DANSWER_DATETIME_REPLACEMENT. - + You ARE NOT creative and always stick to the provided documents. If there are no documents, refer to the conversation history. @@ -84,7 +83,6 @@ prompts: datetime_aware: true include_citations: true - - name: "Paraphrase" description: "Recites information from retrieved context! Least creative but most safe!" system: > @@ -92,10 +90,10 @@ prompts: The current date is DANSWER_DATETIME_REPLACEMENT. You only provide quotes that are EXACT substrings from provided documents! - + If there are no documents provided, simply tell the user that there are no documents to reference. - + You NEVER generate new text or phrases outside of the citation. DO NOT explain your responses, only provide the quotes and NOTHING ELSE. task: > diff --git a/backend/danswer/configs/app_configs.py b/backend/danswer/configs/app_configs.py index aa3cccc512f..3e2695072a2 100644 --- a/backend/danswer/configs/app_configs.py +++ b/backend/danswer/configs/app_configs.py @@ -43,6 +43,9 @@ AUTH_TYPE = AuthType((os.environ.get("AUTH_TYPE") or AuthType.DISABLED.value).lower()) DISABLE_AUTH = AUTH_TYPE == AuthType.DISABLED +# Necessary for cloud integration tests +DISABLE_VERIFICATION = os.environ.get("DISABLE_VERIFICATION", "").lower() == "true" + # Encryption key secret is used to encrypt connector credentials, api keys, and other sensitive # information. This provides an extra layer of security on top of Postgres access controls # and is available in Danswer EE @@ -53,7 +56,6 @@ os.environ.get("MASK_CREDENTIAL_PREFIX", "True").lower() != "false" ) - SESSION_EXPIRE_TIME_SECONDS = int( os.environ.get("SESSION_EXPIRE_TIME_SECONDS") or 86400 * 7 ) # 7 days @@ -116,17 +118,22 @@ VESPA_CONFIG_SERVER_HOST = os.environ.get("VESPA_CONFIG_SERVER_HOST") or VESPA_HOST VESPA_PORT = os.environ.get("VESPA_PORT") or "8081" VESPA_TENANT_PORT = os.environ.get("VESPA_TENANT_PORT") or "19071" + +VESPA_CLOUD_URL = os.environ.get("VESPA_CLOUD_URL", "") + # The default below is for dockerized deployment VESPA_DEPLOYMENT_ZIP = ( os.environ.get("VESPA_DEPLOYMENT_ZIP") or "/app/danswer/vespa-app.zip" ) +VESPA_CLOUD_CERT_PATH = os.environ.get("VESPA_CLOUD_CERT_PATH") +VESPA_CLOUD_KEY_PATH = os.environ.get("VESPA_CLOUD_KEY_PATH") + # Number of documents in a batch during indexing (further batching done by chunks before passing to bi-encoder) try: INDEX_BATCH_SIZE = int(os.environ.get("INDEX_BATCH_SIZE", 16)) except ValueError: INDEX_BATCH_SIZE = 16 - # Below are intended to match the env variables names used by the official postgres docker image # https://hub.docker.com/_/postgres POSTGRES_USER = os.environ.get("POSTGRES_USER") or "postgres" @@ -138,6 +145,12 @@ POSTGRES_PORT = os.environ.get("POSTGRES_PORT") or "5432" POSTGRES_DB = os.environ.get("POSTGRES_DB") or "postgres" +POSTGRES_API_SERVER_POOL_SIZE = int( + os.environ.get("POSTGRES_API_SERVER_POOL_SIZE") or 40 +) +POSTGRES_API_SERVER_POOL_OVERFLOW = int( + os.environ.get("POSTGRES_API_SERVER_POOL_OVERFLOW") or 10 +) # defaults to False POSTGRES_POOL_PRE_PING = os.environ.get("POSTGRES_POOL_PRE_PING", "").lower() == "true" @@ -150,6 +163,17 @@ except ValueError: POSTGRES_POOL_RECYCLE = POSTGRES_POOL_RECYCLE_DEFAULT +# Experimental setting to control idle transactions +POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT = 0 # milliseconds +try: + POSTGRES_IDLE_SESSIONS_TIMEOUT = int( + os.environ.get( + "POSTGRES_IDLE_SESSIONS_TIMEOUT", POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT + ) + ) +except ValueError: + POSTGRES_IDLE_SESSIONS_TIMEOUT = 
POSTGRES_IDLE_SESSIONS_TIMEOUT_DEFAULT + REDIS_SSL = os.getenv("REDIS_SSL", "").lower() == "true" REDIS_HOST = os.environ.get("REDIS_HOST") or "localhost" REDIS_PORT = int(os.environ.get("REDIS_PORT", 6379)) @@ -187,6 +211,41 @@ except ValueError: CELERY_BROKER_POOL_LIMIT = CELERY_BROKER_POOL_LIMIT_DEFAULT +CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT = 24 +try: + CELERY_WORKER_LIGHT_CONCURRENCY = int( + os.environ.get( + "CELERY_WORKER_LIGHT_CONCURRENCY", CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT + ) + ) +except ValueError: + CELERY_WORKER_LIGHT_CONCURRENCY = CELERY_WORKER_LIGHT_CONCURRENCY_DEFAULT + +CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT = 8 +try: + CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER = int( + os.environ.get( + "CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER", + CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT, + ) + ) +except ValueError: + CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER = ( + CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER_DEFAULT + ) + +CELERY_WORKER_INDEXING_CONCURRENCY_DEFAULT = 1 +try: + env_value = os.environ.get("CELERY_WORKER_INDEXING_CONCURRENCY") + if not env_value: + env_value = os.environ.get("NUM_INDEXING_WORKERS") + + if not env_value: + env_value = str(CELERY_WORKER_INDEXING_CONCURRENCY_DEFAULT) + CELERY_WORKER_INDEXING_CONCURRENCY = int(env_value) +except ValueError: + CELERY_WORKER_INDEXING_CONCURRENCY = CELERY_WORKER_INDEXING_CONCURRENCY_DEFAULT + ##### # Connector Configs ##### @@ -203,9 +262,6 @@ # for some connectors ENABLE_EXPENSIVE_EXPERT_CALLS = False -GOOGLE_DRIVE_INCLUDE_SHARED = False -GOOGLE_DRIVE_FOLLOW_SHORTCUTS = False -GOOGLE_DRIVE_ONLY_ORG_PUBLIC = False # TODO these should be available for frontend configuration, via advanced options expandable WEB_CONNECTOR_IGNORED_CLASSES = os.environ.get( @@ -242,12 +298,6 @@ os.environ.get("CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES", "").lower() == "true" ) -# Save pages labels as Danswer metadata tags -# The reason to skip this would be to reduce the number of calls to Confluence due to rate limit concerns -CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING = ( - os.environ.get("CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING", "").lower() == "true" -) - # Attachments exceeding this size will not be retrieved (in bytes) CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD = int( os.environ.get("CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD", 10 * 1024 * 1024) @@ -354,12 +404,10 @@ # exception without aborting the attempt. 
INDEXING_EXCEPTION_LIMIT = int(os.environ.get("INDEXING_EXCEPTION_LIMIT", 0)) + ##### # Miscellaneous ##### -# File based Key Value store no longer used -DYNAMIC_CONFIG_STORE = "PostgresBackedDynamicConfigStore" - JOB_TIMEOUT = 60 * 60 * 6 # 6 hours default # used to allow the background indexing jobs to use a different embedding # model server than the API server @@ -397,6 +445,11 @@ os.environ.get("CUSTOM_ANSWER_VALIDITY_CONDITIONS", "[]") ) +VESPA_REQUEST_TIMEOUT = int(os.environ.get("VESPA_REQUEST_TIMEOUT") or "15") + +SYSTEM_RECURSION_LIMIT = int(os.environ.get("SYSTEM_RECURSION_LIMIT") or "1000") + +PARSE_WITH_TRAFILATURA = os.environ.get("PARSE_WITH_TRAFILATURA", "").lower() == "true" ##### # Enterprise Edition Configs @@ -408,3 +461,35 @@ ENTERPRISE_EDITION_ENABLED = ( os.environ.get("ENABLE_PAID_ENTERPRISE_EDITION_FEATURES", "").lower() == "true" ) + +# Azure DALL-E Configurations +AZURE_DALLE_API_VERSION = os.environ.get("AZURE_DALLE_API_VERSION") +AZURE_DALLE_API_KEY = os.environ.get("AZURE_DALLE_API_KEY") +AZURE_DALLE_API_BASE = os.environ.get("AZURE_DALLE_API_BASE") +AZURE_DALLE_DEPLOYMENT_NAME = os.environ.get("AZURE_DALLE_DEPLOYMENT_NAME") + + +# Use managed Vespa (Vespa Cloud). If set, must also set VESPA_CLOUD_URL, VESPA_CLOUD_CERT_PATH and VESPA_CLOUD_KEY_PATH +MANAGED_VESPA = os.environ.get("MANAGED_VESPA", "").lower() == "true" + +ENABLE_EMAIL_INVITES = os.environ.get("ENABLE_EMAIL_INVITES", "").lower() == "true" + +# Security and authentication +DATA_PLANE_SECRET = os.environ.get( + "DATA_PLANE_SECRET", "" +) # Used for secure communication between the control and data plane +EXPECTED_API_KEY = os.environ.get( + "EXPECTED_API_KEY", "" +) # Additional security check for the control plane API + +# API configuration +CONTROL_PLANE_API_BASE_URL = os.environ.get( + "CONTROL_PLANE_API_BASE_URL", "http://localhost:8082" +) + +# JWT configuration +JWT_ALGORITHM = "HS256" + +# Super Users +SUPER_USERS = json.loads(os.environ.get("SUPER_USERS", '["pablo@danswer.ai"]')) +SUPER_CLOUD_API_KEY = os.environ.get("SUPER_CLOUD_API_KEY", "api_key") diff --git a/backend/danswer/configs/constants.py b/backend/danswer/configs/constants.py index 678b3a5499d..36b9a8bf3de 100644 --- a/backend/danswer/configs/constants.py +++ b/backend/danswer/configs/constants.py @@ -39,6 +39,8 @@ POSTGRES_CELERY_WORKER_PRIMARY_APP_NAME = "celery_worker_primary" POSTGRES_CELERY_WORKER_LIGHT_APP_NAME = "celery_worker_light" POSTGRES_CELERY_WORKER_HEAVY_APP_NAME = "celery_worker_heavy" +POSTGRES_CELERY_WORKER_INDEXING_APP_NAME = "celery_worker_indexing" +POSTGRES_CELERY_WORKER_INDEXING_CHILD_APP_NAME = "celery_worker_indexing_child" POSTGRES_PERMISSIONS_APP_NAME = "permissions" POSTGRES_UNKNOWN_APP_NAME = "unknown" @@ -50,6 +52,7 @@ # Key-Value store keys KV_REINDEX_KEY = "needs_reindexing" KV_SEARCH_SETTINGS = "search_settings" +KV_UNSTRUCTURED_API_KEY = "unstructured_api_key" KV_USER_STORE_KEY = "INVITED_USERS" KV_NO_AUTH_USER_PREFERENCES_KEY = "no_auth_user_preferences" KV_CRED_KEY = "credential_id_{}" @@ -64,10 +67,21 @@ KV_INSTANCE_DOMAIN_KEY = "instance_domain" KV_ENTERPRISE_SETTINGS_KEY = "danswer_enterprise_settings" KV_CUSTOM_ANALYTICS_SCRIPT_KEY = "__custom_analytics_script__" +KV_DOCUMENTS_SEEDED_KEY = "documents_seeded" CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT = 60 CELERY_PRIMARY_WORKER_LOCK_TIMEOUT = 120 +# needs to be long enough to cover the maximum time it takes to download an object +# if we can get callbacks as object bytes download, we could lower this a lot. 
+CELERY_INDEXING_LOCK_TIMEOUT = 60 * 60 # 60 min + +# needs to be long enough to cover the maximum time it takes to download an object +# if we can get callbacks as object bytes download, we could lower this a lot. +CELERY_PRUNING_LOCK_TIMEOUT = 300 # 5 min + +DANSWER_REDIS_FUNCTION_LOCK_PREFIX = "da_function_lock:" + class DocumentSource(str, Enum): # Special case, document passed in via Danswer APIs without specifying a source type @@ -111,10 +125,16 @@ class DocumentSource(str, Enum): OCI_STORAGE = "oci_storage" XENFORO = "xenforo" NOT_APPLICABLE = "not_applicable" + FRESHDESK = "freshdesk" + + +DocumentSourceRequiringTenantContext: list[DocumentSource] = [DocumentSource.FILE] class NotificationType(str, Enum): REINDEX = "reindex" + PERSONA_SHARED = "persona_shared" + TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending" # 2 days left in trial class BlobType(str, Enum): @@ -139,6 +159,9 @@ class AuthType(str, Enum): OIDC = "oidc" SAML = "saml" + # google auth and basic + CLOUD = "cloud" + class SessionType(str, Enum): CHAT = "Chat" @@ -185,18 +208,22 @@ class PostgresAdvisoryLocks(Enum): class DanswerCeleryQueues: - VESPA_DOCSET_SYNC_GENERATOR = "vespa_docset_sync_generator" - VESPA_USERGROUP_SYNC_GENERATOR = "vespa_usergroup_sync_generator" VESPA_METADATA_SYNC = "vespa_metadata_sync" CONNECTOR_DELETION = "connector_deletion" + CONNECTOR_PRUNING = "connector_pruning" + CONNECTOR_INDEXING = "connector_indexing" class DanswerRedisLocks: PRIMARY_WORKER = "da_lock:primary_worker" CHECK_VESPA_SYNC_BEAT_LOCK = "da_lock:check_vespa_sync_beat" - MONITOR_VESPA_SYNC_BEAT_LOCK = "da_lock:monitor_vespa_sync_beat" CHECK_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:check_connector_deletion_beat" - MONITOR_CONNECTOR_DELETION_BEAT_LOCK = "da_lock:monitor_connector_deletion_beat" + CHECK_PRUNE_BEAT_LOCK = "da_lock:check_prune_beat" + CHECK_INDEXING_BEAT_LOCK = "da_lock:check_indexing_beat" + MONITOR_VESPA_SYNC_BEAT_LOCK = "da_lock:monitor_vespa_sync_beat" + + PRUNING_LOCK_PREFIX = "da_lock:pruning" + INDEXING_METADATA_PREFIX = "da_metadata:indexing" class DanswerCeleryPriority(int, Enum): diff --git a/backend/danswer/configs/tool_configs.py b/backend/danswer/configs/tool_configs.py new file mode 100644 index 00000000000..3170cb31ff9 --- /dev/null +++ b/backend/danswer/configs/tool_configs.py @@ -0,0 +1,22 @@ +import json +import os + + +# if specified, will pass through request headers to the call to API calls made by custom tools +CUSTOM_TOOL_PASS_THROUGH_HEADERS: list[str] | None = None +_CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW = os.environ.get( + "CUSTOM_TOOL_PASS_THROUGH_HEADERS" +) +if _CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW: + try: + CUSTOM_TOOL_PASS_THROUGH_HEADERS = json.loads( + _CUSTOM_TOOL_PASS_THROUGH_HEADERS_RAW + ) + except Exception: + # need to import here to avoid circular imports + from danswer.utils.logger import setup_logger + + logger = setup_logger() + logger.error( + "Failed to parse CUSTOM_TOOL_PASS_THROUGH_HEADERS, must be a valid JSON object" + ) diff --git a/backend/danswer/connectors/README.md b/backend/danswer/connectors/README.md index ef6c63d2697..bb7f5a5fe4f 100644 --- a/backend/danswer/connectors/README.md +++ b/backend/danswer/connectors/README.md @@ -13,8 +13,8 @@ Connectors come in 3 different flows: documents via a connector's API or loads the documents from some sort of a dump file. - Poll connector: - Incrementally updates documents based on a provided time range. It is used by the background job to pull the latest - changes additions and changes since the last round of polling. 
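The new `tool_configs.py` above expects `CUSTOM_TOOL_PASS_THROUGH_HEADERS` to be a JSON-encoded list of header names. A sketch of how that value is parsed (the header names are examples, not defaults):

import json
import os

# a JSON list of header names, e.g. set in the deployment environment
os.environ["CUSTOM_TOOL_PASS_THROUGH_HEADERS"] = '["Authorization", "X-Request-Id"]'

raw = os.environ.get("CUSTOM_TOOL_PASS_THROUGH_HEADERS")
headers_to_pass = json.loads(raw) if raw else None
# headers_to_pass == ["Authorization", "X-Request-Id"]; a malformed value is logged
# as an error and left as None by the config module above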
This connector helps keep the document index up to date - without needing to fetch/embed/index every document which generally be too slow to do frequently on large sets of + changes and additions since the last round of polling. This connector helps keep the document index up to date + without needing to fetch/embed/index every document which would be too slow to do frequently on large sets of documents. - Event Based connectors: - Connectors that listen to events and update documents accordingly. diff --git a/backend/danswer/connectors/axero/connector.py b/backend/danswer/connectors/axero/connector.py index a4d5162b6ce..000151209de 100644 --- a/backend/danswer/connectors/axero/connector.py +++ b/backend/danswer/connectors/axero/connector.py @@ -15,7 +15,6 @@ from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( rate_limit_builder, ) -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch @@ -24,6 +23,7 @@ from danswer.connectors.models import Section from danswer.file_processing.html_utils import parse_html_page_basic from danswer.utils.logger import setup_logger +from danswer.utils.retry_wrapper import retry_builder logger = setup_logger() diff --git a/backend/danswer/connectors/blob/connector.py b/backend/danswer/connectors/blob/connector.py index a664a3d764a..1f030a7564f 100644 --- a/backend/danswer/connectors/blob/connector.py +++ b/backend/danswer/connectors/blob/connector.py @@ -194,8 +194,8 @@ def _yield_blob_objects( try: text = extract_file_text( - name, BytesIO(downloaded_file), + file_name=name, break_on_unprocessable=False, ) batch.append( diff --git a/backend/danswer/connectors/bookstack/connector.py b/backend/danswer/connectors/bookstack/connector.py index f2e692d2c5f..9255bc3b8e5 100644 --- a/backend/danswer/connectors/bookstack/connector.py +++ b/backend/danswer/connectors/bookstack/connector.py @@ -44,8 +44,6 @@ def _get_doc_batch( start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None, ) -> tuple[list[Document], int]: - doc_batch: list[Document] = [] - params = { "count": str(batch_size), "offset": str(start_ind), @@ -63,8 +61,7 @@ def _get_doc_batch( ) batch = bookstack_client.get(endpoint, params=params).get("data", []) - for item in batch: - doc_batch.append(transformer(bookstack_client, item)) + doc_batch = [transformer(bookstack_client, item) for item in batch] return doc_batch, len(batch) diff --git a/backend/danswer/connectors/clickup/connector.py b/backend/danswer/connectors/clickup/connector.py index 78d572af413..2ccc5ef4f65 100644 --- a/backend/danswer/connectors/clickup/connector.py +++ b/backend/danswer/connectors/clickup/connector.py @@ -10,7 +10,6 @@ from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( rate_limit_builder, ) -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector @@ -19,6 +18,7 @@ from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.utils.retry_wrapper import retry_builder CLICKUP_API_BASE_URL = "https://api.clickup.com/api/v2" @@ -210,6 
+210,7 @@ def poll_source( "clickup_team_id": os.environ["clickup_team_id"], } ) + latest_docs = clickup_connector.load_from_state() for doc in latest_docs: diff --git a/backend/danswer/connectors/confluence/confluence_utils.py b/backend/danswer/connectors/confluence/confluence_utils.py deleted file mode 100644 index 927e989bf3f..00000000000 --- a/backend/danswer/connectors/confluence/confluence_utils.py +++ /dev/null @@ -1,32 +0,0 @@ -import bs4 - - -def build_confluence_document_id(base_url: str, content_url: str) -> str: - """For confluence, the document id is the page url for a page based document - or the attachment download url for an attachment based document - - Args: - base_url (str): The base url of the Confluence instance - content_url (str): The url of the page or attachment download url - - Returns: - str: The document id - """ - return f"{base_url}{content_url}" - - -def get_used_attachments(text: str) -> list[str]: - """Parse a Confluence html page to generate a list of current - attachment in used - - Args: - text (str): The page content - - Returns: - list[str]: List of filenames currently in use by the page text - """ - files_in_used = [] - soup = bs4.BeautifulSoup(text, "html.parser") - for attachment in soup.findAll("ri:attachment"): - files_in_used.append(attachment.attrs["ri:filename"]) - return files_in_used diff --git a/backend/danswer/connectors/confluence/connector.py b/backend/danswer/connectors/confluence/connector.py index d3caf66cc14..9c93f93f99b 100644 --- a/backend/danswer/connectors/confluence/connector.py +++ b/backend/danswer/connectors/confluence/connector.py @@ -1,44 +1,29 @@ -import io -import os -from collections.abc import Callable -from collections.abc import Collection from datetime import datetime from datetime import timezone -from functools import lru_cache from typing import Any -from typing import cast +from urllib.parse import quote -import bs4 -from atlassian import Confluence # type:ignore -from requests import HTTPError - -from danswer.configs.app_configs import ( - CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD, -) -from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD -from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_LABELS_TO_SKIP -from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.connectors.confluence.confluence_utils import ( - build_confluence_document_id, -) -from danswer.connectors.confluence.confluence_utils import get_used_attachments -from danswer.connectors.confluence.rate_limit_handler import ( - make_confluence_call_handle_rate_limit, -) +from danswer.connectors.confluence.onyx_confluence import OnyxConfluence +from danswer.connectors.confluence.utils import attachment_to_content +from danswer.connectors.confluence.utils import build_confluence_client +from danswer.connectors.confluence.utils import build_confluence_document_id +from danswer.connectors.confluence.utils import datetime_from_string +from danswer.connectors.confluence.utils import extract_text_from_confluence_html from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import GenerateSlimDocumentOutput from danswer.connectors.interfaces import LoadConnector from 
danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.interfaces import SlimConnector from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section -from danswer.file_processing.extract_file_text import extract_file_text -from danswer.file_processing.html_utils import format_document_soup +from danswer.connectors.models import SlimDocument from danswer.utils.logger import setup_logger logger = setup_logger() @@ -47,248 +32,35 @@ # 1. Include attachments, etc # 2. Segment into Sections for more accurate linking, can split by headers but make sure no text/ordering is lost - -NO_PERMISSIONS_TO_VIEW_ATTACHMENTS_ERROR_STR = ( - "User not permitted to view attachments on content" -) -NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR = ( - "No parent or not permitted to view content with id" -) - - -@lru_cache() -def _get_user(user_id: str, confluence_client: Confluence) -> str: - """Get Confluence Display Name based on the account-id or userkey value - - Args: - user_id (str): The user id (i.e: the account-id or userkey) - confluence_client (Confluence): The Confluence Client - - Returns: - str: The User Display Name. 'Unknown User' if the user is deactivated or not found - """ - user_not_found = "Unknown User" - - get_user_details_by_accountid = make_confluence_call_handle_rate_limit( - confluence_client.get_user_details_by_accountid - ) - try: - return get_user_details_by_accountid(user_id).get("displayName", user_not_found) - except Exception as e: - logger.warning( - f"Unable to get the User Display Name with the id: '{user_id}' - {e}" - ) - return user_not_found - - -def parse_html_page(text: str, confluence_client: Confluence) -> str: - """Parse a Confluence html page and replace the 'user Id' by the real - User Display Name - - Args: - text (str): The page content - confluence_client (Confluence): Confluence client - - Returns: - str: loaded and formated Confluence page - """ - soup = bs4.BeautifulSoup(text, "html.parser") - for user in soup.findAll("ri:user"): - user_id = ( - user.attrs["ri:account-id"] - if "ri:account-id" in user.attrs - else user.get("ri:userkey") - ) - if not user_id: - logger.warning( - "ri:userkey not found in ri:user element. 
" f"Found attrs: {user.attrs}" - ) - continue - # Include @ sign for tagging, more clear for LLM - user.replaceWith("@" + _get_user(user_id, confluence_client)) - return format_document_soup(soup) - - -def _comment_dfs( - comments_str: str, - comment_pages: Collection[dict[str, Any]], - confluence_client: Confluence, -) -> str: - get_page_child_by_type = make_confluence_call_handle_rate_limit( - confluence_client.get_page_child_by_type - ) - - for comment_page in comment_pages: - comment_html = comment_page["body"]["storage"]["value"] - comments_str += "\nComment:\n" + parse_html_page( - comment_html, confluence_client - ) - try: - child_comment_pages = get_page_child_by_type( - comment_page["id"], - type="comment", - start=None, - limit=None, - expand="body.storage.value", - ) - comments_str = _comment_dfs( - comments_str, child_comment_pages, confluence_client - ) - except HTTPError as e: - # not the cleanest, but I'm not aware of a nicer way to check the error - if NO_PARENT_OR_NO_PERMISSIONS_ERROR_STR not in str(e): - raise - - return comments_str - - -def _datetime_from_string(datetime_string: str) -> datetime: - datetime_object = datetime.fromisoformat(datetime_string) - - if datetime_object.tzinfo is None: - # If no timezone info, assume it is UTC - datetime_object = datetime_object.replace(tzinfo=timezone.utc) - else: - # If not in UTC, translate it - datetime_object = datetime_object.astimezone(timezone.utc) - - return datetime_object - - -class RecursiveIndexer: - def __init__( - self, - batch_size: int, - confluence_client: Confluence, - index_recursively: bool, - origin_page_id: str, - ) -> None: - self.batch_size = 1 - # batch_size - self.confluence_client = confluence_client - self.index_recursively = index_recursively - self.origin_page_id = origin_page_id - self.pages = self.recurse_children_pages(0, self.origin_page_id) - - def get_origin_page(self) -> list[dict[str, Any]]: - return [self._fetch_origin_page()] - - def get_pages(self, ind: int, size: int) -> list[dict]: - if ind * size > len(self.pages): - return [] - return self.pages[ind * size : (ind + 1) * size] - - def _fetch_origin_page( - self, - ) -> dict[str, Any]: - get_page_by_id = make_confluence_call_handle_rate_limit( - self.confluence_client.get_page_by_id - ) - try: - origin_page = get_page_by_id( - self.origin_page_id, expand="body.storage.value,version" - ) - return origin_page - except Exception as e: - logger.warning( - f"Appending orgin page with id {self.origin_page_id} failed: {e}" - ) - return {} - - def recurse_children_pages( - self, - start_ind: int, - page_id: str, - ) -> list[dict[str, Any]]: - pages: list[dict[str, Any]] = [] - current_level_pages: list[dict[str, Any]] = [] - next_level_pages: list[dict[str, Any]] = [] - - # Initial fetch of first level children - index = start_ind - while batch := self._fetch_single_depth_child_pages( - index, self.batch_size, page_id - ): - current_level_pages.extend(batch) - index += len(batch) - - pages.extend(current_level_pages) - - # Recursively index children and children's children, etc. 
- while current_level_pages: - for child in current_level_pages: - child_index = 0 - while child_batch := self._fetch_single_depth_child_pages( - child_index, self.batch_size, child["id"] - ): - next_level_pages.extend(child_batch) - child_index += len(child_batch) - - pages.extend(next_level_pages) - current_level_pages = next_level_pages - next_level_pages = [] - - try: - origin_page = self._fetch_origin_page() - pages.append(origin_page) - except Exception as e: - logger.warning(f"Appending origin page with id {page_id} failed: {e}") - - return pages - - def _fetch_single_depth_child_pages( - self, start_ind: int, batch_size: int, page_id: str - ) -> list[dict[str, Any]]: - child_pages: list[dict[str, Any]] = [] - - get_page_child_by_type = make_confluence_call_handle_rate_limit( - self.confluence_client.get_page_child_by_type - ) - - try: - child_page = get_page_child_by_type( - page_id, - type="page", - start=start_ind, - limit=batch_size, - expand="body.storage.value,version", - ) - - child_pages.extend(child_page) - return child_pages - - except Exception: - logger.warning( - f"Batch failed with page {page_id} at offset {start_ind} " - f"with size {batch_size}, processing pages individually..." - ) - - for i in range(batch_size): - ind = start_ind + i - try: - child_page = get_page_child_by_type( - page_id, - type="page", - start=ind, - limit=1, - expand="body.storage.value,version", - ) - child_pages.extend(child_page) - except Exception as e: - logger.warning(f"Page {page_id} at offset {ind} failed: {e}") - raise e - - return child_pages - - -class ConfluenceConnector(LoadConnector, PollConnector): +_COMMENT_EXPANSION_FIELDS = ["body.storage.value"] +_PAGE_EXPANSION_FIELDS = [ + "body.storage.value", + "version", + "space", + "metadata.labels", +] +_ATTACHMENT_EXPANSION_FIELDS = [ + "version", + "space", + "metadata.labels", +] + +_RESTRICTIONS_EXPANSION_FIELDS = [ + "space", + "restrictions.read.restrictions.user", + "restrictions.read.restrictions.group", +] + + +class ConfluenceConnector(LoadConnector, PollConnector, SlimConnector): def __init__( self, wiki_base: str, - space: str, is_cloud: bool, + space: str = "", page_id: str = "", index_recursively: bool = True, + cql_query: str | None = None, batch_size: int = INDEX_BATCH_SIZE, continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE, # if a page has one of the labels specified in this list, we will just @@ -298,504 +70,233 @@ def __init__( ) -> None: self.batch_size = batch_size self.continue_on_failure = continue_on_failure - self.labels_to_skip = set(labels_to_skip) - self.recursive_indexer: RecursiveIndexer | None = None - self.index_recursively = index_recursively + self.confluence_client: OnyxConfluence | None = None + self.is_cloud = is_cloud # Remove trailing slash from wiki_base if present self.wiki_base = wiki_base.rstrip("/") - self.space = space - self.page_id = page_id - self.is_cloud = is_cloud + # if nothing is provided, we will fetch all pages + cql_page_query = "type=page" + if cql_query: + # if a cql_query is provided, we will use it to fetch the pages + cql_page_query = cql_query + elif space: + # if no cql_query is provided, we will use the space to fetch the pages + cql_page_query += f" and space='{quote(space)}'" + elif page_id: + if index_recursively: + cql_page_query += f" and ancestor='{page_id}'" + else: + # if neither a space nor a cql_query is provided, we will use the page_id to fetch the page + cql_page_query += f" and id='{page_id}'" - self.space_level_scan = False - self.confluence_client: 
Confluence | None = None + self.cql_page_query = cql_page_query + self.cql_time_filter = "" - if self.page_id is None or self.page_id == "": - self.space_level_scan = True - - logger.info( - f"wiki_base: {self.wiki_base}, space: {self.space}, page_id: {self.page_id}," - + f" space_level_scan: {self.space_level_scan}, index_recursively: {self.index_recursively}" - ) + self.cql_label_filter = "" + if labels_to_skip: + labels_to_skip = list(set(labels_to_skip)) + comma_separated_labels = ",".join(f"'{label}'" for label in labels_to_skip) + self.cql_label_filter = f" and label not in ({comma_separated_labels})" def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: - username = credentials["confluence_username"] - access_token = credentials["confluence_access_token"] - self.confluence_client = Confluence( - url=self.wiki_base, - # passing in username causes issues for Confluence data center - username=username if self.is_cloud else None, - password=access_token if self.is_cloud else None, - token=access_token if not self.is_cloud else None, + # see https://github.com/atlassian-api/atlassian-python-api/blob/master/atlassian/rest_client.py + # for a list of other hidden constructor args + self.confluence_client = build_confluence_client( + credentials_json=credentials, + is_cloud=self.is_cloud, + wiki_base=self.wiki_base, ) return None - def _fetch_pages( - self, - confluence_client: Confluence, - start_ind: int, - ) -> list[dict[str, Any]]: - def _fetch_space(start_ind: int, batch_size: int) -> list[dict[str, Any]]: - get_all_pages_from_space = make_confluence_call_handle_rate_limit( - confluence_client.get_all_pages_from_space - ) - try: - return get_all_pages_from_space( - self.space, - start=start_ind, - limit=batch_size, - status=( - None if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES else "current" - ), - expand="body.storage.value,version", - ) - except Exception: - logger.warning( - f"Batch failed with space {self.space} at offset {start_ind} " - f"with size {batch_size}, processing pages individually..." 
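As a rough illustration of the CQL-based approach introduced in the new ConfluenceConnector.__init__ above, the following minimal sketch reconstructs the page query it builds for a space-scoped connector that skips one label; the space key and label here are made-up example values, not taken from the change:

from urllib.parse import quote

# Illustrative reconstruction of the query building in the new __init__ above.
space = "ENG"                    # hypothetical space key
labels_to_skip = ["archived"]    # hypothetical label to exclude
cql_page_query = "type=page" + f" and space='{quote(space)}'"
cql_label_filter = " and label not in ({})".format(
    ",".join(f"'{label}'" for label in labels_to_skip)
)
print(cql_page_query + cql_label_filter)
# -> type=page and space='ENG' and label not in ('archived')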
- ) + def _get_comment_string_for_page_id(self, page_id: str) -> str: + if self.confluence_client is None: + raise ConnectorMissingCredentialError("Confluence") - view_pages: list[dict[str, Any]] = [] - for i in range(self.batch_size): - try: - # Could be that one of the pages here failed due to this bug: - # https://jira.atlassian.com/browse/CONFCLOUD-76433 - view_pages.extend( - get_all_pages_from_space( - self.space, - start=start_ind + i, - limit=1, - status=( - None - if CONFLUENCE_CONNECTOR_INDEX_ARCHIVED_PAGES - else "current" - ), - expand="body.storage.value,version", - ) - ) - except HTTPError as e: - logger.warning( - f"Page failed with space {self.space} at offset {start_ind + i}, " - f"trying alternative expand option: {e}" - ) - # Use view instead, which captures most info but is less complete - view_pages.extend( - get_all_pages_from_space( - self.space, - start=start_ind + i, - limit=1, - expand="body.view.value,version", - ) - ) + comment_string = "" - return view_pages + comment_cql = f"type=comment and container='{page_id}'" + comment_cql += self.cql_label_filter - def _fetch_page(start_ind: int, batch_size: int) -> list[dict[str, Any]]: - if self.recursive_indexer is None: - self.recursive_indexer = RecursiveIndexer( - origin_page_id=self.page_id, - batch_size=self.batch_size, + expand = ",".join(_COMMENT_EXPANSION_FIELDS) + for comments in self.confluence_client.paginated_cql_page_retrieval( + cql=comment_cql, + expand=expand, + ): + for comment in comments: + comment_string += "\nComment:\n" + comment_string += extract_text_from_confluence_html( confluence_client=self.confluence_client, - index_recursively=self.index_recursively, - ) - - if self.index_recursively: - return self.recursive_indexer.get_pages(start_ind, batch_size) - else: - return self.recursive_indexer.get_origin_page() - - pages: list[dict[str, Any]] = [] - - try: - pages = ( - _fetch_space(start_ind, self.batch_size) - if self.space_level_scan - else _fetch_page(start_ind, self.batch_size) - ) - return pages - - except Exception as e: - if not self.continue_on_failure: - raise e - - # error checking phase, only reachable if `self.continue_on_failure=True` - for i in range(self.batch_size): - try: - pages = ( - _fetch_space(start_ind, self.batch_size) - if self.space_level_scan - else _fetch_page(start_ind, self.batch_size) - ) - return pages - - except Exception: - logger.exception( - "Ran into exception when fetching pages from Confluence" + confluence_object=comment, ) - return pages + return comment_string - def _fetch_comments(self, confluence_client: Confluence, page_id: str) -> str: - get_page_child_by_type = make_confluence_call_handle_rate_limit( - confluence_client.get_page_child_by_type - ) - - try: - comment_pages = cast( - Collection[dict[str, Any]], - get_page_child_by_type( - page_id, - type="comment", - start=None, - limit=None, - expand="body.storage.value", - ), - ) - return _comment_dfs("", comment_pages, confluence_client) - except Exception as e: - if not self.continue_on_failure: - raise e - - logger.exception( - "Ran into exception when fetching comments from Confluence" - ) - return "" + def _convert_object_to_document( + self, confluence_object: dict[str, Any] + ) -> Document | None: + """ + Takes in a confluence object, extracts all metadata, and converts it into a document. + If it's a page, it extracts the text, adds the comments for the document text. + If it's an attachment, it just downloads the attachment and converts that into a document.
+ """ + if self.confluence_client is None: + raise ConnectorMissingCredentialError("Confluence") - def _fetch_labels(self, confluence_client: Confluence, page_id: str) -> list[str]: - get_page_labels = make_confluence_call_handle_rate_limit( - confluence_client.get_page_labels + # The url and the id are the same + object_url = build_confluence_document_id( + self.wiki_base, confluence_object["_links"]["webui"] ) - try: - labels_response = get_page_labels(page_id) - return [label["name"] for label in labels_response["results"]] - except Exception as e: - if not self.continue_on_failure: - raise e - - logger.exception("Ran into exception when fetching labels from Confluence") - return [] - - @classmethod - def _attachment_to_download_link( - cls, confluence_client: Confluence, attachment: dict[str, Any] - ) -> str: - return confluence_client.url + attachment["_links"]["download"] - - @classmethod - def _attachment_to_content( - cls, - confluence_client: Confluence, - attachment: dict[str, Any], - ) -> str | None: - """If it returns None, assume that we should skip this attachment.""" - if attachment["metadata"]["mediaType"] in [ - "image/jpeg", - "image/png", - "image/gif", - "image/svg+xml", - "video/mp4", - "video/quicktime", - ]: - return None - download_link = cls._attachment_to_download_link(confluence_client, attachment) - - attachment_size = attachment["extensions"]["fileSize"] - if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD: - logger.warning( - f"Skipping {download_link} due to size. " - f"size={attachment_size} " - f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}" + object_text = None + # Extract text from page + if confluence_object["type"] == "page": + object_text = extract_text_from_confluence_html( + self.confluence_client, confluence_object ) - return None - - response = confluence_client._session.get(download_link) - if response.status_code != 200: - logger.warning( - f"Failed to fetch {download_link} with invalid status code {response.status_code}" + # Add comments to text + object_text += self._get_comment_string_for_page_id(confluence_object["id"]) + elif confluence_object["type"] == "attachment": + object_text = attachment_to_content( + self.confluence_client, confluence_object ) - return None - extracted_text = extract_file_text( - attachment["title"], io.BytesIO(response.content), False - ) - if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD: - logger.warning( - f"Skipping {download_link} due to char count. 
" - f"char count={len(extracted_text)} " - f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD}" - ) + if object_text is None: return None - return extracted_text - - def _fetch_attachments( - self, confluence_client: Confluence, page_id: str, files_in_used: list[str] - ) -> tuple[str, list[dict[str, Any]]]: - unused_attachments: list = [] + # Get space name + doc_metadata: dict[str, str | list[str]] = { + "Wiki Space Name": confluence_object["space"]["name"] + } - get_attachments_from_content = make_confluence_call_handle_rate_limit( - confluence_client.get_attachments_from_content + # Get labels + label_dicts = confluence_object["metadata"]["labels"]["results"] + page_labels = [label["name"] for label in label_dicts] + if page_labels: + doc_metadata["labels"] = page_labels + + # Get last modified and author email + last_modified = datetime_from_string(confluence_object["version"]["when"]) + author_email = confluence_object["version"].get("by", {}).get("email") + + return Document( + id=object_url, + sections=[Section(link=object_url, text=object_text)], + source=DocumentSource.CONFLUENCE, + semantic_identifier=confluence_object["title"], + doc_updated_at=last_modified, + primary_owners=( + [BasicExpertInfo(email=author_email)] if author_email else None + ), + metadata=doc_metadata, ) - files_attachment_content: list = [] - - try: - expand = "history.lastUpdated,metadata.labels" - attachments_container = get_attachments_from_content( - page_id, start=0, limit=500, expand=expand - ) - for attachment in attachments_container["results"]: - if attachment["title"] not in files_in_used: - unused_attachments.append(attachment) - continue - - attachment_content = self._attachment_to_content( - confluence_client, attachment - ) - if attachment_content: - files_attachment_content.append(attachment_content) - - except Exception as e: - if isinstance( - e, HTTPError - ) and NO_PERMISSIONS_TO_VIEW_ATTACHMENTS_ERROR_STR in str(e): - logger.warning( - f"User does not have access to attachments on page '{page_id}'" - ) - return "", [] - - if not self.continue_on_failure: - raise e - logger.exception( - f"Ran into exception when fetching attachments from Confluence: {e}" - ) - - return "\n".join(files_attachment_content), unused_attachments - - def _get_doc_batch( - self, start_ind: int, time_filter: Callable[[datetime], bool] | None = None - ) -> tuple[list[Document], list[dict[str, Any]], int]: - doc_batch: list[Document] = [] - unused_attachments: list[dict[str, Any]] = [] + def _fetch_document_batches(self) -> GenerateDocumentsOutput: if self.confluence_client is None: raise ConnectorMissingCredentialError("Confluence") - batch = self._fetch_pages(self.confluence_client, start_ind) - - for page in batch: - last_modified = _datetime_from_string(page["version"]["when"]) - author = cast(str | None, page["version"].get("by", {}).get("email")) - - if time_filter and not time_filter(last_modified): - continue - - page_id = page["id"] - if self.labels_to_skip or not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING: - page_labels = self._fetch_labels(self.confluence_client, page_id) - - # check disallowed labels - if self.labels_to_skip: - label_intersection = self.labels_to_skip.intersection(page_labels) - if label_intersection: - logger.info( - f"Page with ID '{page_id}' has a label which has been " - f"designated as disallowed: {label_intersection}. Skipping." 
- ) + doc_batch: list[Document] = [] + confluence_page_ids: list[str] = [] + + page_query = self.cql_page_query + self.cql_label_filter + self.cql_time_filter + # Fetch pages as Documents + for page_batch in self.confluence_client.paginated_cql_page_retrieval( + cql=page_query, + expand=",".join(_PAGE_EXPANSION_FIELDS), + limit=self.batch_size, + ): + for page in page_batch: + confluence_page_ids.append(page["id"]) + doc = self._convert_object_to_document(page) + if doc is not None: + doc_batch.append(doc) + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + + # Fetch attachments as Documents + for confluence_page_id in confluence_page_ids: + attachment_cql = f"type=attachment and container='{confluence_page_id}'" + attachment_cql += self.cql_label_filter + # TODO: maybe should add time filter as well? + for attachments in self.confluence_client.paginated_cql_page_retrieval( + cql=attachment_cql, + expand=",".join(_ATTACHMENT_EXPANSION_FIELDS), + ): + for attachment in attachments: + doc = self._convert_object_to_document(attachment) + if doc is not None: + doc_batch.append(doc) + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + + if doc_batch: + yield doc_batch - continue + def load_from_state(self) -> GenerateDocumentsOutput: + return self._fetch_document_batches() - page_html = ( - page["body"].get("storage", page["body"].get("view", {})).get("value") - ) - # The url and the id are the same - page_url = build_confluence_document_id( - self.wiki_base, page["_links"]["webui"] - ) - if not page_html: - logger.debug("Page is empty, skipping: %s", page_url) - continue - page_text = parse_html_page(page_html, self.confluence_client) - - files_in_used = get_used_attachments(page_html) - attachment_text, unused_page_attachments = self._fetch_attachments( - self.confluence_client, page_id, files_in_used - ) - unused_attachments.extend(unused_page_attachments) - - page_text += attachment_text - comments_text = self._fetch_comments(self.confluence_client, page_id) - page_text += comments_text - doc_metadata: dict[str, str | list[str]] = {"Wiki Space Name": self.space} - if not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING and page_labels: - doc_metadata["labels"] = page_labels - - doc_batch.append( - Document( - id=page_url, - sections=[Section(link=page_url, text=page_text)], - source=DocumentSource.CONFLUENCE, - semantic_identifier=page["title"], - doc_updated_at=last_modified, - primary_owners=( - [BasicExpertInfo(email=author)] if author else None - ), - metadata=doc_metadata, - ) - ) - return ( - doc_batch, - unused_attachments, - len(batch), + def poll_source(self, start: float, end: float) -> GenerateDocumentsOutput: + # Add time filters + formatted_start_time = datetime.fromtimestamp(start, tz=timezone.utc).strftime( + "%Y-%m-%d %H:%M" + ) + formatted_end_time = datetime.fromtimestamp(end, tz=timezone.utc).strftime( + "%Y-%m-%d %H:%M" ) + self.cql_time_filter = f" and lastmodified >= '{formatted_start_time}'" + self.cql_time_filter += f" and lastmodified <= '{formatted_end_time}'" + return self._fetch_document_batches() - def _get_attachment_batch( + def retrieve_all_slim_documents( self, - start_ind: int, - attachments: list[dict[str, Any]], - time_filter: Callable[[datetime], bool] | None = None, - ) -> tuple[list[Document], int]: - doc_batch: list[Document] = [] - + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: if self.confluence_client is None: raise 
ConnectorMissingCredentialError("Confluence") - end_ind = min(start_ind + self.batch_size, len(attachments)) - - for attachment in attachments[start_ind:end_ind]: - last_updated = _datetime_from_string( - attachment["history"]["lastUpdated"]["when"] - ) + doc_metadata_list: list[SlimDocument] = [] - if time_filter and not time_filter(last_updated): - continue + restrictions_expand = ",".join(_RESTRICTIONS_EXPANSION_FIELDS) - # The url and the id are the same - attachment_url = build_confluence_document_id( - self.wiki_base, attachment["_links"]["download"] - ) - attachment_content = self._attachment_to_content( - self.confluence_client, attachment - ) - if attachment_content is None: - continue - - creator_email = attachment["history"]["createdBy"].get("email") - - comment = attachment["metadata"].get("comment", "") - doc_metadata: dict[str, str | list[str]] = {"comment": comment} - - attachment_labels: list[str] = [] - if not CONFLUENCE_CONNECTOR_SKIP_LABEL_INDEXING: - for label in attachment["metadata"]["labels"]["results"]: - attachment_labels.append(label["name"]) - - doc_metadata["labels"] = attachment_labels - - doc_batch.append( - Document( - id=attachment_url, - sections=[Section(link=attachment_url, text=attachment_content)], - source=DocumentSource.CONFLUENCE, - semantic_identifier=attachment["title"], - doc_updated_at=last_updated, - primary_owners=( - [BasicExpertInfo(email=creator_email)] - if creator_email - else None - ), - metadata=doc_metadata, + page_query = self.cql_page_query + self.cql_label_filter + for pages in self.confluence_client.cql_paginate_all_expansions( + cql=page_query, + expand=restrictions_expand, + ): + for page in pages: + # If the page has restrictions, add them to the perm_sync_data + # These will be used by doc_sync.py to sync permissions + perm_sync_data = { + "restrictions": page.get("restrictions", {}), + "space_key": page.get("space", {}).get("key"), + } + + doc_metadata_list.append( + SlimDocument( + id=build_confluence_document_id( + self.wiki_base, page["_links"]["webui"] + ), + perm_sync_data=perm_sync_data, + ) ) - ) - - return doc_batch, end_ind - start_ind - - def load_from_state(self) -> GenerateDocumentsOutput: - unused_attachments = [] - - if self.confluence_client is None: - raise ConnectorMissingCredentialError("Confluence") - - start_ind = 0 - while True: - doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch( - start_ind - ) - unused_attachments.extend(unused_attachments_batch) - start_ind += num_pages - if doc_batch: - yield doc_batch - - if num_pages < self.batch_size: - break - - start_ind = 0 - while True: - attachment_batch, num_attachments = self._get_attachment_batch( - start_ind, unused_attachments - ) - start_ind += num_attachments - if attachment_batch: - yield attachment_batch - - if num_attachments < self.batch_size: - break - - def poll_source( - self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch - ) -> GenerateDocumentsOutput: - unused_attachments = [] - - if self.confluence_client is None: - raise ConnectorMissingCredentialError("Confluence") - - start_time = datetime.fromtimestamp(start, tz=timezone.utc) - end_time = datetime.fromtimestamp(end, tz=timezone.utc) - - start_ind = 0 - while True: - doc_batch, unused_attachments_batch, num_pages = self._get_doc_batch( - start_ind, time_filter=lambda t: start_time <= t <= end_time - ) - unused_attachments.extend(unused_attachments_batch) - - start_ind += num_pages - if doc_batch: - yield doc_batch - - if num_pages < self.batch_size: - break - - 
start_ind = 0 - while True: - attachment_batch, num_attachments = self._get_attachment_batch( - start_ind, - unused_attachments, - time_filter=lambda t: start_time <= t <= end_time, - ) - start_ind += num_attachments - if attachment_batch: - yield attachment_batch - - if num_attachments < self.batch_size: - break - - -if __name__ == "__main__": - connector = ConfluenceConnector( - wiki_base=os.environ["CONFLUENCE_TEST_SPACE_URL"], - space=os.environ["CONFLUENCE_TEST_SPACE"], - is_cloud=os.environ.get("CONFLUENCE_IS_CLOUD", "true").lower() == "true", - page_id=os.environ.get("CONFLUENCE_TEST_PAGE_ID", ""), - index_recursively=True, - ) - connector.load_credentials( - { - "confluence_username": os.environ["CONFLUENCE_USER_NAME"], - "confluence_access_token": os.environ["CONFLUENCE_ACCESS_TOKEN"], - } - ) - document_batches = connector.load_from_state() - print(next(document_batches)) + attachment_cql = f"type=attachment and container='{page['id']}'" + attachment_cql += self.cql_label_filter + for attachments in self.confluence_client.cql_paginate_all_expansions( + cql=attachment_cql, + expand=restrictions_expand, + ): + for attachment in attachments: + doc_metadata_list.append( + SlimDocument( + id=build_confluence_document_id( + self.wiki_base, attachment["_links"]["webui"] + ), + perm_sync_data=perm_sync_data, + ) + ) + yield doc_metadata_list + doc_metadata_list = [] diff --git a/backend/danswer/connectors/confluence/onyx_confluence.py b/backend/danswer/connectors/confluence/onyx_confluence.py new file mode 100644 index 00000000000..c01f45dea6a --- /dev/null +++ b/backend/danswer/connectors/confluence/onyx_confluence.py @@ -0,0 +1,226 @@ +import math +import time +from collections.abc import Callable +from collections.abc import Iterator +from typing import Any +from typing import cast +from typing import TypeVar +from urllib.parse import quote + +from atlassian import Confluence # type:ignore +from requests import HTTPError + +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +F = TypeVar("F", bound=Callable[..., Any]) + + +RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower() + + +class ConfluenceRateLimitError(Exception): + pass + + +def _handle_http_error(e: HTTPError, attempt: int) -> int: + MIN_DELAY = 2 + MAX_DELAY = 60 + STARTING_DELAY = 5 + BACKOFF = 2 + + # Check if the response or headers are None to avoid potential AttributeError + if e.response is None or e.response.headers is None: + logger.warning("HTTPError with `None` as response or as headers") + raise e + + if ( + e.response.status_code != 429 + and RATE_LIMIT_MESSAGE_LOWERCASE not in e.response.text.lower() + ): + raise e + + retry_after = None + + retry_after_header = e.response.headers.get("Retry-After") + if retry_after_header is not None: + try: + retry_after = int(retry_after_header) + if retry_after > MAX_DELAY: + logger.warning( + f"Clamping retry_after from {retry_after} to {MAX_DELAY} seconds..." + ) + retry_after = MAX_DELAY + if retry_after < MIN_DELAY: + retry_after = MIN_DELAY + except ValueError: + pass + + if retry_after is not None: + logger.warning( + f"Rate limiting with retry header. Retrying after {retry_after} seconds..." + ) + delay = retry_after + else: + logger.warning( + "Rate limiting without retry header. Retrying with exponential backoff..." 
+ ) + delay = min(STARTING_DELAY * (BACKOFF**attempt), MAX_DELAY) + + delay_until = math.ceil(time.monotonic() + delay) + return delay_until + + +# https://developer.atlassian.com/cloud/confluence/rate-limiting/ +# this uses the native rate limiting option provided by the +# confluence client and otherwise applies a simpler set of error handling +def handle_confluence_rate_limit(confluence_call: F) -> F: + def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: + MAX_RETRIES = 5 + + TIMEOUT = 3600 + timeout_at = time.monotonic() + TIMEOUT + + for attempt in range(MAX_RETRIES): + if time.monotonic() > timeout_at: + raise TimeoutError( + f"Confluence call attempts took longer than {TIMEOUT} seconds." + ) + + try: + # we're relying more on the client to rate limit itself + # and applying our own retries in a more specific set of circumstances + return confluence_call(*args, **kwargs) + except HTTPError as e: + delay_until = _handle_http_error(e, attempt) + while time.monotonic() < delay_until: + # in the future, check a signal here to exit + time.sleep(1) + except AttributeError as e: + # Some error within the Confluence library, unclear why it fails. + # Users reported it to be intermittent, so just retry + if attempt == MAX_RETRIES - 1: + raise e + + logger.exception( + "Confluence Client raised an AttributeError. Retrying..." + ) + time.sleep(5) + + return cast(F, wrapped_call) + + +_DEFAULT_PAGINATION_LIMIT = 100 + + +class OnyxConfluence(Confluence): + """ + This is a custom Confluence class that overrides the default Confluence class to add a custom CQL method. + This is necessary because the default Confluence class does not properly support cql expansions. + All methods are automatically wrapped with handle_confluence_rate_limit. + """ + + def __init__(self, url: str, *args: Any, **kwargs: Any) -> None: + super(OnyxConfluence, self).__init__(url, *args, **kwargs) + self._wrap_methods() + + def _wrap_methods(self) -> None: + """ + For each attribute that is callable (i.e., a method) and doesn't start with an underscore, + wrap it with handle_confluence_rate_limit. + """ + for attr_name in dir(self): + if callable(getattr(self, attr_name)) and not attr_name.startswith("_"): + setattr( + self, + attr_name, + handle_confluence_rate_limit(getattr(self, attr_name)), + ) + + def _paginate_url( + self, url_suffix: str, limit: int | None = None + ) -> Iterator[list[dict[str, Any]]]: + """ + This will paginate through the top level query. + """ + if not limit: + limit = _DEFAULT_PAGINATION_LIMIT + + connection_char = "&" if "?" in url_suffix else "?" 
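Once the OnyxConfluence class being defined here is constructed, its pagination helpers are intended to be consumed as generators of result batches. A minimal usage sketch follows; it assumes an already-constructed client named `confluence`, and the CQL string and expand fields are illustrative only, not taken from the change:

# Each yielded item is one batch of results; the helper keeps requesting the
# `_links.next` URL returned by Confluence until no further page is available.
cql = "type=page and space='ENG'"  # hypothetical CQL query
for result_batch in confluence.paginated_cql_page_retrieval(
    cql=cql,
    expand="body.storage.value,version",
    limit=50,
):
    for page in result_batch:
        print(page["id"], page["title"])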
+ url_suffix += f"{connection_char}limit={limit}" + + while url_suffix: + try: + next_response = self.get(url_suffix) + except Exception as e: + logger.exception("Error in danswer_cql: \n") + raise e + yield next_response.get("results", []) + url_suffix = next_response.get("_links", {}).get("next") + + def paginated_groups_retrieval( + self, + limit: int | None = None, + ) -> Iterator[list[dict[str, Any]]]: + return self._paginate_url("rest/api/group", limit) + + def paginated_group_members_retrieval( + self, + group_name: str, + limit: int | None = None, + ) -> Iterator[list[dict[str, Any]]]: + group_name = quote(group_name) + return self._paginate_url(f"rest/api/group/{group_name}/member", limit) + + def paginated_cql_user_retrieval( + self, + cql: str, + expand: str | None = None, + limit: int | None = None, + ) -> Iterator[list[dict[str, Any]]]: + expand_string = f"&expand={expand}" if expand else "" + return self._paginate_url( + f"rest/api/search/user?cql={cql}{expand_string}", limit + ) + + def paginated_cql_page_retrieval( + self, + cql: str, + expand: str | None = None, + limit: int | None = None, + ) -> Iterator[list[dict[str, Any]]]: + expand_string = f"&expand={expand}" if expand else "" + return self._paginate_url( + f"rest/api/content/search?cql={cql}{expand_string}", limit + ) + + def cql_paginate_all_expansions( + self, + cql: str, + expand: str | None = None, + limit: int | None = None, + ) -> Iterator[list[dict[str, Any]]]: + """ + This function will paginate through the top level query first, then + paginate through all of the expansions. + The limit only applies to the top level query. + All expansion paginations use default pagination limit (defined by Atlassian). + """ + + def _traverse_and_update(data: dict | list) -> None: + if isinstance(data, dict): + next_url = data.get("_links", {}).get("next") + if next_url and "results" in data: + data["results"].extend(self._paginate_url(next_url)) + + for value in data.values(): + _traverse_and_update(value) + elif isinstance(data, list): + for item in data: + _traverse_and_update(item) + + for results in self.paginated_cql_page_retrieval(cql, expand, limit): + _traverse_and_update(results) + yield results diff --git a/backend/danswer/connectors/confluence/rate_limit_handler.py b/backend/danswer/connectors/confluence/rate_limit_handler.py deleted file mode 100644 index ea0e46800ff..00000000000 --- a/backend/danswer/connectors/confluence/rate_limit_handler.py +++ /dev/null @@ -1,82 +0,0 @@ -import time -from collections.abc import Callable -from typing import Any -from typing import cast -from typing import TypeVar - -from requests import HTTPError - -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -F = TypeVar("F", bound=Callable[..., Any]) - - -RATE_LIMIT_MESSAGE_LOWERCASE = "Rate limit exceeded".lower() - - -class ConfluenceRateLimitError(Exception): - pass - - -def make_confluence_call_handle_rate_limit(confluence_call: F) -> F: - def wrapped_call(*args: list[Any], **kwargs: Any) -> Any: - max_retries = 5 - starting_delay = 5 - backoff = 2 - max_delay = 600 - - for attempt in range(max_retries): - try: - return confluence_call(*args, **kwargs) - except HTTPError as e: - # Check if the response or headers are None to avoid potential AttributeError - if e.response is None or e.response.headers is None: - logger.warning("HTTPError with `None` as response or as headers") - raise e - - retry_after_header = e.response.headers.get("Retry-After") - if ( - e.response.status_code == 429 - or 
RATE_LIMIT_MESSAGE_LOWERCASE in e.response.text.lower() - ): - retry_after = None - if retry_after_header is not None: - try: - retry_after = int(retry_after_header) - except ValueError: - pass - - if retry_after is not None: - if retry_after > 600: - logger.warning( - f"Clamping retry_after from {retry_after} to {max_delay} seconds..." - ) - retry_after = max_delay - - logger.warning( - f"Rate limit hit. Retrying after {retry_after} seconds..." - ) - time.sleep(retry_after) - else: - logger.warning( - "Rate limit hit. Retrying with exponential backoff..." - ) - delay = min(starting_delay * (backoff**attempt), max_delay) - time.sleep(delay) - else: - # re-raise, let caller handle - raise - except AttributeError as e: - # Some error within the Confluence library, unclear why it fails. - # Users reported it to be intermittent, so just retry - logger.warning(f"Confluence Internal Error, retrying... {e}") - delay = min(starting_delay * (backoff**attempt), max_delay) - time.sleep(delay) - - if attempt == max_retries - 1: - raise e - - return cast(F, wrapped_call) diff --git a/backend/danswer/connectors/confluence/utils.py b/backend/danswer/connectors/confluence/utils.py new file mode 100644 index 00000000000..beb0465be60 --- /dev/null +++ b/backend/danswer/connectors/confluence/utils.py @@ -0,0 +1,214 @@ +import io +from datetime import datetime +from datetime import timezone +from typing import Any + +import bs4 + +from danswer.configs.app_configs import ( + CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD, +) +from danswer.configs.app_configs import CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD +from danswer.connectors.confluence.onyx_confluence import ( + OnyxConfluence, +) +from danswer.file_processing.extract_file_text import extract_file_text +from danswer.file_processing.html_utils import format_document_soup +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +_USER_EMAIL_CACHE: dict[str, str | None] = {} + + +def get_user_email_from_username__server( + confluence_client: OnyxConfluence, user_name: str +) -> str | None: + global _USER_EMAIL_CACHE + if _USER_EMAIL_CACHE.get(user_name) is None: + try: + response = confluence_client.get_mobile_parameters(user_name) + email = response.get("email") + except Exception: + email = None + _USER_EMAIL_CACHE[user_name] = email + return _USER_EMAIL_CACHE[user_name] + + +_USER_NOT_FOUND = "Unknown Confluence User" +_USER_ID_TO_DISPLAY_NAME_CACHE: dict[str, str | None] = {} + + +def _get_user(confluence_client: OnyxConfluence, user_id: str) -> str: + """Get Confluence Display Name based on the account-id or userkey value + + Args: + user_id (str): The user id (i.e: the account-id or userkey) + confluence_client (Confluence): The Confluence Client + + Returns: + str: The User Display Name. 
'Unknown User' if the user is deactivated or not found + """ + global _USER_ID_TO_DISPLAY_NAME_CACHE + if _USER_ID_TO_DISPLAY_NAME_CACHE.get(user_id) is None: + try: + result = confluence_client.get_user_details_by_userkey(user_id) + found_display_name = result.get("displayName") + except Exception: + found_display_name = None + + if not found_display_name: + try: + result = confluence_client.get_user_details_by_accountid(user_id) + found_display_name = result.get("displayName") + except Exception: + found_display_name = None + + _USER_ID_TO_DISPLAY_NAME_CACHE[user_id] = found_display_name + + return _USER_ID_TO_DISPLAY_NAME_CACHE.get(user_id) or _USER_NOT_FOUND + + +def extract_text_from_confluence_html( + confluence_client: OnyxConfluence, confluence_object: dict[str, Any] +) -> str: + """Parse a Confluence html page and replace the 'user Id' with the real + User Display Name + + Args: + confluence_object (dict): The confluence object as a dict + confluence_client (Confluence): Confluence client + + Returns: + str: loaded and formatted Confluence page + """ + body = confluence_object["body"] + object_html = body.get("storage", body.get("view", {})).get("value") + + soup = bs4.BeautifulSoup(object_html, "html.parser") + for user in soup.findAll("ri:user"): + user_id = ( + user.attrs["ri:account-id"] + if "ri:account-id" in user.attrs + else user.get("ri:userkey") + ) + if not user_id: + logger.warning( + "ri:userkey not found in ri:user element. " f"Found attrs: {user.attrs}" + ) + continue + # Include @ sign for tagging, more clear for LLM + user.replaceWith("@" + _get_user(confluence_client, user_id)) + return format_document_soup(soup) + + +def attachment_to_content( + confluence_client: OnyxConfluence, + attachment: dict[str, Any], +) -> str | None: + """If it returns None, assume that we should skip this attachment.""" + if attachment["metadata"]["mediaType"] in [ + "image/jpeg", + "image/png", + "image/gif", + "image/svg+xml", + "video/mp4", + "video/quicktime", + ]: + return None + + download_link = confluence_client.url + attachment["_links"]["download"] + + attachment_size = attachment["extensions"]["fileSize"] + if attachment_size > CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD: + logger.warning( + f"Skipping {download_link} due to size. " + f"size={attachment_size} " + f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_SIZE_THRESHOLD}" + ) + return None + + logger.info(f"_attachment_to_content - _session.get: link={download_link}") + response = confluence_client._session.get(download_link) + if response.status_code != 200: + logger.warning( + f"Failed to fetch {download_link} with invalid status code {response.status_code}" + ) + return None + + extracted_text = extract_file_text( + io.BytesIO(response.content), + file_name=attachment["title"], + break_on_unprocessable=False, + ) + if len(extracted_text) > CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD: + logger.warning( + f"Skipping {download_link} due to char count. 
" + f"char count={len(extracted_text)} " + f"threshold={CONFLUENCE_CONNECTOR_ATTACHMENT_CHAR_COUNT_THRESHOLD}" + ) + return None + + return extracted_text + + +def build_confluence_document_id(base_url: str, content_url: str) -> str: + """For confluence, the document id is the page url for a page based document + or the attachment download url for an attachment based document + + Args: + base_url (str): The base url of the Confluence instance + content_url (str): The url of the page or attachment download url + + Returns: + str: The document id + """ + return f"{base_url}{content_url}" + + +def extract_referenced_attachment_names(page_text: str) -> list[str]: + """Parse a Confluence html page to generate a list of current + attachments in use + + Args: + text (str): The page content + + Returns: + list[str]: List of filenames currently in use by the page text + """ + referenced_attachment_filenames = [] + soup = bs4.BeautifulSoup(page_text, "html.parser") + for attachment in soup.findAll("ri:attachment"): + referenced_attachment_filenames.append(attachment.attrs["ri:filename"]) + return referenced_attachment_filenames + + +def datetime_from_string(datetime_string: str) -> datetime: + datetime_object = datetime.fromisoformat(datetime_string) + + if datetime_object.tzinfo is None: + # If no timezone info, assume it is UTC + datetime_object = datetime_object.replace(tzinfo=timezone.utc) + else: + # If not in UTC, translate it + datetime_object = datetime_object.astimezone(timezone.utc) + + return datetime_object + + +def build_confluence_client( + credentials_json: dict[str, Any], is_cloud: bool, wiki_base: str +) -> OnyxConfluence: + return OnyxConfluence( + api_version="cloud" if is_cloud else "latest", + # Remove trailing slash from wiki_base if present + url=wiki_base.rstrip("/"), + # passing in username causes issues for Confluence data center + username=credentials_json["confluence_username"] if is_cloud else None, + password=credentials_json["confluence_access_token"] if is_cloud else None, + token=credentials_json["confluence_access_token"] if not is_cloud else None, + backoff_and_retry=True, + max_backoff_retries=60, + max_backoff_seconds=60, + ) diff --git a/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py b/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py index 897503dca99..8e8ea8d7d65 100644 --- a/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py +++ b/backend/danswer/connectors/cross_connector_utils/miscellaneous_utils.py @@ -11,6 +11,10 @@ from danswer.utils.text_processing import is_valid_email +T = TypeVar("T") +U = TypeVar("U") + + def datetime_to_utc(dt: datetime) -> datetime: if dt.tzinfo is None or dt.tzinfo.utcoffset(dt) is None: dt = dt.replace(tzinfo=timezone.utc) @@ -19,7 +23,16 @@ def datetime_to_utc(dt: datetime) -> datetime: def time_str_to_utc(datetime_str: str) -> datetime: - dt = parse(datetime_str) + try: + dt = parse(datetime_str) + except ValueError: + # Handle malformed timezone by attempting to fix common format issues + if "0000" in datetime_str: + # Convert "0000" to "+0000" for proper timezone parsing + fixed_dt_str = datetime_str.replace(" 0000", " +0000") + dt = parse(fixed_dt_str) + else: + raise return datetime_to_utc(dt) @@ -49,10 +62,6 @@ def get_experts_stores_representations( return [owner for owner in reps if owner is not None] -T = TypeVar("T") -U = TypeVar("U") - - def process_in_batches( objects: list[T], process_function: Callable[[T], U], batch_size: int ) -> 
Iterator[list[U]]: diff --git a/backend/danswer/connectors/discourse/connector.py b/backend/danswer/connectors/discourse/connector.py index d74aad0f276..d1b6395a189 100644 --- a/backend/danswer/connectors/discourse/connector.py +++ b/backend/danswer/connectors/discourse/connector.py @@ -14,7 +14,6 @@ from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( rate_limit_builder, ) -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch @@ -24,6 +23,7 @@ from danswer.connectors.models import Section from danswer.file_processing.html_utils import parse_html_page_basic from danswer.utils.logger import setup_logger +from danswer.utils.retry_wrapper import retry_builder logger = setup_logger() diff --git a/backend/danswer/connectors/document360/connector.py b/backend/danswer/connectors/document360/connector.py index 6a9f4ba6a56..7ccf3c92e62 100644 --- a/backend/danswer/connectors/document360/connector.py +++ b/backend/danswer/connectors/document360/connector.py @@ -11,7 +11,6 @@ from danswer.connectors.cross_connector_utils.rate_limit_wrapper import ( rate_limit_builder, ) -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.document360.utils import flatten_child_categories from danswer.connectors.interfaces import GenerateDocumentsOutput from danswer.connectors.interfaces import LoadConnector @@ -22,6 +21,7 @@ from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.utils.retry_wrapper import retry_builder # Limitations and Potential Improvements # 1. 
The "Categories themselves contain potentially relevant information" but they're not pulled in diff --git a/backend/danswer/connectors/dropbox/connector.py b/backend/danswer/connectors/dropbox/connector.py index b36f0fbd122..7d2eb0166c7 100644 --- a/backend/danswer/connectors/dropbox/connector.py +++ b/backend/danswer/connectors/dropbox/connector.py @@ -97,8 +97,8 @@ def _yield_files_recursive( link = self._get_shared_link(entry.path_display) try: text = extract_file_text( - entry.name, BytesIO(downloaded_file), + file_name=entry.name, break_on_unprocessable=False, ) batch.append( diff --git a/backend/danswer/connectors/factory.py b/backend/danswer/connectors/factory.py index 75e0d9bb238..bc9196eec59 100644 --- a/backend/danswer/connectors/factory.py +++ b/backend/danswer/connectors/factory.py @@ -4,6 +4,7 @@ from sqlalchemy.orm import Session from danswer.configs.constants import DocumentSource +from danswer.configs.constants import DocumentSourceRequiringTenantContext from danswer.connectors.asana.connector import AsanaConnector from danswer.connectors.axero.connector import AxeroConnector from danswer.connectors.blob.connector import BlobStorageConnector @@ -15,6 +16,7 @@ from danswer.connectors.document360.connector import Document360Connector from danswer.connectors.dropbox.connector import DropboxConnector from danswer.connectors.file.connector import LocalFileConnector +from danswer.connectors.freshdesk.connector import FreshdeskConnector from danswer.connectors.github.connector import GithubConnector from danswer.connectors.gitlab.connector import GitlabConnector from danswer.connectors.gmail.connector import GmailConnector @@ -33,7 +35,6 @@ from danswer.connectors.models import InputType from danswer.connectors.notion.connector import NotionConnector from danswer.connectors.productboard.connector import ProductboardConnector -from danswer.connectors.requesttracker.connector import RequestTrackerConnector from danswer.connectors.salesforce.connector import SalesforceConnector from danswer.connectors.sharepoint.connector import SharepointConnector from danswer.connectors.slab.connector import SlabConnector @@ -63,7 +64,7 @@ def identify_connector_class( DocumentSource.SLACK: { InputType.LOAD_STATE: SlackLoadConnector, InputType.POLL: SlackPollConnector, - InputType.PRUNE: SlackPollConnector, + InputType.SLIM_RETRIEVAL: SlackPollConnector, }, DocumentSource.GITHUB: GithubConnector, DocumentSource.GMAIL: GmailConnector, @@ -76,7 +77,6 @@ def identify_connector_class( DocumentSource.SLAB: SlabConnector, DocumentSource.NOTION: NotionConnector, DocumentSource.ZULIP: ZulipConnector, - DocumentSource.REQUESTTRACKER: RequestTrackerConnector, DocumentSource.GURU: GuruConnector, DocumentSource.LINEAR: LinearConnector, DocumentSource.HUBSPOT: HubSpotConnector, @@ -100,6 +100,7 @@ def identify_connector_class( DocumentSource.GOOGLE_CLOUD_STORAGE: BlobStorageConnector, DocumentSource.OCI_STORAGE: BlobStorageConnector, DocumentSource.XENFORO: XenforoConnector, + DocumentSource.FRESHDESK: FreshdeskConnector, } connector_by_source = connector_map.get(source, {}) @@ -134,8 +135,13 @@ def instantiate_connector( input_type: InputType, connector_specific_config: dict[str, Any], credential: Credential, + tenant_id: str | None = None, ) -> BaseConnector: connector_class = identify_connector_class(source, input_type) + + if source in DocumentSourceRequiringTenantContext: + connector_specific_config["tenant_id"] = tenant_id + connector = connector_class(**connector_specific_config) new_credentials = 
connector.load_credentials(credential.credential_json) diff --git a/backend/danswer/connectors/file/connector.py b/backend/danswer/connectors/file/connector.py index 83d0af2c12e..13744f02b2f 100644 --- a/backend/danswer/connectors/file/connector.py +++ b/backend/danswer/connectors/file/connector.py @@ -16,7 +16,7 @@ from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import Document from danswer.connectors.models import Section -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.file_processing.extract_file_text import check_file_ext_is_valid from danswer.file_processing.extract_file_text import detect_encoding from danswer.file_processing.extract_file_text import extract_file_text @@ -27,6 +27,8 @@ from danswer.file_processing.extract_file_text import read_text_file from danswer.file_store.file_store import get_default_file_store from danswer.utils.logger import setup_logger +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logger = setup_logger() @@ -74,13 +76,14 @@ def _process_file( ) # Using the PDF reader function directly to pass in password cleanly - elif extension == ".pdf": + elif extension == ".pdf" and pdf_pass is not None: file_content_raw, file_metadata = read_pdf_file(file=file, pdf_pass=pdf_pass) else: file_content_raw = extract_file_text( - file_name=file_name, file=file, + file_name=file_name, + break_on_unprocessable=True, ) all_metadata = {**metadata, **file_metadata} if metadata else file_metadata @@ -158,10 +161,12 @@ class LocalFileConnector(LoadConnector): def __init__( self, file_locations: list[Path | str], + tenant_id: str = POSTGRES_DEFAULT_SCHEMA, batch_size: int = INDEX_BATCH_SIZE, ) -> None: self.file_locations = [Path(file_location) for file_location in file_locations] self.batch_size = batch_size + self.tenant_id = tenant_id self.pdf_pass: str | None = None def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: @@ -170,7 +175,9 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None def load_from_state(self) -> GenerateDocumentsOutput: documents: list[Document] = [] - with Session(get_sqlalchemy_engine()) as db_session: + token = CURRENT_TENANT_ID_CONTEXTVAR.set(self.tenant_id) + + with get_session_with_tenant(self.tenant_id) as db_session: for file_path in self.file_locations: current_datetime = datetime.now(timezone.utc) files = _read_files_and_metadata( @@ -192,6 +199,8 @@ def load_from_state(self) -> GenerateDocumentsOutput: if documents: yield documents + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) + if __name__ == "__main__": connector = LocalFileConnector(file_locations=[os.environ["TEST_FILE"]]) diff --git a/backend/danswer/dynamic_configs/__init__.py b/backend/danswer/connectors/freshdesk/__init__,py similarity index 100% rename from backend/danswer/dynamic_configs/__init__.py rename to backend/danswer/connectors/freshdesk/__init__,py diff --git a/backend/danswer/connectors/freshdesk/connector.py b/backend/danswer/connectors/freshdesk/connector.py new file mode 100644 index 00000000000..db2a293e95f --- /dev/null +++ b/backend/danswer/connectors/freshdesk/connector.py @@ -0,0 +1,239 @@ +import json +from collections.abc import Iterator +from datetime import datetime +from datetime import timezone +from typing import List + +import requests + +from danswer.configs.app_configs import INDEX_BATCH_SIZE +from 
danswer.configs.constants import DocumentSource +from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import LoadConnector +from danswer.connectors.interfaces import PollConnector +from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.models import ConnectorMissingCredentialError +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.utils.logger import setup_logger + +logger = setup_logger() + +_FRESHDESK_ID_PREFIX = "FRESHDESK_" + + +_TICKET_FIELDS_TO_INCLUDE = { + "fr_escalated", + "spam", + "priority", + "source", + "status", + "type", + "is_escalated", + "tags", + "nr_due_by", + "nr_escalated", + "cc_emails", + "fwd_emails", + "reply_cc_emails", + "ticket_cc_emails", + "support_email", + "to_emails", +} + +_SOURCE_NUMBER_TYPE_MAP: dict[int, str] = { + 1: "Email", + 2: "Portal", + 3: "Phone", + 7: "Chat", + 9: "Feedback Widget", + 10: "Outbound Email", +} + +_PRIORITY_NUMBER_TYPE_MAP: dict[int, str] = { + 1: "low", + 2: "medium", + 3: "high", + 4: "urgent", +} + +_STATUS_NUMBER_TYPE_MAP: dict[int, str] = { + 2: "open", + 3: "pending", + 4: "resolved", + 5: "closed", +} + + +def _create_metadata_from_ticket(ticket: dict) -> dict: + metadata: dict[str, str | list[str]] = {} + # Combine all emails into a list so there are no repeated emails + email_data: set[str] = set() + + for key, value in ticket.items(): + # Skip fields that aren't useful for embedding + if key not in _TICKET_FIELDS_TO_INCLUDE: + continue + + # Skip empty fields + if not value or value == "[]": + continue + + # Convert strings or lists to strings + stringified_value: str | list[str] + if isinstance(value, list): + stringified_value = [str(item) for item in value] + else: + stringified_value = str(value) + + if "email" in key: + if isinstance(stringified_value, list): + email_data.update(stringified_value) + else: + email_data.add(stringified_value) + else: + metadata[key] = stringified_value + + if email_data: + metadata["emails"] = list(email_data) + + # Convert source numbers to human-parsable string + if source_number := ticket.get("source"): + metadata["source"] = _SOURCE_NUMBER_TYPE_MAP.get( + source_number, "Unknown Source Type" + ) + + # Convert priority numbers to human-parsable string + if priority_number := ticket.get("priority"): + metadata["priority"] = _PRIORITY_NUMBER_TYPE_MAP.get( + priority_number, "Unknown Priority" + ) + + # Convert status to human-parsable string + if status_number := ticket.get("status"): + metadata["status"] = _STATUS_NUMBER_TYPE_MAP.get( + status_number, "Unknown Status" + ) + + due_by = datetime.fromisoformat(ticket["due_by"].replace("Z", "+00:00")) + metadata["overdue"] = str(datetime.now(timezone.utc) > due_by) + + return metadata + + +def _create_doc_from_ticket(ticket: dict, domain: str) -> Document: + # Use the ticket description as the text + text = f"Ticket description: {parse_html_page_basic(ticket.get('description_text', ''))}" + metadata = _create_metadata_from_ticket(ticket) + + # This is also used in the ID because it is more unique than the just the ticket ID + link = f"https://{domain}.freshdesk.com/helpdesk/tickets/{ticket['id']}" + + return Document( + id=_FRESHDESK_ID_PREFIX + link, + sections=[ + Section( + link=link, + text=text, + ) + ], + source=DocumentSource.FRESHDESK, + semantic_identifier=ticket["subject"], + metadata=metadata, + 
doc_updated_at=datetime.fromisoformat( + ticket["updated_at"].replace("Z", "+00:00") + ), + ) + + +class FreshdeskConnector(PollConnector, LoadConnector): + def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None: + self.batch_size = batch_size + + def load_credentials(self, credentials: dict[str, str | int]) -> None: + api_key = credentials.get("freshdesk_api_key") + domain = credentials.get("freshdesk_domain") + password = credentials.get("freshdesk_password") + + if not all(isinstance(cred, str) for cred in [domain, api_key, password]): + raise ConnectorMissingCredentialError( + "All Freshdesk credentials must be strings" + ) + + self.api_key = str(api_key) + self.domain = str(domain) + self.password = str(password) + + def _fetch_tickets( + self, start: datetime | None = None, end: datetime | None = None + ) -> Iterator[List[dict]]: + """ + 'end' is not currently used, so we may double fetch tickets created after the indexing + starts but before the actual call is made. + + To use 'end' would require us to use the search endpoint but it has limitations, + namely having to fetch all IDs and then individually fetch each ticket because there is no + 'include' field available for this endpoint: + https://developers.freshdesk.com/api/#filter_tickets + """ + if self.api_key is None or self.domain is None or self.password is None: + raise ConnectorMissingCredentialError("freshdesk") + + base_url = f"https://{self.domain}.freshdesk.com/api/v2/tickets" + params: dict[str, int | str] = { + "include": "description", + "per_page": 50, + "page": 1, + } + + if start: + params["updated_since"] = start.isoformat() + + while True: + response = requests.get( + base_url, auth=(self.api_key, self.password), params=params + ) + response.raise_for_status() + + if response.status_code == 204: + break + + tickets = json.loads(response.content) + logger.info( + f"Fetched {len(tickets)} tickets from Freshdesk API (Page {params['page']})" + ) + + yield tickets + + if len(tickets) < int(params["per_page"]): + break + + params["page"] = int(params["page"]) + 1 + + def _process_tickets( + self, start: datetime | None = None, end: datetime | None = None + ) -> GenerateDocumentsOutput: + doc_batch: List[Document] = [] + + for ticket_batch in self._fetch_tickets(start, end): + for ticket in ticket_batch: + doc_batch.append(_create_doc_from_ticket(ticket, self.domain)) + + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + + if doc_batch: + yield doc_batch + + def load_from_state(self) -> GenerateDocumentsOutput: + return self._process_tickets() + + def poll_source( + self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch + ) -> GenerateDocumentsOutput: + start_datetime = datetime.fromtimestamp(start, tz=timezone.utc) + end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) + + yield from self._process_tickets(start_datetime, end_datetime) diff --git a/backend/danswer/connectors/gitlab/connector.py b/backend/danswer/connectors/gitlab/connector.py index f07baf3e141..39ec443e709 100644 --- a/backend/danswer/connectors/gitlab/connector.py +++ b/backend/danswer/connectors/gitlab/connector.py @@ -24,6 +24,9 @@ from danswer.connectors.models import Section from danswer.utils.logger import setup_logger + +logger = setup_logger() + # List of directories/Files to exclude exclude_patterns = [ "logs", @@ -31,7 +34,6 @@ ".gitlab/", ".pre-commit-config.yaml", ] -logger = setup_logger() def _batch_gitlab_objects( diff --git a/backend/danswer/connectors/gmail/connector.py 
b/backend/danswer/connectors/gmail/connector.py index 42d2f305f73..268ad871f7c 100644 --- a/backend/danswer/connectors/gmail/connector.py +++ b/backend/danswer/connectors/gmail/connector.py @@ -1,221 +1,360 @@ from base64 import urlsafe_b64decode from typing import Any -from typing import cast from typing import Dict from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore -from googleapiclient import discovery # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc -from danswer.connectors.gmail.connector_auth import ( - get_gmail_creds_for_authorized_user, -) -from danswer.connectors.gmail.connector_auth import ( - get_gmail_creds_for_service_account, -) -from danswer.connectors.gmail.constants import ( - DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, -) -from danswer.connectors.gmail.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.gmail.constants import ( - GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, +from danswer.connectors.google_utils.google_auth import get_google_creds +from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval +from danswer.connectors.google_utils.resources import get_admin_service +from danswer.connectors.google_utils.resources import get_gmail_service +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_PRIMARY_ADMIN_KEY, ) +from danswer.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR +from danswer.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS +from danswer.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE +from danswer.connectors.google_utils.shared_constants import USER_FIELDS from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import GenerateSlimDocumentOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.interfaces import SlimConnector +from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.connectors.models import SlimDocument from danswer.utils.logger import setup_logger +from danswer.utils.retry_wrapper import retry_builder + logger = setup_logger() +# This is for the initial list call to get the thread ids +THREAD_LIST_FIELDS = "nextPageToken, threads(id)" + +# These are the fields to retrieve using the ID from the initial list call +PARTS_FIELDS = "parts(body(data), mimeType)" +PAYLOAD_FIELDS = f"payload(headers, {PARTS_FIELDS})" +MESSAGES_FIELDS = f"messages(id, {PAYLOAD_FIELDS})" +THREADS_FIELDS = f"threads(id, {MESSAGES_FIELDS})" +THREAD_FIELDS = f"id, {MESSAGES_FIELDS}" + +EMAIL_FIELDS = [ + "cc", + "bcc", + "from", + "to", +] + +add_retries = retry_builder(tries=50, max_delay=30) + + +def _build_time_range_query( + time_range_start: SecondsSinceUnixEpoch | None = None, + time_range_end: SecondsSinceUnixEpoch | None = None, +) -> str | None: + query = "" + if time_range_start is not None and time_range_start != 0: + query += f"after:{int(time_range_start)}" + if time_range_end is not None and time_range_end != 0: + query += f" 
before:{int(time_range_end)}" + query = query.strip() + + if len(query) == 0: + return None + + return query + + +def _clean_email_and_extract_name(email: str) -> tuple[str, str | None]: + email = email.strip() + if "<" in email and ">" in email: + # Handle format: "Display Name " + display_name = email[: email.find("<")].strip() + email_address = email[email.find("<") + 1 : email.find(">")].strip() + return email_address, display_name if display_name else None + else: + # Handle plain email address + return email.strip(), None + + +def _get_owners_from_emails(emails: dict[str, str | None]) -> list[BasicExpertInfo]: + owners = [] + for email, names in emails.items(): + if names: + name_parts = names.split(" ") + first_name = " ".join(name_parts[:-1]) + last_name = name_parts[-1] + else: + first_name = None + last_name = None + owners.append( + BasicExpertInfo(email=email, first_name=first_name, last_name=last_name) + ) + return owners + + +def _get_message_body(payload: dict[str, Any]) -> str: + parts = payload.get("parts", []) + message_body = "" + for part in parts: + mime_type = part.get("mimeType") + body = part.get("body") + if mime_type == "text/plain" and body: + data = body.get("data", "") + text = urlsafe_b64decode(data).decode() + message_body += text + return message_body + + +def message_to_section(message: Dict[str, Any]) -> tuple[Section, dict[str, str]]: + link = f"https://mail.google.com/mail/u/0/#inbox/{message['id']}" + + payload = message.get("payload", {}) + headers = payload.get("headers", []) + metadata: dict[str, Any] = {} + for header in headers: + name = header.get("name").lower() + value = header.get("value") + if name in EMAIL_FIELDS: + metadata[name] = value + if name == "subject": + metadata["subject"] = value + if name == "date": + metadata["updated_at"] = value + + if labels := message.get("labelIds"): + metadata["labels"] = labels + + message_data = "" + for name, value in metadata.items(): + # updated at isnt super useful for the llm + if name != "updated_at": + message_data += f"{name}: {value}\n" + + message_body_text: str = _get_message_body(payload) + + return Section(link=link, text=message_body_text + message_data), metadata + + +def thread_to_document(full_thread: Dict[str, Any]) -> Document | None: + all_messages = full_thread.get("messages", []) + if not all_messages: + return None + + sections = [] + semantic_identifier = "" + updated_at = None + from_emails: dict[str, str | None] = {} + other_emails: dict[str, str | None] = {} + for message in all_messages: + section, message_metadata = message_to_section(message) + sections.append(section) + + for name, value in message_metadata.items(): + if name in EMAIL_FIELDS: + email, display_name = _clean_email_and_extract_name(value) + if name == "from": + from_emails[email] = ( + display_name if not from_emails.get(email) else None + ) + else: + other_emails[email] = ( + display_name if not other_emails.get(email) else None + ) + + # If we haven't set the semantic identifier yet, set it to the subject of the first message + if not semantic_identifier: + semantic_identifier = message_metadata.get("subject", "") -class GmailConnector(LoadConnector, PollConnector): + if message_metadata.get("updated_at"): + updated_at = message_metadata.get("updated_at") + + updated_at_datetime = None + if updated_at: + updated_at_datetime = time_str_to_utc(updated_at) + + id = full_thread.get("id") + if not id: + raise ValueError("Thread ID is required") + + primary_owners = _get_owners_from_emails(from_emails) + 
secondary_owners = _get_owners_from_emails(other_emails) + + return Document( + id=id, + semantic_identifier=semantic_identifier, + sections=sections, + source=DocumentSource.GMAIL, + # This is used to perform permission sync + primary_owners=primary_owners, + secondary_owners=secondary_owners, + doc_updated_at=updated_at_datetime, + # Not adding emails to metadata because it's already in the sections + metadata={}, + ) + + +class GmailConnector(LoadConnector, PollConnector, SlimConnector): def __init__(self, batch_size: int = INDEX_BATCH_SIZE) -> None: self.batch_size = batch_size - self.creds: OAuthCredentials | ServiceAccountCredentials | None = None - def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None: - """Checks for two different types of credentials. - (1) A credential which holds a token acquired via a user going thorugh - the Google OAuth flow. - (2) A credential which holds a service account key JSON file, which - can then be used to impersonate any user in the workspace. - """ - creds: OAuthCredentials | ServiceAccountCredentials | None = None - new_creds_dict = None - if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials: - access_token_json_str = cast( - str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY] - ) - creds = get_gmail_creds_for_authorized_user( - token_json_str=access_token_json_str - ) + self._creds: OAuthCredentials | ServiceAccountCredentials | None = None + self._primary_admin_email: str | None = None - # tell caller to update token stored in DB if it has changed - # (e.g. the token has been refreshed) - new_creds_json_str = creds.to_json() if creds else "" - if new_creds_json_str != access_token_json_str: - new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str} - - if GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials: - service_account_key_json_str = credentials[ - GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY - ] - creds = get_gmail_creds_for_service_account( - service_account_key_json_str=service_account_key_json_str + @property + def primary_admin_email(self) -> str: + if self._primary_admin_email is None: + raise RuntimeError( + "Primary admin email missing, " + "should not call this property " + "before calling load_credentials" ) + return self._primary_admin_email - # "Impersonate" a user if one is specified - delegated_user_email = cast( - str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY) + @property + def google_domain(self) -> str: + if self._primary_admin_email is None: + raise RuntimeError( + "Primary admin email missing, " + "should not call this property " + "before calling load_credentials" ) - if delegated_user_email: - creds = creds.with_subject(delegated_user_email) if creds else None # type: ignore + return self._primary_admin_email.split("@")[-1] - if creds is None: - raise PermissionError( - "Unable to access Gmail - unknown credential structure." 
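For reference, the Gmail payload handling added above works on the standard users.threads/users.messages resource shape, in which each text/plain part carries a base64url-encoded body. A small standalone sketch of that decode path (the payload values below are made up for illustration):

from base64 import urlsafe_b64decode, urlsafe_b64encode

# Fabricated example of the payload structure the connector receives from the Gmail API.
payload = {
    "headers": [{"name": "From", "value": "Ada Lovelace <ada@example.com>"}],
    "parts": [
        {"mimeType": "text/plain",
         "body": {"data": urlsafe_b64encode(b"Hello world").decode()}},
        {"mimeType": "text/html",
         "body": {"data": urlsafe_b64encode(b"<p>Hello world</p>").decode()}},
    ],
}

# Mirrors _get_message_body above: only text/plain parts are decoded, HTML parts are skipped.
message_body = ""
for part in payload.get("parts", []):
    if part.get("mimeType") == "text/plain" and part.get("body"):
        message_body += urlsafe_b64decode(part["body"].get("data", "")).decode()
assert message_body == "Hello world"

The "From" header in this sample would then be split by _clean_email_and_extract_name into ("ada@example.com", "Ada Lovelace") before being turned into a BasicExpertInfo owner.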
+ @property + def creds(self) -> OAuthCredentials | ServiceAccountCredentials: + if self._creds is None: + raise RuntimeError( + "Creds missing, " + "should not call this property " + "before calling load_credentials" ) + return self._creds - self.creds = creds - return new_creds_dict + def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None: + primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] + self._primary_admin_email = primary_admin_email - def _get_email_body(self, payload: dict[str, Any]) -> str: - parts = payload.get("parts", []) - email_body = "" - for part in parts: - mime_type = part.get("mimeType") - body = part.get("body") - if mime_type == "text/plain": - data = body.get("data", "") - text = urlsafe_b64decode(data).decode() - email_body += text - return email_body - - def _email_to_document(self, full_email: Dict[str, Any]) -> Document: - email_id = full_email["id"] - payload = full_email["payload"] - headers = payload.get("headers") - labels = full_email.get("labelIds", []) - metadata = {} - if headers: - for header in headers: - name = header.get("name").lower() - value = header.get("value") - if name in ["from", "to", "subject", "date", "cc", "bcc"]: - metadata[name] = value - email_data = "" - for name, value in metadata.items(): - email_data += f"{name}: {value}\n" - metadata["labels"] = labels - logger.debug(f"{email_data}") - email_body_text: str = self._get_email_body(payload) - date_str = metadata.get("date") - email_updated_at = time_str_to_utc(date_str) if date_str else None - link = f"https://mail.google.com/mail/u/0/#inbox/{email_id}" - return Document( - id=email_id, - sections=[Section(link=link, text=email_data + email_body_text)], + self._creds, new_creds_dict = get_google_creds( + credentials=credentials, source=DocumentSource.GMAIL, - title=metadata.get("subject"), - semantic_identifier=metadata.get("subject", "Untitled Email"), - doc_updated_at=email_updated_at, - metadata=metadata, ) + return new_creds_dict - @staticmethod - def _build_time_range_query( - time_range_start: SecondsSinceUnixEpoch | None = None, - time_range_end: SecondsSinceUnixEpoch | None = None, - ) -> str | None: - query = "" - if time_range_start is not None and time_range_start != 0: - query += f"after:{int(time_range_start)}" - if time_range_end is not None and time_range_end != 0: - query += f" before:{int(time_range_end)}" - query = query.strip() - - if len(query) == 0: - return None - - return query + def _get_all_user_emails(self) -> list[str]: + admin_service = get_admin_service(self.creds, self.primary_admin_email) + emails = [] + for user in execute_paginated_retrieval( + retrieval_function=admin_service.users().list, + list_key="users", + fields=USER_FIELDS, + domain=self.google_domain, + ): + if email := user.get("primaryEmail"): + emails.append(email) + return emails - def _fetch_mails_from_gmail( + def _fetch_threads( self, time_range_start: SecondsSinceUnixEpoch | None = None, time_range_end: SecondsSinceUnixEpoch | None = None, ) -> GenerateDocumentsOutput: - if self.creds is None: - raise PermissionError("Not logged into Gmail") - page_token = "" - query = GmailConnector._build_time_range_query(time_range_start, time_range_end) - service = discovery.build("gmail", "v1", credentials=self.creds) - while page_token is not None: - result = ( - service.users() - .messages() - .list( - userId="me", - pageToken=page_token, - q=query, - maxResults=self.batch_size, - ) - .execute() - ) - page_token = result.get("nextPageToken") - 
messages = result.get("messages", []) - doc_batch = [] - for message in messages: - message_id = message["id"] - msg = ( - service.users() - .messages() - .get(userId="me", id=message_id, format="full") + query = _build_time_range_query(time_range_start, time_range_end) + doc_batch = [] + for user_email in self._get_all_user_emails(): + gmail_service = get_gmail_service(self.creds, user_email) + for thread in execute_paginated_retrieval( + retrieval_function=gmail_service.users().threads().list, + list_key="threads", + userId=user_email, + fields=THREAD_LIST_FIELDS, + q=query, + ): + full_thread = add_retries( + lambda: gmail_service.users() + .threads() + .get( + userId=user_email, + id=thread["id"], + fields=THREAD_FIELDS, + ) .execute() - ) - doc = self._email_to_document(msg) + )() + doc = thread_to_document(full_thread) + if doc is None: + continue doc_batch.append(doc) - if len(doc_batch) > 0: - yield doc_batch + if len(doc_batch) > self.batch_size: + yield doc_batch + doc_batch = [] + if doc_batch: + yield doc_batch + + def _fetch_slim_threads( + self, + time_range_start: SecondsSinceUnixEpoch | None = None, + time_range_end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: + query = _build_time_range_query(time_range_start, time_range_end) + doc_batch = [] + for user_email in self._get_all_user_emails(): + gmail_service = get_gmail_service(self.creds, user_email) + for thread in execute_paginated_retrieval( + retrieval_function=gmail_service.users().threads().list, + list_key="threads", + userId=user_email, + fields=THREAD_LIST_FIELDS, + q=query, + ): + doc_batch.append( + SlimDocument( + id=thread["id"], + perm_sync_data={"user_email": user_email}, + ) + ) + if len(doc_batch) > SLIM_BATCH_SIZE: + yield doc_batch + doc_batch = [] + if doc_batch: + yield doc_batch def load_from_state(self) -> GenerateDocumentsOutput: - yield from self._fetch_mails_from_gmail() + try: + yield from self._fetch_threads() + except Exception as e: + if MISSING_SCOPES_ERROR_STR in str(e): + raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e + raise e def poll_source( self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch ) -> GenerateDocumentsOutput: - yield from self._fetch_mails_from_gmail(start, end) + try: + yield from self._fetch_threads(start, end) + except Exception as e: + if MISSING_SCOPES_ERROR_STR in str(e): + raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e + raise e + def retrieve_all_slim_documents( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: + try: + yield from self._fetch_slim_threads(start, end) + except Exception as e: + if MISSING_SCOPES_ERROR_STR in str(e): + raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e + raise e -if __name__ == "__main__": - import json - import os - service_account_json_path = os.environ.get("GOOGLE_SERVICE_ACCOUNT_KEY_JSON_PATH") - if not service_account_json_path: - raise ValueError( - "Please set GOOGLE_SERVICE_ACCOUNT_KEY_JSON_PATH environment variable" - ) - with open(service_account_json_path) as f: - creds = json.load(f) - - credentials_dict = { - DB_CREDENTIALS_DICT_TOKEN_KEY: json.dumps(creds), - } - delegated_user = os.environ.get("GMAIL_DELEGATED_USER") - if delegated_user: - credentials_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user - - connector = GmailConnector() - connector.load_credentials( - json.loads(credentials_dict[DB_CREDENTIALS_DICT_TOKEN_KEY]) - ) - document_batch_generator = 
connector.load_from_state() - for document_batch in document_batch_generator: - print(document_batch) - break +if __name__ == "__main__": + pass diff --git a/backend/danswer/connectors/gmail/connector_auth.py b/backend/danswer/connectors/gmail/connector_auth.py deleted file mode 100644 index ad80d1e1eb1..00000000000 --- a/backend/danswer/connectors/gmail/connector_auth.py +++ /dev/null @@ -1,199 +0,0 @@ -import json -from typing import cast -from urllib.parse import parse_qs -from urllib.parse import ParseResult -from urllib.parse import urlparse - -from google.auth.transport.requests import Request # type: ignore -from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore -from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore -from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore -from sqlalchemy.orm import Session - -from danswer.configs.app_configs import WEB_DOMAIN -from danswer.configs.constants import DocumentSource -from danswer.configs.constants import KV_CRED_KEY -from danswer.configs.constants import KV_GMAIL_CRED_KEY -from danswer.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY -from danswer.connectors.gmail.constants import ( - DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, -) -from danswer.connectors.gmail.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.gmail.constants import ( - GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, -) -from danswer.connectors.gmail.constants import SCOPES -from danswer.db.credentials import update_credential_json -from danswer.db.models import User -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.server.documents.models import CredentialBase -from danswer.server.documents.models import GoogleAppCredentials -from danswer.server.documents.models import GoogleServiceAccountKey -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -def _build_frontend_gmail_redirect() -> str: - return f"{WEB_DOMAIN}/admin/connectors/gmail/auth/callback" - - -def get_gmail_creds_for_authorized_user( - token_json_str: str, -) -> OAuthCredentials | None: - creds_json = json.loads(token_json_str) - creds = OAuthCredentials.from_authorized_user_info(creds_json, SCOPES) - if creds.valid: - return creds - - if creds.expired and creds.refresh_token: - try: - creds.refresh(Request()) - if creds.valid: - logger.notice("Refreshed Gmail tokens.") - return creds - except Exception as e: - logger.exception(f"Failed to refresh gmail access token due to: {e}") - return None - - return None - - -def get_gmail_creds_for_service_account( - service_account_key_json_str: str, -) -> ServiceAccountCredentials | None: - service_account_key = json.loads(service_account_key_json_str) - creds = ServiceAccountCredentials.from_service_account_info( - service_account_key, scopes=SCOPES - ) - if not creds.valid or not creds.expired: - creds.refresh(Request()) - return creds if creds.valid else None - - -def verify_csrf(credential_id: int, state: str) -> None: - csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id))) - if csrf != state: - raise PermissionError( - "State from Gmail Connector callback does not match expected" - ) - - -def get_gmail_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) - credential_json = json.loads(creds_str) - flow = InstalledAppFlow.from_client_config( - credential_json, - scopes=SCOPES, - 
redirect_uri=_build_frontend_gmail_redirect(), - ) - auth_url, _ = flow.authorization_url(prompt="consent") - - parsed_url = cast(ParseResult, urlparse(auth_url)) - params = parse_qs(parsed_url.query) - - get_dynamic_config_store().store( - KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True - ) # type: ignore - return str(auth_url) - - -def get_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) - credential_json = json.loads(creds_str) - flow = InstalledAppFlow.from_client_config( - credential_json, - scopes=SCOPES, - redirect_uri=_build_frontend_gmail_redirect(), - ) - auth_url, _ = flow.authorization_url(prompt="consent") - - parsed_url = cast(ParseResult, urlparse(auth_url)) - params = parse_qs(parsed_url.query) - - get_dynamic_config_store().store( - KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True - ) # type: ignore - return str(auth_url) - - -def update_gmail_credential_access_tokens( - auth_code: str, - credential_id: int, - user: User, - db_session: Session, -) -> OAuthCredentials | None: - app_credentials = get_google_app_gmail_cred() - flow = InstalledAppFlow.from_client_config( - app_credentials.model_dump(), - scopes=SCOPES, - redirect_uri=_build_frontend_gmail_redirect(), - ) - flow.fetch_token(code=auth_code) - creds = flow.credentials - token_json_str = creds.to_json() - new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str} - - if not update_credential_json(credential_id, new_creds_dict, user, db_session): - return None - return creds - - -def build_service_account_creds( - delegated_user_email: str | None = None, -) -> CredentialBase: - service_account_key = get_gmail_service_account_key() - - credential_dict = { - GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(), - } - if delegated_user_email: - credential_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user_email - - return CredentialBase( - source=DocumentSource.GMAIL, - credential_json=credential_dict, - admin_public=True, - ) - - -def get_google_app_gmail_cred() -> GoogleAppCredentials: - creds_str = str(get_dynamic_config_store().load(KV_GMAIL_CRED_KEY)) - return GoogleAppCredentials(**json.loads(creds_str)) - - -def upsert_google_app_gmail_cred(app_credentials: GoogleAppCredentials) -> None: - get_dynamic_config_store().store( - KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True - ) - - -def delete_google_app_gmail_cred() -> None: - get_dynamic_config_store().delete(KV_GMAIL_CRED_KEY) - - -def get_gmail_service_account_key() -> GoogleServiceAccountKey: - creds_str = str(get_dynamic_config_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY)) - return GoogleServiceAccountKey(**json.loads(creds_str)) - - -def upsert_gmail_service_account_key( - service_account_key: GoogleServiceAccountKey, -) -> None: - get_dynamic_config_store().store( - KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True - ) - - -def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: - get_dynamic_config_store().store( - KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True - ) - - -def delete_gmail_service_account_key() -> None: - get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY) - - -def delete_service_account_key() -> None: - get_dynamic_config_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY) diff --git a/backend/danswer/connectors/gmail/constants.py b/backend/danswer/connectors/gmail/constants.py deleted file mode 
100644 index 36eff081818..00000000000 --- a/backend/danswer/connectors/gmail/constants.py +++ /dev/null @@ -1,4 +0,0 @@ -DB_CREDENTIALS_DICT_TOKEN_KEY = "gmail_tokens" -GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "gmail_service_account_key" -DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "gmail_delegated_user" -SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"] diff --git a/backend/danswer/connectors/google_drive/connector.py b/backend/danswer/connectors/google_drive/connector.py index bf267ab7786..6d23606260d 100644 --- a/backend/danswer/connectors/google_drive/connector.py +++ b/backend/danswer/connectors/google_drive/connector.py @@ -1,520 +1,333 @@ -import io from collections.abc import Iterator -from collections.abc import Sequence -from datetime import datetime -from datetime import timezone -from enum import Enum -from itertools import chain from typing import Any from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore -from googleapiclient import discovery # type: ignore -from googleapiclient.errors import HttpError # type: ignore -from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE -from danswer.configs.app_configs import GOOGLE_DRIVE_FOLLOW_SHORTCUTS -from danswer.configs.app_configs import GOOGLE_DRIVE_INCLUDE_SHARED -from danswer.configs.app_configs import GOOGLE_DRIVE_ONLY_ORG_PUBLIC from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource -from danswer.configs.constants import IGNORE_FOR_QA -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder -from danswer.connectors.google_drive.connector_auth import get_google_drive_creds -from danswer.connectors.google_drive.constants import ( - DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, +from danswer.connectors.google_drive.doc_conversion import ( + convert_drive_item_to_document, ) -from danswer.connectors.google_drive.constants import ( - DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, +from danswer.connectors.google_drive.file_retrieval import crawl_folders_for_files +from danswer.connectors.google_drive.file_retrieval import get_files_in_my_drive +from danswer.connectors.google_drive.file_retrieval import get_files_in_shared_drive +from danswer.connectors.google_drive.models import GoogleDriveFileType +from danswer.connectors.google_utils.google_auth import get_google_creds +from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval +from danswer.connectors.google_utils.resources import get_admin_service +from danswer.connectors.google_utils.resources import get_drive_service +from danswer.connectors.google_utils.resources import get_google_docs_service +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_PRIMARY_ADMIN_KEY, ) +from danswer.connectors.google_utils.shared_constants import MISSING_SCOPES_ERROR_STR +from danswer.connectors.google_utils.shared_constants import ONYX_SCOPE_INSTRUCTIONS +from danswer.connectors.google_utils.shared_constants import SCOPE_DOC_URL +from danswer.connectors.google_utils.shared_constants import SLIM_BATCH_SIZE +from danswer.connectors.google_utils.shared_constants import USER_FIELDS from danswer.connectors.interfaces import GenerateDocumentsOutput +from danswer.connectors.interfaces import GenerateSlimDocumentOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector 
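Both rewritten Google connectors page through the client libraries via execute_paginated_retrieval from danswer.connectors.google_utils.google_utils, whose implementation is not part of this diff. Judging from its call sites (a bound list method, the key holding the items, and keyword arguments forwarded to the API call) and from the nextPageToken loop in the code being removed below, it plausibly looks roughly like the following sketch; the exact signature is an assumption:

from collections.abc import Callable, Iterator
from typing import Any

def execute_paginated_retrieval(
    retrieval_function: Callable[..., Any],  # e.g. gmail_service.users().threads().list
    list_key: str,                           # e.g. "threads", "users", "drives"
    **kwargs: Any,                           # query params forwarded as-is (fields, q, userId, ...)
) -> Iterator[dict[str, Any]]:
    # Standard Google API pagination: keep requesting pages until no nextPageToken is returned.
    next_page_token: str | None = None
    while True:
        if next_page_token:
            kwargs["pageToken"] = next_page_token
        results = retrieval_function(**kwargs).execute()
        yield from results.get(list_key, [])
        next_page_token = results.get("nextPageToken")
        if not next_page_token:
            break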
from danswer.connectors.interfaces import SecondsSinceUnixEpoch -from danswer.connectors.models import Document -from danswer.connectors.models import Section -from danswer.file_processing.extract_file_text import docx_to_text -from danswer.file_processing.extract_file_text import pptx_to_text -from danswer.file_processing.extract_file_text import read_pdf_file -from danswer.utils.batching import batch_generator +from danswer.connectors.interfaces import SlimConnector +from danswer.connectors.models import SlimDocument from danswer.utils.logger import setup_logger logger = setup_logger() -DRIVE_FOLDER_TYPE = "application/vnd.google-apps.folder" -DRIVE_SHORTCUT_TYPE = "application/vnd.google-apps.shortcut" -UNSUPPORTED_FILE_TYPE_CONTENT = "" # keep empty for now - - -class GDriveMimeType(str, Enum): - DOC = "application/vnd.google-apps.document" - SPREADSHEET = "application/vnd.google-apps.spreadsheet" - PDF = "application/pdf" - WORD_DOC = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" - PPT = "application/vnd.google-apps.presentation" - POWERPOINT = ( - "application/vnd.openxmlformats-officedocument.presentationml.presentation" - ) - PLAIN_TEXT = "text/plain" - MARKDOWN = "text/markdown" - - -GoogleDriveFileType = dict[str, Any] - -# Google Drive APIs are quite flakey and may 500 for an -# extended period of time. Trying to combat here by adding a very -# long retry period (~20 minutes of trying every minute) -add_retries = retry_builder(tries=50, max_delay=30) - - -def _run_drive_file_query( - service: discovery.Resource, - query: str, - continue_on_failure: bool, - include_shared: bool = GOOGLE_DRIVE_INCLUDE_SHARED, - follow_shortcuts: bool = GOOGLE_DRIVE_FOLLOW_SHORTCUTS, - batch_size: int = INDEX_BATCH_SIZE, -) -> Iterator[GoogleDriveFileType]: - next_page_token = "" - while next_page_token is not None: - logger.debug(f"Running Google Drive fetch with query: {query}") - results = add_retries( - lambda: ( - service.files() - .list( - corpora="allDrives" - if include_shared - else "user", # needed to search through shared drives - pageSize=batch_size, - supportsAllDrives=include_shared, - includeItemsFromAllDrives=include_shared, - fields=( - "nextPageToken, files(mimeType, id, name, permissions, " - "modifiedTime, webViewLink, shortcutDetails)" - ), - pageToken=next_page_token, - q=query, - ) - .execute() - ) - )() - next_page_token = results.get("nextPageToken") - files = results["files"] - for file in files: - if follow_shortcuts and "shortcutDetails" in file: - try: - file_shortcut_points_to = add_retries( - lambda: ( - service.files() - .get( - fileId=file["shortcutDetails"]["targetId"], - supportsAllDrives=include_shared, - fields="mimeType, id, name, modifiedTime, webViewLink, permissions, shortcutDetails", - ) - .execute() - ) - )() - yield file_shortcut_points_to - except HttpError: - logger.error( - f"Failed to follow shortcut with details: {file['shortcutDetails']}" - ) - if continue_on_failure: - continue - raise - else: - yield file - - -def _get_folder_id( - service: discovery.Resource, - parent_id: str, - folder_name: str, - include_shared: bool, - follow_shortcuts: bool, -) -> str | None: - """ - Get the ID of a folder given its name and the ID of its parent folder. 
- """ - query = f"'{parent_id}' in parents and name='{folder_name}' and " - if follow_shortcuts: - query += f"(mimeType='{DRIVE_FOLDER_TYPE}' or mimeType='{DRIVE_SHORTCUT_TYPE}')" - else: - query += f"mimeType='{DRIVE_FOLDER_TYPE}'" - - # TODO: support specifying folder path in shared drive rather than just `My Drive` - results = add_retries( - lambda: ( - service.files() - .list( - q=query, - spaces="drive", - fields="nextPageToken, files(id, name, shortcutDetails)", - supportsAllDrives=include_shared, - includeItemsFromAllDrives=include_shared, - ) - .execute() - ) - )() - items = results.get("files", []) - - folder_id = None - if items: - if follow_shortcuts and "shortcutDetails" in items[0]: - folder_id = items[0]["shortcutDetails"]["targetId"] - else: - folder_id = items[0]["id"] - return folder_id - - -def _get_folders( - service: discovery.Resource, - continue_on_failure: bool, - folder_id: str | None = None, # if specified, only fetches files within this folder - include_shared: bool = GOOGLE_DRIVE_INCLUDE_SHARED, - follow_shortcuts: bool = GOOGLE_DRIVE_FOLLOW_SHORTCUTS, - batch_size: int = INDEX_BATCH_SIZE, -) -> Iterator[GoogleDriveFileType]: - query = f"mimeType = '{DRIVE_FOLDER_TYPE}' " - if follow_shortcuts: - query = "(" + query + f" or mimeType = '{DRIVE_SHORTCUT_TYPE}'" + ") " - - if folder_id: - query += f"and '{folder_id}' in parents " - query = query.rstrip() # remove the trailing space(s) - - for file in _run_drive_file_query( - service=service, - query=query, - continue_on_failure=continue_on_failure, - include_shared=include_shared, - follow_shortcuts=follow_shortcuts, - batch_size=batch_size, - ): - # Need to check this since file may have been a target of a shortcut - # and not necessarily a folder - if file["mimeType"] == DRIVE_FOLDER_TYPE: - yield file - else: - pass - - -def _get_files( - service: discovery.Resource, - continue_on_failure: bool, - time_range_start: SecondsSinceUnixEpoch | None = None, - time_range_end: SecondsSinceUnixEpoch | None = None, - folder_id: str | None = None, # if specified, only fetches files within this folder - include_shared: bool = GOOGLE_DRIVE_INCLUDE_SHARED, - follow_shortcuts: bool = GOOGLE_DRIVE_FOLLOW_SHORTCUTS, - batch_size: int = INDEX_BATCH_SIZE, -) -> Iterator[GoogleDriveFileType]: - query = f"mimeType != '{DRIVE_FOLDER_TYPE}' " - if time_range_start is not None: - time_start = datetime.utcfromtimestamp(time_range_start).isoformat() + "Z" - query += f"and modifiedTime >= '{time_start}' " - if time_range_end is not None: - time_stop = datetime.utcfromtimestamp(time_range_end).isoformat() + "Z" - query += f"and modifiedTime <= '{time_stop}' " - if folder_id: - query += f"and '{folder_id}' in parents " - query = query.rstrip() # remove the trailing space(s) - - files = _run_drive_file_query( - service=service, - query=query, - continue_on_failure=continue_on_failure, - include_shared=include_shared, - follow_shortcuts=follow_shortcuts, - batch_size=batch_size, - ) - - return files - - -def get_all_files_batched( - service: discovery.Resource, - continue_on_failure: bool, - include_shared: bool = GOOGLE_DRIVE_INCLUDE_SHARED, - follow_shortcuts: bool = GOOGLE_DRIVE_FOLLOW_SHORTCUTS, - batch_size: int = INDEX_BATCH_SIZE, - time_range_start: SecondsSinceUnixEpoch | None = None, - time_range_end: SecondsSinceUnixEpoch | None = None, - folder_id: str | None = None, # if specified, only fetches files within this folder - # if True, will fetch files in sub-folders of the specified folder ID. 
- # Only applies if folder_id is specified. - traverse_subfolders: bool = True, - folder_ids_traversed: list[str] | None = None, -) -> Iterator[list[GoogleDriveFileType]]: - """Gets all files matching the criteria specified by the args from Google Drive - in batches of size `batch_size`. - """ - found_files = _get_files( - service=service, - continue_on_failure=continue_on_failure, - time_range_start=time_range_start, - time_range_end=time_range_end, - folder_id=folder_id, - include_shared=include_shared, - follow_shortcuts=follow_shortcuts, - batch_size=batch_size, - ) - yield from batch_generator( - items=found_files, - batch_size=batch_size, - pre_batch_yield=lambda batch_files: logger.debug( - f"Parseable Documents in batch: {[file['name'] for file in batch_files]}" - ), - ) - - if traverse_subfolders and folder_id is not None: - folder_ids_traversed = folder_ids_traversed or [] - subfolders = _get_folders( - service=service, - folder_id=folder_id, - continue_on_failure=continue_on_failure, - include_shared=include_shared, - follow_shortcuts=follow_shortcuts, - batch_size=batch_size, - ) - for subfolder in subfolders: - if subfolder["id"] not in folder_ids_traversed: - logger.info("Fetching all files in subfolder: " + subfolder["name"]) - folder_ids_traversed.append(subfolder["id"]) - yield from get_all_files_batched( - service=service, - continue_on_failure=continue_on_failure, - include_shared=include_shared, - follow_shortcuts=follow_shortcuts, - batch_size=batch_size, - time_range_start=time_range_start, - time_range_end=time_range_end, - folder_id=subfolder["id"], - traverse_subfolders=traverse_subfolders, - folder_ids_traversed=folder_ids_traversed, - ) - else: - logger.debug( - "Skipping subfolder since already traversed: " + subfolder["name"] - ) +def _extract_str_list_from_comma_str(string: str | None) -> list[str]: + if not string: + return [] + return [s.strip() for s in string.split(",") if s.strip()] -def extract_text(file: dict[str, str], service: discovery.Resource) -> str: - mime_type = file["mimeType"] - if mime_type not in set(item.value for item in GDriveMimeType): - # Unsupported file types can still have a title, finding this way is still useful - return UNSUPPORTED_FILE_TYPE_CONTENT +def _extract_ids_from_urls(urls: list[str]) -> list[str]: + return [url.split("/")[-1] for url in urls] - if mime_type in [ - GDriveMimeType.DOC.value, - GDriveMimeType.PPT.value, - GDriveMimeType.SPREADSHEET.value, - ]: - export_mime_type = ( - "text/plain" - if mime_type != GDriveMimeType.SPREADSHEET.value - else "text/csv" - ) - return ( - service.files() - .export(fileId=file["id"], mimeType=export_mime_type) - .execute() - .decode("utf-8") - ) - elif mime_type in [ - GDriveMimeType.PLAIN_TEXT.value, - GDriveMimeType.MARKDOWN.value, - ]: - return service.files().get_media(fileId=file["id"]).execute().decode("utf-8") - elif mime_type == GDriveMimeType.WORD_DOC.value: - response = service.files().get_media(fileId=file["id"]).execute() - return docx_to_text(file=io.BytesIO(response)) - elif mime_type == GDriveMimeType.PDF.value: - response = service.files().get_media(fileId=file["id"]).execute() - text, _ = read_pdf_file(file=io.BytesIO(response)) - return text - elif mime_type == GDriveMimeType.POWERPOINT.value: - response = service.files().get_media(fileId=file["id"]).execute() - return pptx_to_text(file=io.BytesIO(response)) - - return UNSUPPORTED_FILE_TYPE_CONTENT - - -class GoogleDriveConnector(LoadConnector, PollConnector): + +class GoogleDriveConnector(LoadConnector, 
PollConnector, SlimConnector): def __init__( self, - # optional list of folder paths e.g. "[My Folder/My Subfolder]" - # if specified, will only index files in these folders - folder_paths: list[str] | None = None, + include_shared_drives: bool = True, + shared_drive_urls: str | None = None, + include_my_drives: bool = True, + my_drive_emails: str | None = None, + shared_folder_urls: str | None = None, batch_size: int = INDEX_BATCH_SIZE, - include_shared: bool = GOOGLE_DRIVE_INCLUDE_SHARED, - follow_shortcuts: bool = GOOGLE_DRIVE_FOLLOW_SHORTCUTS, - only_org_public: bool = GOOGLE_DRIVE_ONLY_ORG_PUBLIC, - continue_on_failure: bool = CONTINUE_ON_CONNECTOR_FAILURE, + # OLD PARAMETERS + folder_paths: list[str] | None = None, + include_shared: bool | None = None, + follow_shortcuts: bool | None = None, + only_org_public: bool | None = None, + continue_on_failure: bool | None = None, ) -> None: - self.folder_paths = folder_paths or [] + # Check for old input parameters + if ( + folder_paths is not None + or include_shared is not None + or follow_shortcuts is not None + or only_org_public is not None + or continue_on_failure is not None + ): + logger.exception( + "Google Drive connector received old input parameters. " + "Please visit the docs for help with the new setup: " + f"{SCOPE_DOC_URL}" + ) + raise ValueError( + "Google Drive connector received old input parameters. " + "Please visit the docs for help with the new setup: " + f"{SCOPE_DOC_URL}" + ) + + if ( + not include_shared_drives + and not include_my_drives + and not shared_folder_urls + ): + raise ValueError( + "At least one of include_shared_drives, include_my_drives," + " or shared_folder_urls must be true" + ) + self.batch_size = batch_size - self.include_shared = include_shared - self.follow_shortcuts = follow_shortcuts - self.only_org_public = only_org_public - self.continue_on_failure = continue_on_failure - self.creds: OAuthCredentials | ServiceAccountCredentials | None = None - - @staticmethod - def _process_folder_paths( - service: discovery.Resource, - folder_paths: list[str], - include_shared: bool, - follow_shortcuts: bool, - ) -> list[str]: - """['Folder/Sub Folder'] -> ['']""" - folder_ids: list[str] = [] - for path in folder_paths: - folder_names = path.split("/") - parent_id = "root" - for folder_name in folder_names: - found_parent_id = _get_folder_id( - service=service, - parent_id=parent_id, - folder_name=folder_name, - include_shared=include_shared, - follow_shortcuts=follow_shortcuts, - ) - if found_parent_id is None: - raise ValueError( - ( - f"Folder '{folder_name}' in path '{path}' " - "not found in Google Drive" - ) - ) - parent_id = found_parent_id - folder_ids.append(parent_id) - - return folder_ids + + self.include_shared_drives = include_shared_drives + shared_drive_url_list = _extract_str_list_from_comma_str(shared_drive_urls) + self.shared_drive_ids = _extract_ids_from_urls(shared_drive_url_list) + + self.include_my_drives = include_my_drives + self.my_drive_emails = _extract_str_list_from_comma_str(my_drive_emails) + + shared_folder_url_list = _extract_str_list_from_comma_str(shared_folder_urls) + self.shared_folder_ids = _extract_ids_from_urls(shared_folder_url_list) + + self._primary_admin_email: str | None = None + + self._creds: OAuthCredentials | ServiceAccountCredentials | None = None + + self._TRAVERSED_PARENT_IDS: set[str] = set() + + @property + def primary_admin_email(self) -> str: + if self._primary_admin_email is None: + raise RuntimeError( + "Primary admin email missing, " + "should not 
call this property " + "before calling load_credentials" + ) + return self._primary_admin_email + + @property + def google_domain(self) -> str: + if self._primary_admin_email is None: + raise RuntimeError( + "Primary admin email missing, " + "should not call this property " + "before calling load_credentials" + ) + return self._primary_admin_email.split("@")[-1] + + @property + def creds(self) -> OAuthCredentials | ServiceAccountCredentials: + if self._creds is None: + raise RuntimeError( + "Creds missing, " + "should not call this property " + "before calling load_credentials" + ) + return self._creds + + def _update_traversed_parent_ids(self, folder_id: str) -> None: + self._TRAVERSED_PARENT_IDS.add(folder_id) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, str] | None: - """Checks for two different types of credentials. - (1) A credential which holds a token acquired via a user going thorough - the Google OAuth flow. - (2) A credential which holds a service account key JSON file, which - can then be used to impersonate any user in the workspace. - """ - creds, new_creds_dict = get_google_drive_creds(credentials) - self.creds = creds + primary_admin_email = credentials[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] + self._primary_admin_email = primary_admin_email + + self._creds, new_creds_dict = get_google_creds( + credentials=credentials, + source=DocumentSource.GOOGLE_DRIVE, + ) return new_creds_dict - def _fetch_docs_from_drive( + def _get_all_user_emails(self) -> list[str]: + admin_service = get_admin_service( + creds=self.creds, + user_email=self.primary_admin_email, + ) + emails = [] + for user in execute_paginated_retrieval( + retrieval_function=admin_service.users().list, + list_key="users", + fields=USER_FIELDS, + domain=self.google_domain, + ): + if email := user.get("primaryEmail"): + emails.append(email) + return emails + + def _fetch_drive_items( self, + is_slim: bool, start: SecondsSinceUnixEpoch | None = None, end: SecondsSinceUnixEpoch | None = None, - ) -> GenerateDocumentsOutput: - if self.creds is None: - raise PermissionError("Not logged into Google Drive") - - service = discovery.build("drive", "v3", credentials=self.creds) - folder_ids: Sequence[str | None] = self._process_folder_paths( - service, self.folder_paths, self.include_shared, self.follow_shortcuts + ) -> Iterator[GoogleDriveFileType]: + primary_drive_service = get_drive_service( + creds=self.creds, + user_email=self.primary_admin_email, ) - if not folder_ids: - folder_ids = [None] - - file_batches = chain( - *[ - get_all_files_batched( - service=service, - continue_on_failure=self.continue_on_failure, - include_shared=self.include_shared, - follow_shortcuts=self.follow_shortcuts, - batch_size=self.batch_size, - time_range_start=start, - time_range_end=end, - folder_id=folder_id, - traverse_subfolders=True, + + if self.include_shared_drives: + shared_drive_urls = self.shared_drive_ids + if not shared_drive_urls: + # if no parent ids are specified, get all shared drives using the admin account + for drive in execute_paginated_retrieval( + retrieval_function=primary_drive_service.drives().list, + list_key="drives", + useDomainAdminAccess=True, + fields="drives(id)", + ): + shared_drive_urls.append(drive["id"]) + + # For each shared drive, retrieve all files + for shared_drive_id in shared_drive_urls: + for file in get_files_in_shared_drive( + service=primary_drive_service, + drive_id=shared_drive_id, + is_slim=is_slim, + cache_folders=bool(self.shared_folder_ids), + 
update_traversed_ids_func=self._update_traversed_parent_ids, + start=start, + end=end, + ): + yield file + + if self.shared_folder_ids: + # Crawl all the shared parent ids for files + for folder_id in self.shared_folder_ids: + yield from crawl_folders_for_files( + service=primary_drive_service, + parent_id=folder_id, + personal_drive=False, + traversed_parent_ids=self._TRAVERSED_PARENT_IDS, + update_traversed_ids_func=self._update_traversed_parent_ids, + start=start, + end=end, + ) + + all_user_emails = [] + # get all personal docs from each users' personal drive + if self.include_my_drives: + if isinstance(self.creds, ServiceAccountCredentials): + all_user_emails = self.my_drive_emails or [] + + # If using service account and no emails specified, fetch all users + if not all_user_emails: + all_user_emails = self._get_all_user_emails() + + elif self.primary_admin_email: + # If using OAuth, only fetch the primary admin email + all_user_emails = [self.primary_admin_email] + + for email in all_user_emails: + logger.info(f"Fetching personal files for user: {email}") + user_drive_service = get_drive_service(self.creds, user_email=email) + + yield from get_files_in_my_drive( + service=user_drive_service, + email=email, + is_slim=is_slim, + start=start, + end=end, ) - for folder_id in folder_ids - ] - ) - for files_batch in file_batches: - doc_batch = [] - for file in files_batch: - try: - # Skip files that are shortcuts - if file.get("mimeType") == DRIVE_SHORTCUT_TYPE: - logger.info("Ignoring Drive Shortcut Filetype") - continue - - if self.only_org_public: - if "permissions" not in file: - continue - if not any( - permission["type"] == "domain" - for permission in file["permissions"] - ): - continue - - text_contents = extract_text(file, service) or "" - - doc_batch.append( - Document( - id=file["webViewLink"], - sections=[ - Section(link=file["webViewLink"], text=text_contents) - ], - source=DocumentSource.GOOGLE_DRIVE, - semantic_identifier=file["name"], - doc_updated_at=datetime.fromisoformat( - file["modifiedTime"] - ).astimezone(timezone.utc), - metadata={} if text_contents else {IGNORE_FOR_QA: "True"}, - additional_info=file.get("id"), - ) - ) - except Exception as e: - if not self.continue_on_failure: - raise e - - logger.exception( - "Ran into exception when pulling a file from Google Drive" - ) - - yield doc_batch + + def _extract_docs_from_google_drive( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateDocumentsOutput: + doc_batch = [] + for file in self._fetch_drive_items( + is_slim=False, + start=start, + end=end, + ): + user_email = ( + file.get("owners", [{}])[0].get("emailAddress") + or self.primary_admin_email + ) + user_drive_service = get_drive_service(self.creds, user_email=user_email) + docs_service = get_google_docs_service(self.creds, user_email=user_email) + if doc := convert_drive_item_to_document( + file=file, + drive_service=user_drive_service, + docs_service=docs_service, + ): + doc_batch.append(doc) + if len(doc_batch) >= self.batch_size: + yield doc_batch + doc_batch = [] + + yield doc_batch def load_from_state(self) -> GenerateDocumentsOutput: - yield from self._fetch_docs_from_drive() + try: + yield from self._extract_docs_from_google_drive() + except Exception as e: + if MISSING_SCOPES_ERROR_STR in str(e): + raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e + raise e def poll_source( self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch ) -> GenerateDocumentsOutput: - # need to subtract 
10 minutes from start time to account for modifiedTime - # propogation if a document is modified, it takes some time for the API to - # reflect these changes if we do not have an offset, then we may "miss" the - # update when polling - yield from self._fetch_docs_from_drive(start, end) - - -if __name__ == "__main__": - import json - import os + try: + yield from self._extract_docs_from_google_drive(start, end) + except Exception as e: + if MISSING_SCOPES_ERROR_STR in str(e): + raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e + raise e + + def _extract_slim_docs_from_google_drive( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: + slim_batch = [] + for file in self._fetch_drive_items( + is_slim=True, + start=start, + end=end, + ): + slim_batch.append( + SlimDocument( + id=file["webViewLink"], + perm_sync_data={ + "doc_id": file.get("id"), + "permissions": file.get("permissions", []), + "permission_ids": file.get("permissionIds", []), + "name": file.get("name"), + "owner_email": file.get("owners", [{}])[0].get("emailAddress"), + }, + ) + ) + if len(slim_batch) >= SLIM_BATCH_SIZE: + yield slim_batch + slim_batch = [] + yield slim_batch - service_account_json_path = os.environ.get("GOOGLE_SERVICE_ACCOUNT_KEY_JSON_PATH") - if not service_account_json_path: - raise ValueError( - "Please set GOOGLE_SERVICE_ACCOUNT_KEY_JSON_PATH environment variable" - ) - with open(service_account_json_path) as f: - creds = json.load(f) - - credentials_dict = { - DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: json.dumps(creds), - } - delegated_user = os.environ.get("GOOGLE_DRIVE_DELEGATED_USER") - if delegated_user: - credentials_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user - - connector = GoogleDriveConnector(include_shared=True, follow_shortcuts=True) - connector.load_credentials(credentials_dict) - document_batch_generator = connector.load_from_state() - for document_batch in document_batch_generator: - print(document_batch) - break + def retrieve_all_slim_documents( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: + try: + yield from self._extract_slim_docs_from_google_drive(start, end) + except Exception as e: + if MISSING_SCOPES_ERROR_STR in str(e): + raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e + raise e diff --git a/backend/danswer/connectors/google_drive/connector_auth.py b/backend/danswer/connectors/google_drive/connector_auth.py deleted file mode 100644 index cc68fec54ea..00000000000 --- a/backend/danswer/connectors/google_drive/connector_auth.py +++ /dev/null @@ -1,233 +0,0 @@ -import json -from typing import cast -from urllib.parse import parse_qs -from urllib.parse import ParseResult -from urllib.parse import urlparse - -from google.auth.transport.requests import Request # type: ignore -from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore -from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore -from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore -from sqlalchemy.orm import Session - -from danswer.configs.app_configs import ENTERPRISE_EDITION_ENABLED -from danswer.configs.app_configs import WEB_DOMAIN -from danswer.configs.constants import DocumentSource -from danswer.configs.constants import KV_CRED_KEY -from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY -from danswer.configs.constants import 
KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY -from danswer.connectors.google_drive.constants import BASE_SCOPES -from danswer.connectors.google_drive.constants import ( - DB_CREDENTIALS_DICT_DELEGATED_USER_KEY, -) -from danswer.connectors.google_drive.constants import ( - DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, -) -from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY -from danswer.connectors.google_drive.constants import FETCH_GROUPS_SCOPES -from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES -from danswer.db.credentials import update_credential_json -from danswer.db.models import User -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.server.documents.models import CredentialBase -from danswer.server.documents.models import GoogleAppCredentials -from danswer.server.documents.models import GoogleServiceAccountKey -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -def build_gdrive_scopes() -> list[str]: - base_scopes: list[str] = BASE_SCOPES - permissions_scopes: list[str] = FETCH_PERMISSIONS_SCOPES - groups_scopes: list[str] = FETCH_GROUPS_SCOPES - - if ENTERPRISE_EDITION_ENABLED: - return base_scopes + permissions_scopes + groups_scopes - return base_scopes + permissions_scopes - - -def _build_frontend_google_drive_redirect() -> str: - return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback" - - -def get_google_drive_creds_for_authorized_user( - token_json_str: str, scopes: list[str] = build_gdrive_scopes() -) -> OAuthCredentials | None: - creds_json = json.loads(token_json_str) - creds = OAuthCredentials.from_authorized_user_info(creds_json, scopes) - if creds.valid: - return creds - - if creds.expired and creds.refresh_token: - try: - creds.refresh(Request()) - if creds.valid: - logger.notice("Refreshed Google Drive tokens.") - return creds - except Exception as e: - logger.exception(f"Failed to refresh google drive access token due to: {e}") - return None - - return None - - -def _get_google_drive_creds_for_service_account( - service_account_key_json_str: str, scopes: list[str] = build_gdrive_scopes() -) -> ServiceAccountCredentials | None: - service_account_key = json.loads(service_account_key_json_str) - creds = ServiceAccountCredentials.from_service_account_info( - service_account_key, scopes=scopes - ) - if not creds.valid or not creds.expired: - creds.refresh(Request()) - return creds if creds.valid else None - - -def get_google_drive_creds( - credentials: dict[str, str], scopes: list[str] = build_gdrive_scopes() -) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]: - oauth_creds = None - service_creds = None - new_creds_dict = None - if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials: - access_token_json_str = cast(str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]) - oauth_creds = get_google_drive_creds_for_authorized_user( - token_json_str=access_token_json_str, scopes=scopes - ) - - # tell caller to update token stored in DB if it has changed - # (e.g. 
the token has been refreshed) - new_creds_json_str = oauth_creds.to_json() if oauth_creds else "" - if new_creds_json_str != access_token_json_str: - new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str} - - elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials: - service_account_key_json_str = credentials[ - DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY - ] - service_creds = _get_google_drive_creds_for_service_account( - service_account_key_json_str=service_account_key_json_str, - scopes=scopes, - ) - - # "Impersonate" a user if one is specified - delegated_user_email = cast( - str | None, credentials.get(DB_CREDENTIALS_DICT_DELEGATED_USER_KEY) - ) - if delegated_user_email: - service_creds = ( - service_creds.with_subject(delegated_user_email) - if service_creds - else None - ) - - creds: ServiceAccountCredentials | OAuthCredentials | None = ( - oauth_creds or service_creds - ) - if creds is None: - raise PermissionError( - "Unable to access Google Drive - unknown credential structure." - ) - - return creds, new_creds_dict - - -def verify_csrf(credential_id: int, state: str) -> None: - csrf = get_dynamic_config_store().load(KV_CRED_KEY.format(str(credential_id))) - if csrf != state: - raise PermissionError( - "State from Google Drive Connector callback does not match expected" - ) - - -def get_auth_url(credential_id: int) -> str: - creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) - credential_json = json.loads(creds_str) - flow = InstalledAppFlow.from_client_config( - credential_json, - scopes=build_gdrive_scopes(), - redirect_uri=_build_frontend_google_drive_redirect(), - ) - auth_url, _ = flow.authorization_url(prompt="consent") - - parsed_url = cast(ParseResult, urlparse(auth_url)) - params = parse_qs(parsed_url.query) - - get_dynamic_config_store().store( - KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True - ) # type: ignore - return str(auth_url) - - -def update_credential_access_tokens( - auth_code: str, - credential_id: int, - user: User, - db_session: Session, -) -> OAuthCredentials | None: - app_credentials = get_google_app_cred() - flow = InstalledAppFlow.from_client_config( - app_credentials.model_dump(), - scopes=build_gdrive_scopes(), - redirect_uri=_build_frontend_google_drive_redirect(), - ) - flow.fetch_token(code=auth_code) - creds = flow.credentials - token_json_str = creds.to_json() - new_creds_dict = {DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str} - - if not update_credential_json(credential_id, new_creds_dict, user, db_session): - return None - return creds - - -def build_service_account_creds( - source: DocumentSource, - delegated_user_email: str | None = None, -) -> CredentialBase: - service_account_key = get_service_account_key() - - credential_dict = { - DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(), - } - if delegated_user_email: - credential_dict[DB_CREDENTIALS_DICT_DELEGATED_USER_KEY] = delegated_user_email - - return CredentialBase( - credential_json=credential_dict, - admin_public=True, - source=DocumentSource.GOOGLE_DRIVE, - ) - - -def get_google_app_cred() -> GoogleAppCredentials: - creds_str = str(get_dynamic_config_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) - return GoogleAppCredentials(**json.loads(creds_str)) - - -def upsert_google_app_cred(app_credentials: GoogleAppCredentials) -> None: - get_dynamic_config_store().store( - KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True - ) - - -def delete_google_app_cred() -> None: - 
get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_CRED_KEY) - - -def get_service_account_key() -> GoogleServiceAccountKey: - creds_str = str( - get_dynamic_config_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) - ) - return GoogleServiceAccountKey(**json.loads(creds_str)) - - -def upsert_service_account_key(service_account_key: GoogleServiceAccountKey) -> None: - get_dynamic_config_store().store( - KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True - ) - - -def delete_service_account_key() -> None: - get_dynamic_config_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) diff --git a/backend/danswer/connectors/google_drive/constants.py b/backend/danswer/connectors/google_drive/constants.py index 0cca65c13df..4fdfb23d57b 100644 --- a/backend/danswer/connectors/google_drive/constants.py +++ b/backend/danswer/connectors/google_drive/constants.py @@ -1,7 +1,4 @@ -DB_CREDENTIALS_DICT_TOKEN_KEY = "google_drive_tokens" -DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_drive_service_account_key" -DB_CREDENTIALS_DICT_DELEGATED_USER_KEY = "google_drive_delegated_user" - -BASE_SCOPES = ["https://www.googleapis.com/auth/drive.readonly"] -FETCH_PERMISSIONS_SCOPES = ["https://www.googleapis.com/auth/drive.metadata.readonly"] -FETCH_GROUPS_SCOPES = ["https://www.googleapis.com/auth/cloud-identity.groups.readonly"] +UNSUPPORTED_FILE_TYPE_CONTENT = "" # keep empty for now +DRIVE_FOLDER_TYPE = "application/vnd.google-apps.folder" +DRIVE_SHORTCUT_TYPE = "application/vnd.google-apps.shortcut" +DRIVE_FILE_TYPE = "application/vnd.google-apps.file" diff --git a/backend/danswer/connectors/google_drive/doc_conversion.py b/backend/danswer/connectors/google_drive/doc_conversion.py new file mode 100644 index 00000000000..44a03818088 --- /dev/null +++ b/backend/danswer/connectors/google_drive/doc_conversion.py @@ -0,0 +1,175 @@ +import io +from datetime import datetime +from datetime import timezone + +from googleapiclient.errors import HttpError # type: ignore + +from danswer.configs.app_configs import CONTINUE_ON_CONNECTOR_FAILURE +from danswer.configs.constants import DocumentSource +from danswer.configs.constants import IGNORE_FOR_QA +from danswer.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE +from danswer.connectors.google_drive.constants import UNSUPPORTED_FILE_TYPE_CONTENT +from danswer.connectors.google_drive.models import GDriveMimeType +from danswer.connectors.google_drive.models import GoogleDriveFileType +from danswer.connectors.google_drive.section_extraction import get_document_sections +from danswer.connectors.google_utils.resources import GoogleDocsService +from danswer.connectors.google_utils.resources import GoogleDriveService +from danswer.connectors.models import Document +from danswer.connectors.models import Section +from danswer.file_processing.extract_file_text import docx_to_text +from danswer.file_processing.extract_file_text import pptx_to_text +from danswer.file_processing.extract_file_text import read_pdf_file +from danswer.file_processing.unstructured import get_unstructured_api_key +from danswer.file_processing.unstructured import unstructured_to_text +from danswer.utils.logger import setup_logger + +logger = setup_logger() + +# these errors don't represent a failure in the connector, but simply files +# that can't / shouldn't be indexed +ERRORS_TO_CONTINUE_ON = [ + "cannotExportFile", + "exportSizeLimitExceeded", + "cannotDownloadFile", +] + + +def _extract_sections_basic( + file: dict[str, str], service: GoogleDriveService +) -> 
list[Section]: + mime_type = file["mimeType"] + link = file["webViewLink"] + + if mime_type not in set(item.value for item in GDriveMimeType): + # Unsupported file types can still have a title, finding this way is still useful + return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)] + + try: + if mime_type in [ + GDriveMimeType.DOC.value, + GDriveMimeType.PPT.value, + GDriveMimeType.SPREADSHEET.value, + ]: + export_mime_type = ( + "text/plain" + if mime_type != GDriveMimeType.SPREADSHEET.value + else "text/csv" + ) + text = ( + service.files() + .export(fileId=file["id"], mimeType=export_mime_type) + .execute() + .decode("utf-8") + ) + return [Section(link=link, text=text)] + elif mime_type in [ + GDriveMimeType.PLAIN_TEXT.value, + GDriveMimeType.MARKDOWN.value, + ]: + return [ + Section( + link=link, + text=service.files() + .get_media(fileId=file["id"]) + .execute() + .decode("utf-8"), + ) + ] + if mime_type in [ + GDriveMimeType.WORD_DOC.value, + GDriveMimeType.POWERPOINT.value, + GDriveMimeType.PDF.value, + ]: + response = service.files().get_media(fileId=file["id"]).execute() + if get_unstructured_api_key(): + return [ + Section( + link=link, + text=unstructured_to_text( + file=io.BytesIO(response), + file_name=file.get("name", file["id"]), + ), + ) + ] + + if mime_type == GDriveMimeType.WORD_DOC.value: + return [ + Section(link=link, text=docx_to_text(file=io.BytesIO(response))) + ] + elif mime_type == GDriveMimeType.PDF.value: + text, _ = read_pdf_file(file=io.BytesIO(response)) + return [Section(link=link, text=text)] + elif mime_type == GDriveMimeType.POWERPOINT.value: + return [ + Section(link=link, text=pptx_to_text(file=io.BytesIO(response))) + ] + + return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)] + + except Exception: + return [Section(link=link, text=UNSUPPORTED_FILE_TYPE_CONTENT)] + + +def convert_drive_item_to_document( + file: GoogleDriveFileType, + drive_service: GoogleDriveService, + docs_service: GoogleDocsService, +) -> Document | None: + try: + # Skip files that are shortcuts + if file.get("mimeType") == DRIVE_SHORTCUT_TYPE: + logger.info("Ignoring Drive Shortcut Filetype") + return None + + sections: list[Section] = [] + + # Special handling for Google Docs to preserve structure, link + # to headers + if file.get("mimeType") == GDriveMimeType.DOC.value: + try: + sections = get_document_sections(docs_service, file["id"]) + except Exception as e: + logger.warning( + f"Ran into exception '{e}' when pulling sections from Google Doc '{file['name']}'." + " Falling back to basic extraction." + ) + + # NOTE: this will run for either (1) the above failed or (2) the file is not a Google Doc + if not sections: + try: + # For all other file types just extract the text + sections = _extract_sections_basic(file, drive_service) + + except HttpError as e: + reason = e.error_details[0]["reason"] if e.error_details else e.reason + message = e.error_details[0]["message"] if e.error_details else e.reason + if e.status_code == 403 and reason in ERRORS_TO_CONTINUE_ON: + logger.warning( + f"Could not export file '{file['name']}' due to '{message}', skipping..." 
+ ) + return None + + raise + + if not sections: + return None + + return Document( + id=file["webViewLink"], + sections=sections, + source=DocumentSource.GOOGLE_DRIVE, + semantic_identifier=file["name"], + doc_updated_at=datetime.fromisoformat(file["modifiedTime"]).astimezone( + timezone.utc + ), + metadata={} + if any(section.text for section in sections) + else {IGNORE_FOR_QA: "True"}, + additional_info=file.get("id"), + ) + except Exception as e: + if not CONTINUE_ON_CONNECTOR_FAILURE: + raise e + + logger.exception("Ran into exception when pulling a file from Google Drive") + return None diff --git a/backend/danswer/connectors/google_drive/file_retrieval.py b/backend/danswer/connectors/google_drive/file_retrieval.py new file mode 100644 index 00000000000..4f8cddab6df --- /dev/null +++ b/backend/danswer/connectors/google_drive/file_retrieval.py @@ -0,0 +1,199 @@ +from collections.abc import Callable +from collections.abc import Iterator +from datetime import datetime + +from googleapiclient.discovery import Resource # type: ignore + +from danswer.connectors.google_drive.constants import DRIVE_FOLDER_TYPE +from danswer.connectors.google_drive.constants import DRIVE_SHORTCUT_TYPE +from danswer.connectors.google_drive.models import GoogleDriveFileType +from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval +from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.utils.logger import setup_logger + +logger = setup_logger() + +FILE_FIELDS = ( + "nextPageToken, files(mimeType, id, name, permissions, modifiedTime, webViewLink, " + "shortcutDetails, owners(emailAddress))" +) +SLIM_FILE_FIELDS = ( + "nextPageToken, files(mimeType, id, name, permissions(emailAddress, type), " + "permissionIds, webViewLink, owners(emailAddress))" +) +FOLDER_FIELDS = "nextPageToken, files(id, name, permissions, modifiedTime, webViewLink, shortcutDetails)" + + +def _generate_time_range_filter( + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, +) -> str: + time_range_filter = "" + if start is not None: + time_start = datetime.utcfromtimestamp(start).isoformat() + "Z" + time_range_filter += f" and modifiedTime >= '{time_start}'" + if end is not None: + time_stop = datetime.utcfromtimestamp(end).isoformat() + "Z" + time_range_filter += f" and modifiedTime <= '{time_stop}'" + return time_range_filter + + +def _get_folders_in_parent( + service: Resource, + parent_id: str | None = None, + personal_drive: bool = False, +) -> Iterator[GoogleDriveFileType]: + # Follow shortcuts to folders + query = f"(mimeType = '{DRIVE_FOLDER_TYPE}' or mimeType = '{DRIVE_SHORTCUT_TYPE}')" + query += " and trashed = false" + + if parent_id: + query += f" and '{parent_id}' in parents" + + for file in execute_paginated_retrieval( + retrieval_function=service.files().list, + list_key="files", + corpora="user" if personal_drive else "allDrives", + supportsAllDrives=not personal_drive, + includeItemsFromAllDrives=not personal_drive, + fields=FOLDER_FIELDS, + q=query, + ): + yield file + + +def _get_files_in_parent( + service: Resource, + parent_id: str, + personal_drive: bool, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + is_slim: bool = False, +) -> Iterator[GoogleDriveFileType]: + query = f"mimeType != '{DRIVE_FOLDER_TYPE}' and '{parent_id}' in parents" + query += " and trashed = false" + query += _generate_time_range_filter(start, end) + + for file in execute_paginated_retrieval( + 
retrieval_function=service.files().list, + list_key="files", + corpora="user" if personal_drive else "allDrives", + supportsAllDrives=not personal_drive, + includeItemsFromAllDrives=not personal_drive, + fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS, + q=query, + ): + yield file + + +def crawl_folders_for_files( + service: Resource, + parent_id: str, + personal_drive: bool, + traversed_parent_ids: set[str], + update_traversed_ids_func: Callable[[str], None], + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, +) -> Iterator[GoogleDriveFileType]: + """ + This function starts crawling from any folder. It is slower though. + """ + if parent_id in traversed_parent_ids: + print(f"Skipping subfolder since already traversed: {parent_id}") + return + + update_traversed_ids_func(parent_id) + + yield from _get_files_in_parent( + service=service, + personal_drive=personal_drive, + start=start, + end=end, + parent_id=parent_id, + ) + + for subfolder in _get_folders_in_parent( + service=service, + parent_id=parent_id, + personal_drive=personal_drive, + ): + logger.info("Fetching all files in subfolder: " + subfolder["name"]) + yield from crawl_folders_for_files( + service=service, + parent_id=subfolder["id"], + personal_drive=personal_drive, + traversed_parent_ids=traversed_parent_ids, + update_traversed_ids_func=update_traversed_ids_func, + start=start, + end=end, + ) + + +def get_files_in_shared_drive( + service: Resource, + drive_id: str, + is_slim: bool = False, + cache_folders: bool = True, + update_traversed_ids_func: Callable[[str], None] = lambda _: None, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, +) -> Iterator[GoogleDriveFileType]: + # If we know we are going to folder crawl later, we can cache the folders here + if cache_folders: + # Get all folders being queried and add them to the traversed set + query = f"mimeType = '{DRIVE_FOLDER_TYPE}'" + query += " and trashed = false" + for file in execute_paginated_retrieval( + retrieval_function=service.files().list, + list_key="files", + corpora="drive", + driveId=drive_id, + supportsAllDrives=True, + includeItemsFromAllDrives=True, + fields="nextPageToken, files(id)", + q=query, + ): + update_traversed_ids_func(file["id"]) + + # Get all files in the shared drive + query = f"mimeType != '{DRIVE_FOLDER_TYPE}'" + query += " and trashed = false" + query += _generate_time_range_filter(start, end) + for file in execute_paginated_retrieval( + retrieval_function=service.files().list, + list_key="files", + corpora="drive", + driveId=drive_id, + supportsAllDrives=True, + includeItemsFromAllDrives=True, + fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS, + q=query, + ): + yield file + + +def get_files_in_my_drive( + service: Resource, + email: str, + is_slim: bool = False, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, +) -> Iterator[GoogleDriveFileType]: + query = f"mimeType != '{DRIVE_FOLDER_TYPE}' and '{email}' in owners" + query += " and trashed = false" + query += _generate_time_range_filter(start, end) + for file in execute_paginated_retrieval( + retrieval_function=service.files().list, + list_key="files", + corpora="user", + fields=SLIM_FILE_FIELDS if is_slim else FILE_FIELDS, + q=query, + ): + yield file + + +# Just in case we need to get the root folder id +def get_root_folder_id(service: Resource) -> str: + # we dont paginate here because there is only one root folder per user + # 
https://developers.google.com/drive/api/guides/v2-to-v3-reference + return service.files().get(fileId="root", fields="id").execute()["id"] diff --git a/backend/danswer/connectors/google_drive/models.py b/backend/danswer/connectors/google_drive/models.py new file mode 100644 index 00000000000..5bb06f3c206 --- /dev/null +++ b/backend/danswer/connectors/google_drive/models.py @@ -0,0 +1,18 @@ +from enum import Enum +from typing import Any + + +class GDriveMimeType(str, Enum): + DOC = "application/vnd.google-apps.document" + SPREADSHEET = "application/vnd.google-apps.spreadsheet" + PDF = "application/pdf" + WORD_DOC = "application/vnd.openxmlformats-officedocument.wordprocessingml.document" + PPT = "application/vnd.google-apps.presentation" + POWERPOINT = ( + "application/vnd.openxmlformats-officedocument.presentationml.presentation" + ) + PLAIN_TEXT = "text/plain" + MARKDOWN = "text/markdown" + + +GoogleDriveFileType = dict[str, Any] diff --git a/backend/danswer/connectors/google_drive/section_extraction.py b/backend/danswer/connectors/google_drive/section_extraction.py new file mode 100644 index 00000000000..b5809204893 --- /dev/null +++ b/backend/danswer/connectors/google_drive/section_extraction.py @@ -0,0 +1,105 @@ +from typing import Any + +from pydantic import BaseModel + +from danswer.connectors.google_utils.resources import GoogleDocsService +from danswer.connectors.models import Section + + +class CurrentHeading(BaseModel): + id: str + text: str + + +def _build_gdoc_section_link(doc_id: str, heading_id: str) -> str: + """Builds a Google Doc link that jumps to a specific heading""" + # NOTE: doesn't support docs with multiple tabs atm, if we need that ask + # @Chris + return ( + f"https://docs.google.com/document/d/{doc_id}/edit?tab=t.0#heading={heading_id}" + ) + + +def _extract_id_from_heading(paragraph: dict[str, Any]) -> str: + """Extracts the id from a heading paragraph element""" + return paragraph["paragraphStyle"]["headingId"] + + +def _extract_text_from_paragraph(paragraph: dict[str, Any]) -> str: + """Extracts the text content from a paragraph element""" + text_elements = [] + for element in paragraph.get("elements", []): + if "textRun" in element: + text_elements.append(element["textRun"].get("content", "")) + return "".join(text_elements) + + +def get_document_sections( + docs_service: GoogleDocsService, + doc_id: str, +) -> list[Section]: + """Extracts sections from a Google Doc, including their headings and content""" + # Fetch the document structure + doc = docs_service.documents().get(documentId=doc_id).execute() + + # Get the content + content = doc.get("body", {}).get("content", []) + + sections: list[Section] = [] + current_section: list[str] = [] + current_heading: CurrentHeading | None = None + + for element in content: + if "paragraph" not in element: + continue + + paragraph = element["paragraph"] + + # Check if this is a heading + if ( + "paragraphStyle" in paragraph + and "namedStyleType" in paragraph["paragraphStyle"] + ): + style = paragraph["paragraphStyle"]["namedStyleType"] + is_heading = style.startswith("HEADING_") + is_title = style.startswith("TITLE") + + if is_heading or is_title: + # If we were building a previous section, add it to sections list + if current_heading is not None and current_section: + heading_text = current_heading.text + section_text = f"{heading_text}\n" + "\n".join(current_section) + sections.append( + Section( + text=section_text.strip(), + link=_build_gdoc_section_link(doc_id, current_heading.id), + ) + ) + current_section = [] 
+ + # Start new heading + heading_id = _extract_id_from_heading(paragraph) + heading_text = _extract_text_from_paragraph(paragraph) + current_heading = CurrentHeading( + id=heading_id, + text=heading_text, + ) + continue + + # Add content to current section + if current_heading is not None: + text = _extract_text_from_paragraph(paragraph) + if text.strip(): + current_section.append(text) + + # Don't forget to add the last section + if current_heading is not None and current_section: + section_text = f"{current_heading.text}\n" + "\n".join(current_section) + sections.append( + Section( + text=section_text.strip(), + link=_build_gdoc_section_link(doc_id, current_heading.id), + ) + ) + + return sections diff --git a/backend/throttle.ctrl b/backend/danswer/connectors/google_utils/__init__.py similarity index 100% rename from backend/throttle.ctrl rename to backend/danswer/connectors/google_utils/__init__.py diff --git a/backend/danswer/connectors/google_utils/google_auth.py b/backend/danswer/connectors/google_utils/google_auth.py new file mode 100644 index 00000000000..8a8c59d6af3 --- /dev/null +++ b/backend/danswer/connectors/google_utils/google_auth.py @@ -0,0 +1,107 @@ +import json +from typing import cast + +from google.auth.transport.requests import Request # type: ignore +from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore +from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore + +from danswer.configs.constants import DocumentSource +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_TOKEN_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_PRIMARY_ADMIN_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + GOOGLE_SCOPES, +) +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def get_google_oauth_creds( + token_json_str: str, source: DocumentSource +) -> OAuthCredentials | None: + creds_json = json.loads(token_json_str) + creds = OAuthCredentials.from_authorized_user_info( + info=creds_json, + scopes=GOOGLE_SCOPES[source], + ) + if creds.valid: + return creds + + if creds.expired and creds.refresh_token: + try: + creds.refresh(Request()) + if creds.valid: + logger.notice("Refreshed Google Drive tokens.") + return creds + except Exception: + logger.exception("Failed to refresh google drive access token due to:") + return None + + return None + + +def get_google_creds( + credentials: dict[str, str], + source: DocumentSource, +) -> tuple[ServiceAccountCredentials | OAuthCredentials, dict[str, str] | None]: + """Checks for two different types of credentials. + (1) A credential which holds a token acquired via a user going thorough + the Google OAuth flow. + (2) A credential which holds a service account key JSON file, which + can then be used to impersonate any user in the workspace. + """ + oauth_creds = None + service_creds = None + new_creds_dict = None + if DB_CREDENTIALS_DICT_TOKEN_KEY in credentials: + # OAUTH + access_token_json_str = cast(str, credentials[DB_CREDENTIALS_DICT_TOKEN_KEY]) + oauth_creds = get_google_oauth_creds( + token_json_str=access_token_json_str, source=source + ) + + # tell caller to update token stored in DB if it has changed + # (e.g. 
the token has been refreshed) + new_creds_json_str = oauth_creds.to_json() if oauth_creds else "" + if new_creds_json_str != access_token_json_str: + new_creds_dict = { + DB_CREDENTIALS_DICT_TOKEN_KEY: new_creds_json_str, + DB_CREDENTIALS_PRIMARY_ADMIN_KEY: credentials[ + DB_CREDENTIALS_PRIMARY_ADMIN_KEY + ], + } + elif DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY in credentials: + # SERVICE ACCOUNT + service_account_key_json_str = credentials[ + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY + ] + service_account_key = json.loads(service_account_key_json_str) + + service_creds = ServiceAccountCredentials.from_service_account_info( + service_account_key, scopes=GOOGLE_SCOPES[source] + ) + + if not service_creds.valid or not service_creds.expired: + service_creds.refresh(Request()) + + if not service_creds.valid: + raise PermissionError( + f"Unable to access {source} - service account credentials are invalid." + ) + + creds: ServiceAccountCredentials | OAuthCredentials | None = ( + oauth_creds or service_creds + ) + if creds is None: + raise PermissionError( + f"Unable to access {source} - unknown credential structure." + ) + + return creds, new_creds_dict diff --git a/backend/danswer/connectors/google_utils/google_kv.py b/backend/danswer/connectors/google_utils/google_kv.py new file mode 100644 index 00000000000..1231293be69 --- /dev/null +++ b/backend/danswer/connectors/google_utils/google_kv.py @@ -0,0 +1,207 @@ +import json +from typing import cast +from urllib.parse import parse_qs +from urllib.parse import ParseResult +from urllib.parse import urlparse + +from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore +from google_auth_oauthlib.flow import InstalledAppFlow # type: ignore +from googleapiclient.discovery import build # type: ignore +from sqlalchemy.orm import Session + +from danswer.configs.app_configs import WEB_DOMAIN +from danswer.configs.constants import DocumentSource +from danswer.configs.constants import KV_CRED_KEY +from danswer.configs.constants import KV_GMAIL_CRED_KEY +from danswer.configs.constants import KV_GMAIL_SERVICE_ACCOUNT_KEY +from danswer.configs.constants import KV_GOOGLE_DRIVE_CRED_KEY +from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_TOKEN_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_PRIMARY_ADMIN_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + GOOGLE_SCOPES, +) +from danswer.connectors.google_utils.shared_constants import ( + MISSING_SCOPES_ERROR_STR, +) +from danswer.connectors.google_utils.shared_constants import ( + ONYX_SCOPE_INSTRUCTIONS, +) +from danswer.db.credentials import update_credential_json +from danswer.db.models import User +from danswer.key_value_store.factory import get_kv_store +from danswer.server.documents.models import CredentialBase +from danswer.server.documents.models import GoogleAppCredentials +from danswer.server.documents.models import GoogleServiceAccountKey +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def _build_frontend_google_drive_redirect() -> str: + return f"{WEB_DOMAIN}/admin/connectors/google-drive/auth/callback" + + +def verify_csrf(credential_id: int, state: str) -> None: + csrf = get_kv_store().load(KV_CRED_KEY.format(str(credential_id))) + if csrf != state: + raise 
PermissionError( + "State from Google Drive Connector callback does not match expected" + ) + + +def update_credential_access_tokens( + auth_code: str, + credential_id: int, + user: User, + db_session: Session, + source: DocumentSource, +) -> OAuthCredentials | None: + app_credentials = get_google_app_cred(source) + flow = InstalledAppFlow.from_client_config( + app_credentials.model_dump(), + scopes=GOOGLE_SCOPES, + redirect_uri=_build_frontend_google_drive_redirect(), + ) + flow.fetch_token(code=auth_code) + creds = flow.credentials + token_json_str = creds.to_json() + + # Get user email from Google API so we know who + # the primary admin is for this connector + try: + admin_service = build("drive", "v3", credentials=creds) + user_info = ( + admin_service.about() + .get( + fields="user(emailAddress)", + ) + .execute() + ) + email = user_info.get("user", {}).get("emailAddress") + except Exception as e: + if MISSING_SCOPES_ERROR_STR in str(e): + raise PermissionError(ONYX_SCOPE_INSTRUCTIONS) from e + raise e + + new_creds_dict = { + DB_CREDENTIALS_DICT_TOKEN_KEY: token_json_str, + DB_CREDENTIALS_PRIMARY_ADMIN_KEY: email, + } + + if not update_credential_json(credential_id, new_creds_dict, user, db_session): + return None + return creds + + +def build_service_account_creds( + source: DocumentSource, + primary_admin_email: str | None = None, +) -> CredentialBase: + service_account_key = get_service_account_key(source=source) + + credential_dict = { + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: service_account_key.json(), + } + if primary_admin_email: + credential_dict[DB_CREDENTIALS_PRIMARY_ADMIN_KEY] = primary_admin_email + + return CredentialBase( + credential_json=credential_dict, + admin_public=True, + source=source, + ) + + +def get_auth_url(credential_id: int) -> str: + creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) + credential_json = json.loads(creds_str) + flow = InstalledAppFlow.from_client_config( + credential_json, + scopes=GOOGLE_SCOPES, + redirect_uri=_build_frontend_google_drive_redirect(), + ) + auth_url, _ = flow.authorization_url(prompt="consent") + + parsed_url = cast(ParseResult, urlparse(auth_url)) + params = parse_qs(parsed_url.query) + + get_kv_store().store( + KV_CRED_KEY.format(credential_id), params.get("state", [None])[0], encrypt=True + ) # type: ignore + return str(auth_url) + + +def get_google_app_cred(source: DocumentSource) -> GoogleAppCredentials: + if source == DocumentSource.GOOGLE_DRIVE: + creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_CRED_KEY)) + elif source == DocumentSource.GMAIL: + creds_str = str(get_kv_store().load(KV_GMAIL_CRED_KEY)) + else: + raise ValueError(f"Unsupported source: {source}") + return GoogleAppCredentials(**json.loads(creds_str)) + + +def upsert_google_app_cred( + app_credentials: GoogleAppCredentials, source: DocumentSource +) -> None: + if source == DocumentSource.GOOGLE_DRIVE: + get_kv_store().store( + KV_GOOGLE_DRIVE_CRED_KEY, app_credentials.json(), encrypt=True + ) + elif source == DocumentSource.GMAIL: + get_kv_store().store(KV_GMAIL_CRED_KEY, app_credentials.json(), encrypt=True) + else: + raise ValueError(f"Unsupported source: {source}") + + +def delete_google_app_cred(source: DocumentSource) -> None: + if source == DocumentSource.GOOGLE_DRIVE: + get_kv_store().delete(KV_GOOGLE_DRIVE_CRED_KEY) + elif source == DocumentSource.GMAIL: + get_kv_store().delete(KV_GMAIL_CRED_KEY) + else: + raise ValueError(f"Unsupported source: {source}") + + +def get_service_account_key(source: DocumentSource) -> 
GoogleServiceAccountKey: + if source == DocumentSource.GOOGLE_DRIVE: + creds_str = str(get_kv_store().load(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY)) + elif source == DocumentSource.GMAIL: + creds_str = str(get_kv_store().load(KV_GMAIL_SERVICE_ACCOUNT_KEY)) + else: + raise ValueError(f"Unsupported source: {source}") + return GoogleServiceAccountKey(**json.loads(creds_str)) + + +def upsert_service_account_key( + service_account_key: GoogleServiceAccountKey, source: DocumentSource +) -> None: + if source == DocumentSource.GOOGLE_DRIVE: + get_kv_store().store( + KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY, + service_account_key.json(), + encrypt=True, + ) + elif source == DocumentSource.GMAIL: + get_kv_store().store( + KV_GMAIL_SERVICE_ACCOUNT_KEY, service_account_key.json(), encrypt=True + ) + else: + raise ValueError(f"Unsupported source: {source}") + + +def delete_service_account_key(source: DocumentSource) -> None: + if source == DocumentSource.GOOGLE_DRIVE: + get_kv_store().delete(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY) + elif source == DocumentSource.GMAIL: + get_kv_store().delete(KV_GMAIL_SERVICE_ACCOUNT_KEY) + else: + raise ValueError(f"Unsupported source: {source}") diff --git a/backend/danswer/connectors/google_utils/google_utils.py b/backend/danswer/connectors/google_utils/google_utils.py new file mode 100644 index 00000000000..037ec3731ac --- /dev/null +++ b/backend/danswer/connectors/google_utils/google_utils.py @@ -0,0 +1,102 @@ +import re +import time +from collections.abc import Callable +from collections.abc import Iterator +from datetime import datetime +from datetime import timezone +from typing import Any + +from googleapiclient.errors import HttpError # type: ignore + +from danswer.connectors.google_drive.models import GoogleDriveFileType +from danswer.utils.logger import setup_logger +from danswer.utils.retry_wrapper import retry_builder + +logger = setup_logger() + + +# Google Drive APIs are quite flakey and may 500 for an +# extended period of time. Trying to combat here by adding a very +# long retry period (~20 minutes of trying every minute) +add_retries = retry_builder(tries=50, max_delay=30) + + +def _execute_with_retry(request: Any) -> Any: + max_attempts = 10 + attempt = 0 + + while attempt < max_attempts: + # Note for reasons unknown, the Google API will sometimes return a 429 + # and even after waiting the retry period, it will return another 429. + # It could be due to a few possibilities: + # 1. Other things are also requesting from the Gmail API with the same key + # 2. It's a rolling rate limit so the moment we get some amount of requests cleared, we hit it again very quickly + # 3. The retry-after has a maximum and we've already hit the limit for the day + # or it's something else... 
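+        # In short: on a 429, honor Retry-After (the header if present, otherwise the
+        # timestamp embedded in the error message, otherwise a 60 second default), add a
+        # small buffer, sleep, and retry; any non-429 HttpError is re-raised immediately.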
+ try: + return request.execute() + except HttpError as error: + attempt += 1 + + if error.resp.status == 429: + # Attempt to get 'Retry-After' from headers + retry_after = error.resp.get("Retry-After") + if retry_after: + sleep_time = int(retry_after) + else: + # Extract 'Retry after' timestamp from error message + match = re.search( + r"Retry after (\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)", + str(error), + ) + if match: + retry_after_timestamp = match.group(1) + retry_after_dt = datetime.strptime( + retry_after_timestamp, "%Y-%m-%dT%H:%M:%S.%fZ" + ).replace(tzinfo=timezone.utc) + current_time = datetime.now(timezone.utc) + sleep_time = max( + int((retry_after_dt - current_time).total_seconds()), + 0, + ) + else: + logger.error( + f"No Retry-After header or timestamp found in error message: {error}" + ) + sleep_time = 60 + + sleep_time += 3 # Add a buffer to be safe + + logger.info( + f"Rate limit exceeded. Attempt {attempt}/{max_attempts}. Sleeping for {sleep_time} seconds." + ) + time.sleep(sleep_time) + + else: + raise + + # If we've exhausted all attempts + raise Exception(f"Failed to execute request after {max_attempts} attempts") + + +def execute_paginated_retrieval( + retrieval_function: Callable, + list_key: str, + **kwargs: Any, +) -> Iterator[GoogleDriveFileType]: + """Execute a paginated retrieval from Google Drive API + Args: + retrieval_function: The specific list function to call (e.g., service.files().list) + **kwargs: Arguments to pass to the list function + """ + next_page_token = "" + while next_page_token is not None: + request_kwargs = kwargs.copy() + if next_page_token: + request_kwargs["pageToken"] = next_page_token + + results = add_retries(lambda: retrieval_function(**request_kwargs).execute())() + + next_page_token = results.get("nextPageToken") + for item in results.get(list_key, []): + yield item diff --git a/backend/danswer/connectors/google_utils/resources.py b/backend/danswer/connectors/google_utils/resources.py new file mode 100644 index 00000000000..341c99df155 --- /dev/null +++ b/backend/danswer/connectors/google_utils/resources.py @@ -0,0 +1,63 @@ +from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore +from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore +from googleapiclient.discovery import build # type: ignore +from googleapiclient.discovery import Resource # type: ignore + + +class GoogleDriveService(Resource): + pass + + +class GoogleDocsService(Resource): + pass + + +class AdminService(Resource): + pass + + +class GmailService(Resource): + pass + + +def _get_google_service( + service_name: str, + service_version: str, + creds: ServiceAccountCredentials | OAuthCredentials, + user_email: str | None = None, +) -> GoogleDriveService | GoogleDocsService | AdminService | GmailService: + if isinstance(creds, ServiceAccountCredentials): + creds = creds.with_subject(user_email) + service = build(service_name, service_version, credentials=creds) + elif isinstance(creds, OAuthCredentials): + service = build(service_name, service_version, credentials=creds) + + return service + + +def get_google_docs_service( + creds: ServiceAccountCredentials | OAuthCredentials, + user_email: str | None = None, +) -> GoogleDocsService: + return _get_google_service("docs", "v1", creds, user_email) + + +def get_drive_service( + creds: ServiceAccountCredentials | OAuthCredentials, + user_email: str | None = None, +) -> GoogleDriveService: + return _get_google_service("drive", "v3", creds, user_email) + 
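Taken together, the new google_utils modules replace the per-connector auth plumbing this PR deletes: get_google_creds resolves either an OAuth token or a service account key, the resources helpers above build the typed Google API clients, and file_retrieval streams paginated listings. As a minimal usage sketch (illustrative only, not part of this diff; the wrapper function and its name are assumptions), listing the names of a user's My Drive files could look like:

    from danswer.configs.constants import DocumentSource
    from danswer.connectors.google_drive.file_retrieval import get_files_in_my_drive
    from danswer.connectors.google_utils.google_auth import get_google_creds
    from danswer.connectors.google_utils.resources import get_drive_service


    def list_my_drive_file_names(credentials: dict[str, str], user_email: str) -> list[str]:
        # Resolve OAuth or service-account credentials; the second value, when not None,
        # signals that the stored token was refreshed and should be written back to the DB.
        creds, _new_creds_dict = get_google_creds(credentials, DocumentSource.GOOGLE_DRIVE)
        # With a service account key, get_drive_service impersonates user_email via with_subject.
        drive_service = get_drive_service(creds, user_email=user_email)
        # get_files_in_my_drive paginates through non-trashed, non-folder files owned by the user.
        return [
            file["name"]
            for file in get_files_in_my_drive(service=drive_service, email=user_email)
        ]

The connector itself follows the same pattern in _extract_docs_from_google_drive, building a per-owner drive and docs service for each file before handing it to convert_drive_item_to_document.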
+ +def get_admin_service( + creds: ServiceAccountCredentials | OAuthCredentials, + user_email: str, +) -> AdminService: + return _get_google_service("admin", "directory_v1", creds, user_email) + + +def get_gmail_service( + creds: ServiceAccountCredentials | OAuthCredentials, + user_email: str, +) -> GmailService: + return _get_google_service("gmail", "v1", creds, user_email) diff --git a/backend/danswer/connectors/google_utils/shared_constants.py b/backend/danswer/connectors/google_utils/shared_constants.py new file mode 100644 index 00000000000..3aa3d1048b6 --- /dev/null +++ b/backend/danswer/connectors/google_utils/shared_constants.py @@ -0,0 +1,40 @@ +from danswer.configs.constants import DocumentSource + +# NOTE: do not need https://www.googleapis.com/auth/documents.readonly +# this is counted under `/auth/drive.readonly` +GOOGLE_SCOPES = { + DocumentSource.GOOGLE_DRIVE: [ + "https://www.googleapis.com/auth/drive.readonly", + "https://www.googleapis.com/auth/drive.metadata.readonly", + "https://www.googleapis.com/auth/admin.directory.group.readonly", + "https://www.googleapis.com/auth/admin.directory.user.readonly", + ], + DocumentSource.GMAIL: [ + "https://www.googleapis.com/auth/gmail.readonly", + "https://www.googleapis.com/auth/admin.directory.user.readonly", + "https://www.googleapis.com/auth/admin.directory.group.readonly", + ], +} + +# This is the Oauth token +DB_CREDENTIALS_DICT_TOKEN_KEY = "google_tokens" +# This is the service account key +DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY = "google_service_account_key" +# The email saved for both auth types +DB_CREDENTIALS_PRIMARY_ADMIN_KEY = "google_primary_admin" + +USER_FIELDS = "nextPageToken, users(primaryEmail)" + +# Error message substrings +MISSING_SCOPES_ERROR_STR = "client not authorized for any of the scopes requested" + +# Documentation and error messages +SCOPE_DOC_URL = "https://docs.danswer.dev/connectors/google_drive/overview" +ONYX_SCOPE_INSTRUCTIONS = ( + "You have upgraded Danswer without updating the Google Drive scopes. " + f"Please refer to the documentation to learn how to update the scopes: {SCOPE_DOC_URL}" +) + + +# This is the maximum number of threads that can be retrieved at once +SLIM_BATCH_SIZE = 500 diff --git a/backend/danswer/connectors/guru/connector.py b/backend/danswer/connectors/guru/connector.py index a27546425d3..510105b872f 100644 --- a/backend/danswer/connectors/guru/connector.py +++ b/backend/danswer/connectors/guru/connector.py @@ -19,13 +19,14 @@ from danswer.file_processing.html_utils import parse_html_page_basic from danswer.utils.logger import setup_logger + +logger = setup_logger() + # Potential Improvements # 1. 
Support fetching per collection via collection token (configured at connector creation) - GURU_API_BASE = "https://api.getguru.com/api/v1/" GURU_QUERY_ENDPOINT = GURU_API_BASE + "search/query" GURU_CARDS_URL = "https://app.getguru.com/card/" -logger = setup_logger() def unixtime_to_guru_time_str(unix_time: SecondsSinceUnixEpoch) -> str: diff --git a/backend/danswer/connectors/interfaces.py b/backend/danswer/connectors/interfaces.py index 3bd99792cce..c53b3de5f2f 100644 --- a/backend/danswer/connectors/interfaces.py +++ b/backend/danswer/connectors/interfaces.py @@ -3,14 +3,18 @@ from typing import Any from danswer.connectors.models import Document +from danswer.connectors.models import SlimDocument SecondsSinceUnixEpoch = float GenerateDocumentsOutput = Iterator[list[Document]] +GenerateSlimDocumentOutput = Iterator[list[SlimDocument]] class BaseConnector(abc.ABC): + REDIS_KEY_PREFIX = "da_connector_data:" + @abc.abstractmethod def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: raise NotImplementedError @@ -50,9 +54,13 @@ def poll_source( raise NotImplementedError -class IdConnector(BaseConnector): +class SlimConnector(BaseConnector): @abc.abstractmethod - def retrieve_all_source_ids(self) -> set[str]: + def retrieve_all_slim_documents( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: raise NotImplementedError diff --git a/backend/danswer/connectors/linear/connector.py b/backend/danswer/connectors/linear/connector.py index 8455b20f5ba..22b769562d1 100644 --- a/backend/danswer/connectors/linear/connector.py +++ b/backend/danswer/connectors/linear/connector.py @@ -18,6 +18,7 @@ from danswer.connectors.models import Section from danswer.utils.logger import setup_logger + logger = setup_logger() _NUM_RETRIES = 5 diff --git a/backend/danswer/connectors/loopio/connector.py b/backend/danswer/connectors/loopio/connector.py index e10bed87617..d3bdfe503f7 100644 --- a/backend/danswer/connectors/loopio/connector.py +++ b/backend/danswer/connectors/loopio/connector.py @@ -161,7 +161,7 @@ def _process_entries( ] doc_batch.append( Document( - id=entry["id"], + id=str(entry["id"]), sections=[Section(link=link, text=content_text)], source=DocumentSource.LOOPIO, semantic_identifier=questions[0], diff --git a/backend/danswer/connectors/mediawiki/family.py b/backend/danswer/connectors/mediawiki/family.py index 0d953066700..7554dd0f3cd 100644 --- a/backend/danswer/connectors/mediawiki/family.py +++ b/backend/danswer/connectors/mediawiki/family.py @@ -3,6 +3,7 @@ import builtins import functools import itertools +import tempfile from typing import Any from unittest import mock from urllib.parse import urlparse @@ -18,6 +19,8 @@ logger = setup_logger() +pywikibot.config.base_dir = tempfile.TemporaryDirectory().name + @mock.patch.object( builtins, "print", lambda *args: logger.info("\t".join(map(str, args))) @@ -45,8 +48,7 @@ def __init__( if any(x not in generate_family_file.NAME_CHARACTERS for x in name): raise ValueError( - 'ERROR: Name of family "{}" must be ASCII letters and digits [a-zA-Z0-9]', - name, + f'ERROR: Name of family "{name}" must be ASCII letters and digits [a-zA-Z0-9]', ) if isinstance(dointerwiki, bool): diff --git a/backend/danswer/connectors/mediawiki/wiki.py b/backend/danswer/connectors/mediawiki/wiki.py index f4ec1e02311..27edea871dc 100644 --- a/backend/danswer/connectors/mediawiki/wiki.py +++ b/backend/danswer/connectors/mediawiki/wiki.py @@ -2,7 +2,9 @@ import datetime 
import itertools +import tempfile from collections.abc import Generator +from collections.abc import Iterator from typing import Any from typing import ClassVar @@ -19,6 +21,12 @@ from danswer.connectors.mediawiki.family import family_class_dispatch from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + +pywikibot.config.base_dir = tempfile.TemporaryDirectory().name def pywikibot_timestamp_to_utc_datetime( @@ -74,7 +82,7 @@ def get_doc_from_page( sections=sections, semantic_identifier=page.title(), metadata={"categories": [category.title() for category in page.categories()]}, - id=page.pageid, + id=f"MEDIAWIKI_{page.pageid}_{page.full_url()}", ) @@ -116,14 +124,18 @@ def __init__( self.batch_size = batch_size # short names can only have ascii letters and digits - - self.family = family_class_dispatch(hostname, "Wikipedia Connector")() + self.family = family_class_dispatch(hostname, "WikipediaConnector")() self.site = pywikibot.Site(fam=self.family, code=language_code) self.categories = [ pywikibot.Category(self.site, f"Category:{category.replace(' ', '_')}") for category in categories ] - self.pages = [pywikibot.Page(self.site, page) for page in pages] + + self.pages = [] + for page in pages: + if not page: + continue + self.pages.append(pywikibot.Page(self.site, page)) def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: """Load credentials for a MediaWiki site. @@ -169,8 +181,13 @@ def _get_doc_batch( ] # Since we can specify both individual pages and categories, we need to iterate over all of them. - all_pages = itertools.chain(self.pages, *category_pages) + all_pages: Iterator[pywikibot.Page] = itertools.chain( + self.pages, *category_pages + ) for page in all_pages: + logger.info( + f"MediaWikiConnector: title='{page.title()}' url={page.full_url()}" + ) doc_batch.append( get_doc_from_page(page, self.site, self.document_source_type) ) @@ -216,5 +233,7 @@ def poll_source( print("All docs", all_docs) current = datetime.datetime.now().timestamp() one_day_ago = current - 30 * 24 * 60 * 60 # 30 days + latest_docs = list(test_connector.poll_source(one_day_ago, current)) + print("Latest docs", latest_docs) diff --git a/backend/danswer/connectors/models.py b/backend/danswer/connectors/models.py index 7d86d21980d..ba1368dc944 100644 --- a/backend/danswer/connectors/models.py +++ b/backend/danswer/connectors/models.py @@ -14,7 +14,7 @@ class InputType(str, Enum): LOAD_STATE = "load_state" # e.g. loading a current full state or a save state, such as from a file POLL = "poll" # e.g. calling an API to get all documents in the last hour EVENT = "event" # e.g. 
registered an endpoint as a listener, and processing connector events - PRUNE = "prune" + SLIM_RETRIEVAL = "slim_retrieval" class ConnectorMissingCredentialError(PermissionError): @@ -169,6 +169,11 @@ def from_base(cls, base: DocumentBase) -> "Document": ) +class SlimDocument(BaseModel): + id: str + perm_sync_data: Any | None = None + + class DocumentErrorSummary(BaseModel): id: str semantic_id: str diff --git a/backend/danswer/connectors/notion/connector.py b/backend/danswer/connectors/notion/connector.py index 7878434da04..4680c3d044b 100644 --- a/backend/danswer/connectors/notion/connector.py +++ b/backend/danswer/connectors/notion/connector.py @@ -29,6 +29,9 @@ _NOTION_CALL_TIMEOUT = 30 # 30 seconds +# TODO: Tables need to be ingested, Pages need to have their metadata ingested + + @dataclass class NotionPage: """Represents a Notion Page object""" @@ -40,6 +43,8 @@ class NotionPage: properties: dict[str, Any] url: str + database_name: str | None # Only applicable to the database type page (wiki) + def __init__(self, **kwargs: dict[str, Any]) -> None: names = set([f.name for f in fields(self)]) for k, v in kwargs.items(): @@ -47,6 +52,17 @@ def __init__(self, **kwargs: dict[str, Any]) -> None: setattr(self, k, v) +@dataclass +class NotionBlock: + """Represents a Notion Block object""" + + id: str # Used for the URL + text: str + # In a plaintext representation of the page, how this block should be joined + # with the existing text up to this point, separated out from text for clarity + prefix: str + + @dataclass class NotionSearchResponse: """Represents the response from the Notion Search API""" @@ -62,7 +78,6 @@ def __init__(self, **kwargs: dict[str, Any]) -> None: setattr(self, k, v) -# TODO - Add the ability to optionally limit to specific Notion databases class NotionConnector(LoadConnector, PollConnector): """Notion Page connector that reads all Notion pages this integration has been granted access to. @@ -119,28 +134,59 @@ def _fetch_child_blocks( f"This is likely due to the block not being shared " f"with the Danswer integration. Exact exception:\n\n{e}" ) - return None - logger.exception(f"Error fetching blocks - {res.json()}") - raise e + else: + logger.exception( + f"Error fetching blocks with status code {res.status_code}: {res.json()}" + ) + + # This can occasionally happen, the reason is unknown and cannot be reproduced on our internal Notion + # Assuming this will not be a critical loss of data + return None return res.json() @retry(tries=3, delay=1, backoff=2) def _fetch_page(self, page_id: str) -> NotionPage: - """Fetch a page from it's ID via the Notion API.""" + """Fetch a page from its ID via the Notion API, retry with database if page fetch fails.""" logger.debug(f"Fetching page for ID '{page_id}'") - block_url = f"https://api.notion.com/v1/pages/{page_id}" + page_url = f"https://api.notion.com/v1/pages/{page_id}" res = rl_requests.get( - block_url, + page_url, headers=self.headers, timeout=_NOTION_CALL_TIMEOUT, ) try: res.raise_for_status() except Exception as e: - logger.exception(f"Error fetching page - {res.json()}") - raise e + logger.warning( + f"Failed to fetch page, trying database for ID '{page_id}'. 
Exception: {e}" + ) + # Try fetching as a database if page fetch fails, this happens if the page is set to a wiki + # it becomes a database from the notion perspective + return self._fetch_database_as_page(page_id) return NotionPage(**res.json()) + @retry(tries=3, delay=1, backoff=2) + def _fetch_database_as_page(self, database_id: str) -> NotionPage: + """Attempt to fetch a database as a page.""" + logger.debug(f"Fetching database for ID '{database_id}' as a page") + database_url = f"https://api.notion.com/v1/databases/{database_id}" + res = rl_requests.get( + database_url, + headers=self.headers, + timeout=_NOTION_CALL_TIMEOUT, + ) + try: + res.raise_for_status() + except Exception as e: + logger.exception(f"Error fetching database as page - {res.json()}") + raise e + database_name = res.json().get("title") + database_name = ( + database_name[0].get("text", {}).get("content") if database_name else None + ) + + return NotionPage(**res.json(), database_name=database_name) + @retry(tries=3, delay=1, backoff=2) def _fetch_database( self, database_id: str, cursor: str | None = None @@ -171,8 +217,86 @@ def _fetch_database( raise e return res.json() - def _read_pages_from_database(self, database_id: str) -> list[str]: - """Returns a list of all page IDs in the database""" + @staticmethod + def _properties_to_str(properties: dict[str, Any]) -> str: + """Converts Notion properties to a string""" + + def _recurse_properties(inner_dict: dict[str, Any]) -> str | None: + while "type" in inner_dict: + type_name = inner_dict["type"] + inner_dict = inner_dict[type_name] + + # If the innermost layer is None, the value is not set + if not inner_dict: + return None + + if isinstance(inner_dict, list): + list_properties = [ + _recurse_properties(item) for item in inner_dict if item + ] + return ( + ", ".join( + [ + list_property + for list_property in list_properties + if list_property + ] + ) + or None + ) + + # TODO there may be more types to handle here + if isinstance(inner_dict, str): + # For some objects the innermost value could just be a string, not sure what causes this + return inner_dict + + elif isinstance(inner_dict, dict): + if "name" in inner_dict: + return inner_dict["name"] + if "content" in inner_dict: + return inner_dict["content"] + start = inner_dict.get("start") + end = inner_dict.get("end") + if start is not None: + if end is not None: + return f"{start} - {end}" + return start + elif end is not None: + return f"Until {end}" + + if "id" in inner_dict: + # This is not useful to index, it's a reference to another Notion object + # and this ID value in plaintext is useless outside of the Notion context + logger.debug("Skipping Notion object id field property") + return None + + logger.debug(f"Unreadable property from innermost prop: {inner_dict}") + return None + + result = "" + for prop_name, prop in properties.items(): + if not prop: + continue + + try: + inner_value = _recurse_properties(prop) + except Exception as e: + # This is not a critical failure, these properties are not the actual contents of the page + # more similar to metadata + logger.warning(f"Error recursing properties for {prop_name}: {e}") + continue + # Not a perfect way to format Notion database tables but there's no perfect representation + # since this must be represented as plaintext + if inner_value: + result += f"{prop_name}: {inner_value}\t" + + return result + + def _read_pages_from_database( + self, database_id: str + ) -> tuple[list[NotionBlock], list[str]]: + """Returns a list of top level blocks and all 
page IDs in the database""" + result_blocks: list[NotionBlock] = [] result_pages: list[str] = [] cursor = None while True: @@ -181,29 +305,34 @@ def _read_pages_from_database(self, database_id: str) -> list[str]: for result in data["results"]: obj_id = result["id"] obj_type = result["object"] - if obj_type == "page": - logger.debug( - f"Found page with ID '{obj_id}' in database '{database_id}'" - ) - result_pages.append(result["id"]) - elif obj_type == "database": - logger.debug( - f"Found database with ID '{obj_id}' in database '{database_id}'" - ) - result_pages.extend(self._read_pages_from_database(obj_id)) + text = self._properties_to_str(result.get("properties", {})) + if text: + result_blocks.append(NotionBlock(id=obj_id, text=text, prefix="\n")) + + if self.recursive_index_enabled: + if obj_type == "page": + logger.debug( + f"Found page with ID '{obj_id}' in database '{database_id}'" + ) + result_pages.append(result["id"]) + elif obj_type == "database": + logger.debug( + f"Found database with ID '{obj_id}' in database '{database_id}'" + ) + # The inner contents are ignored at this level + _, child_pages = self._read_pages_from_database(obj_id) + result_pages.extend(child_pages) if data["next_cursor"] is None: break cursor = data["next_cursor"] - return result_pages + return result_blocks, result_pages - def _read_blocks( - self, base_block_id: str - ) -> tuple[list[tuple[str, str]], list[str]]: - """Reads all child blocks for the specified block""" - result_lines: list[tuple[str, str]] = [] + def _read_blocks(self, base_block_id: str) -> tuple[list[NotionBlock], list[str]]: + """Reads all child blocks for the specified block, returns a list of blocks and child page ids""" + result_blocks: list[NotionBlock] = [] child_pages: list[str] = [] cursor = None while True: @@ -211,7 +340,7 @@ def _read_blocks( # this happens when a block is not shared with the integration if data is None: - return result_lines, child_pages + return result_blocks, child_pages for result in data["results"]: logger.debug( @@ -255,46 +384,70 @@ def _read_blocks( if result["has_children"]: if result_type == "child_page": + # Child pages will not be included at this top level, it will be a separate document child_pages.append(result_block_id) else: logger.debug(f"Entering sub-block: {result_block_id}") - subblock_result_lines, subblock_child_pages = self._read_blocks( + subblocks, subblock_child_pages = self._read_blocks( result_block_id ) logger.debug(f"Finished sub-block: {result_block_id}") - result_lines.extend(subblock_result_lines) + result_blocks.extend(subblocks) child_pages.extend(subblock_child_pages) - if result_type == "child_database" and self.recursive_index_enabled: - child_pages.extend(self._read_pages_from_database(result_block_id)) - - cur_result_text = "\n".join(cur_result_text_arr) - if cur_result_text: - result_lines.append((cur_result_text, result_block_id)) + if result_type == "child_database": + inner_blocks, inner_child_pages = self._read_pages_from_database( + result_block_id + ) + # A database on a page often looks like a table, we need to include it for the contents + # of the page but the children (cells) should be processed as other Documents + result_blocks.extend(inner_blocks) + + if self.recursive_index_enabled: + child_pages.extend(inner_child_pages) + + if cur_result_text_arr: + new_block = NotionBlock( + id=result_block_id, + text="\n".join(cur_result_text_arr), + prefix="\n", + ) + result_blocks.append(new_block) if data["next_cursor"] is None: break cursor = 
data["next_cursor"] - return result_lines, child_pages + return result_blocks, child_pages - def _read_page_title(self, page: NotionPage) -> str: + def _read_page_title(self, page: NotionPage) -> str | None: """Extracts the title from a Notion page""" page_title = None + if hasattr(page, "database_name") and page.database_name: + return page.database_name for _, prop in page.properties.items(): if prop["type"] == "title" and len(prop["title"]) > 0: page_title = " ".join([t["plain_text"] for t in prop["title"]]).strip() break - if page_title is None: - page_title = f"Untitled Page [{page.id}]" + return page_title def _read_pages( self, pages: list[NotionPage], ) -> Generator[Document, None, None]: - """Reads pages for rich text content and generates Documents""" + """Reads pages for rich text content and generates Documents + + Note that a page which is turned into a "wiki" becomes a database but both top level pages and top level databases + do not seem to have any properties associated with them. + + Pages that are part of a database can have properties which are like the values of the row in the "database" table + in which they exist + + This is not clearly outlined in the Notion API docs but it is observable empirically. + https://developers.notion.com/docs/working-with-page-content + """ all_child_page_ids: list[str] = [] for page in pages: if page.id in self.indexed_pages: @@ -304,18 +457,23 @@ def _read_pages( logger.info(f"Reading page with ID '{page.id}', with url {page.url}") page_blocks, child_page_ids = self._read_blocks(page.id) all_child_page_ids.extend(child_page_ids) - page_title = self._read_page_title(page) + + if not page_blocks: + continue + + page_title = ( + self._read_page_title(page) or f"Untitled Page with ID {page.id}" + ) + yield ( Document( id=page.id, - # Will add title to the first section later in processing - sections=[Section(link=page.url, text="")] - + [ + sections=[ Section( - link=f"{page.url}#{block_id.replace('-', '')}", - text=block_text, + link=f"{page.url}#{block.id.replace('-', '')}", + text=block.prefix + block.text, ) - for block_text, block_id in page_blocks + for block in page_blocks ], source=DocumentSource.NOTION, semantic_identifier=page_title, diff --git a/backend/danswer/connectors/requesttracker/connector.py b/backend/danswer/connectors/requesttracker/connector.py index 9c4590fc2ef..b520d0d7acf 100644 --- a/backend/danswer/connectors/requesttracker/connector.py +++ b/backend/danswer/connectors/requesttracker/connector.py @@ -1,153 +1,124 @@ -from datetime import datetime -from datetime import timezone -from logging import DEBUG as LOG_LVL_DEBUG -from typing import Any -from typing import List -from typing import Optional - -from rt.rest1 import ALL_QUEUES -from rt.rest1 import Rt - -from danswer.configs.app_configs import INDEX_BATCH_SIZE -from danswer.configs.constants import DocumentSource -from danswer.connectors.interfaces import GenerateDocumentsOutput -from danswer.connectors.interfaces import PollConnector -from danswer.connectors.interfaces import SecondsSinceUnixEpoch -from danswer.connectors.models import ConnectorMissingCredentialError -from danswer.connectors.models import Document -from danswer.connectors.models import Section -from danswer.utils.logger import setup_logger - -logger = setup_logger() - - -class RequestTrackerError(Exception): - pass - - -class RequestTrackerConnector(PollConnector): - def __init__( - self, - batch_size: int = INDEX_BATCH_SIZE, - ) -> None: - self.batch_size = batch_size - - def txn_link(self, 
tid: int, txn: int) -> str: - return f"{self.rt_base_url}/Ticket/Display.html?id={tid}&txn={txn}" - - def build_doc_sections_from_txn( - self, connection: Rt, ticket_id: int - ) -> List[Section]: - Sections: List[Section] = [] - - get_history_resp = connection.get_history(ticket_id) - - if get_history_resp is None: - raise RequestTrackerError(f"Ticket {ticket_id} cannot be found") - - for tx in get_history_resp: - Sections.append( - Section( - link=self.txn_link(ticket_id, int(tx["id"])), - text="\n".join( - [ - f"{k}:\n{v}\n" if k != "Attachments" else "" - for (k, v) in tx.items() - ] - ), - ) - ) - return Sections - - def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]: - self.rt_username = credentials.get("requesttracker_username") - self.rt_password = credentials.get("requesttracker_password") - self.rt_base_url = credentials.get("requesttracker_base_url") - return None - - # This does not include RT file attachments yet. - def _process_tickets( - self, start: datetime, end: datetime - ) -> GenerateDocumentsOutput: - if any([self.rt_username, self.rt_password, self.rt_base_url]) is None: - raise ConnectorMissingCredentialError("requesttracker") - - Rt0 = Rt( - f"{self.rt_base_url}/REST/1.0/", - self.rt_username, - self.rt_password, - ) - - Rt0.login() - - d0 = start.strftime("%Y-%m-%d %H:%M:%S") - d1 = end.strftime("%Y-%m-%d %H:%M:%S") - - tickets = Rt0.search( - Queue=ALL_QUEUES, - raw_query=f"Updated > '{d0}' AND Updated < '{d1}'", - ) - - doc_batch: List[Document] = [] - - for ticket in tickets: - ticket_keys_to_omit = ["id", "Subject"] - tid: int = int(ticket["numerical_id"]) - ticketLink: str = f"{self.rt_base_url}/Ticket/Display.html?id={tid}" - logger.info(f"Processing ticket {tid}") - doc = Document( - id=ticket["id"], - # Will add title to the first section later in processing - sections=[Section(link=ticketLink, text="")] - + self.build_doc_sections_from_txn(Rt0, tid), - source=DocumentSource.REQUESTTRACKER, - semantic_identifier=ticket["Subject"], - metadata={ - key: value - for key, value in ticket.items() - if key not in ticket_keys_to_omit - }, - ) - - doc_batch.append(doc) - - if len(doc_batch) >= self.batch_size: - yield doc_batch - doc_batch = [] - - if doc_batch: - yield doc_batch - - def poll_source( - self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch - ) -> GenerateDocumentsOutput: - # Keep query short, only look behind 1 day at maximum - one_day_ago: float = end - (24 * 60 * 60) - _start: float = start if start > one_day_ago else one_day_ago - start_datetime = datetime.fromtimestamp(_start, tz=timezone.utc) - end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) - yield from self._process_tickets(start_datetime, end_datetime) - - -if __name__ == "__main__": - import time - import os - from dotenv import load_dotenv - - load_dotenv() - logger.setLevel(LOG_LVL_DEBUG) - rt_connector = RequestTrackerConnector() - rt_connector.load_credentials( - { - "requesttracker_username": os.getenv("RT_USERNAME"), - "requesttracker_password": os.getenv("RT_PASSWORD"), - "requesttracker_base_url": os.getenv("RT_BASE_URL"), - } - ) - - current = time.time() - one_day_ago = current - (24 * 60 * 60) # 1 days - latest_docs = rt_connector.poll_source(one_day_ago, current) - - for doc in latest_docs: - print(doc) +# from datetime import datetime +# from datetime import timezone +# from logging import DEBUG as LOG_LVL_DEBUG +# from typing import Any +# from typing import List +# from typing import Optional +# from rt.rest1 import 
ALL_QUEUES +# from rt.rest1 import Rt +# from danswer.configs.app_configs import INDEX_BATCH_SIZE +# from danswer.configs.constants import DocumentSource +# from danswer.connectors.interfaces import GenerateDocumentsOutput +# from danswer.connectors.interfaces import PollConnector +# from danswer.connectors.interfaces import SecondsSinceUnixEpoch +# from danswer.connectors.models import ConnectorMissingCredentialError +# from danswer.connectors.models import Document +# from danswer.connectors.models import Section +# from danswer.utils.logger import setup_logger +# logger = setup_logger() +# class RequestTrackerError(Exception): +# pass +# class RequestTrackerConnector(PollConnector): +# def __init__( +# self, +# batch_size: int = INDEX_BATCH_SIZE, +# ) -> None: +# self.batch_size = batch_size +# def txn_link(self, tid: int, txn: int) -> str: +# return f"{self.rt_base_url}/Ticket/Display.html?id={tid}&txn={txn}" +# def build_doc_sections_from_txn( +# self, connection: Rt, ticket_id: int +# ) -> List[Section]: +# Sections: List[Section] = [] +# get_history_resp = connection.get_history(ticket_id) +# if get_history_resp is None: +# raise RequestTrackerError(f"Ticket {ticket_id} cannot be found") +# for tx in get_history_resp: +# Sections.append( +# Section( +# link=self.txn_link(ticket_id, int(tx["id"])), +# text="\n".join( +# [ +# f"{k}:\n{v}\n" if k != "Attachments" else "" +# for (k, v) in tx.items() +# ] +# ), +# ) +# ) +# return Sections +# def load_credentials(self, credentials: dict[str, Any]) -> Optional[dict[str, Any]]: +# self.rt_username = credentials.get("requesttracker_username") +# self.rt_password = credentials.get("requesttracker_password") +# self.rt_base_url = credentials.get("requesttracker_base_url") +# return None +# # This does not include RT file attachments yet. 
+# def _process_tickets( +# self, start: datetime, end: datetime +# ) -> GenerateDocumentsOutput: +# if any([self.rt_username, self.rt_password, self.rt_base_url]) is None: +# raise ConnectorMissingCredentialError("requesttracker") +# Rt0 = Rt( +# f"{self.rt_base_url}/REST/1.0/", +# self.rt_username, +# self.rt_password, +# ) +# Rt0.login() +# d0 = start.strftime("%Y-%m-%d %H:%M:%S") +# d1 = end.strftime("%Y-%m-%d %H:%M:%S") +# tickets = Rt0.search( +# Queue=ALL_QUEUES, +# raw_query=f"Updated > '{d0}' AND Updated < '{d1}'", +# ) +# doc_batch: List[Document] = [] +# for ticket in tickets: +# ticket_keys_to_omit = ["id", "Subject"] +# tid: int = int(ticket["numerical_id"]) +# ticketLink: str = f"{self.rt_base_url}/Ticket/Display.html?id={tid}" +# logger.info(f"Processing ticket {tid}") +# doc = Document( +# id=ticket["id"], +# # Will add title to the first section later in processing +# sections=[Section(link=ticketLink, text="")] +# + self.build_doc_sections_from_txn(Rt0, tid), +# source=DocumentSource.REQUESTTRACKER, +# semantic_identifier=ticket["Subject"], +# metadata={ +# key: value +# for key, value in ticket.items() +# if key not in ticket_keys_to_omit +# }, +# ) +# doc_batch.append(doc) +# if len(doc_batch) >= self.batch_size: +# yield doc_batch +# doc_batch = [] +# if doc_batch: +# yield doc_batch +# def poll_source( +# self, start: SecondsSinceUnixEpoch, end: SecondsSinceUnixEpoch +# ) -> GenerateDocumentsOutput: +# # Keep query short, only look behind 1 day at maximum +# one_day_ago: float = end - (24 * 60 * 60) +# _start: float = start if start > one_day_ago else one_day_ago +# start_datetime = datetime.fromtimestamp(_start, tz=timezone.utc) +# end_datetime = datetime.fromtimestamp(end, tz=timezone.utc) +# yield from self._process_tickets(start_datetime, end_datetime) +# if __name__ == "__main__": +# import time +# import os +# from dotenv import load_dotenv +# load_dotenv() +# logger.setLevel(LOG_LVL_DEBUG) +# rt_connector = RequestTrackerConnector() +# rt_connector.load_credentials( +# { +# "requesttracker_username": os.getenv("RT_USERNAME"), +# "requesttracker_password": os.getenv("RT_PASSWORD"), +# "requesttracker_base_url": os.getenv("RT_BASE_URL"), +# } +# ) +# current = time.time() +# one_day_ago = current - (24 * 60 * 60) # 1 days +# latest_docs = rt_connector.poll_source(one_day_ago, current) +# for doc in latest_docs: +# print(doc) diff --git a/backend/danswer/connectors/salesforce/connector.py b/backend/danswer/connectors/salesforce/connector.py index 03326df4efd..1e0fe9e1d3a 100644 --- a/backend/danswer/connectors/salesforce/connector.py +++ b/backend/danswer/connectors/salesforce/connector.py @@ -11,17 +11,25 @@ from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc from danswer.connectors.interfaces import GenerateDocumentsOutput -from danswer.connectors.interfaces import IdConnector +from danswer.connectors.interfaces import GenerateSlimDocumentOutput from danswer.connectors.interfaces import LoadConnector from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.interfaces import SlimConnector from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.connectors.models import SlimDocument from 
danswer.connectors.salesforce.utils import extract_dict_text from danswer.utils.logger import setup_logger + +# TODO: this connector does not work well at large scales +# the large query against a large Salesforce instance has been reported to take 1.5 hours. +# Additionally it seems to eat up more memory over time if the connection is long running (again a scale issue). + + DEFAULT_PARENT_OBJECT_TYPES = ["Account"] MAX_QUERY_LENGTH = 10000 # max query length is 20,000 characters ID_PREFIX = "SALESFORCE_" @@ -29,7 +37,7 @@ logger = setup_logger() -class SalesforceConnector(LoadConnector, PollConnector, IdConnector): +class SalesforceConnector(LoadConnector, PollConnector, SlimConnector): def __init__( self, batch_size: int = INDEX_BATCH_SIZE, @@ -243,19 +251,26 @@ def poll_source( end_datetime = datetime.utcfromtimestamp(end) return self._fetch_from_salesforce(start=start_datetime, end=end_datetime) - def retrieve_all_source_ids(self) -> set[str]: + def retrieve_all_slim_documents( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: if self.sf_client is None: raise ConnectorMissingCredentialError("Salesforce") - all_retrieved_ids: set[str] = set() + doc_metadata_list: list[SlimDocument] = [] for parent_object_type in self.parent_object_list: query = f"SELECT Id FROM {parent_object_type}" query_result = self.sf_client.query_all(query) - all_retrieved_ids.update( - f"{ID_PREFIX}{instance_dict.get('Id', '')}" + doc_metadata_list.extend( + SlimDocument( + id=f"{ID_PREFIX}{instance_dict.get('Id', '')}", + perm_sync_data={}, + ) for instance_dict in query_result["records"] ) - return all_retrieved_ids + yield doc_metadata_list if __name__ == "__main__": diff --git a/backend/danswer/connectors/sharepoint/connector.py b/backend/danswer/connectors/sharepoint/connector.py index e74dcbf7edd..8d99baffe33 100644 --- a/backend/danswer/connectors/sharepoint/connector.py +++ b/backend/danswer/connectors/sharepoint/connector.py @@ -25,6 +25,7 @@ from danswer.file_processing.extract_file_text import extract_file_text from danswer.utils.logger import setup_logger + logger = setup_logger() @@ -40,8 +41,8 @@ def _convert_driveitem_to_document( driveitem: DriveItem, ) -> Document: file_text = extract_file_text( - file_name=driveitem.name, file=io.BytesIO(driveitem.get_content().execute_query().value), + file_name=driveitem.name, break_on_unprocessable=False, ) diff --git a/backend/danswer/connectors/slab/connector.py b/backend/danswer/connectors/slab/connector.py index 80380ff7c29..ae76332838b 100644 --- a/backend/danswer/connectors/slab/connector.py +++ b/backend/danswer/connectors/slab/connector.py @@ -20,10 +20,13 @@ from danswer.connectors.models import Section from danswer.utils.logger import setup_logger + +logger = setup_logger() + + # Fairly generous retry because it's not understood why occasionally GraphQL requests fail even with timeout > 1 min SLAB_GRAPHQL_MAX_TRIES = 10 SLAB_API_URL = "https://api.slab.com/v1/graphql" -logger = setup_logger() def run_graphql_request( diff --git a/backend/danswer/connectors/slack/connector.py b/backend/danswer/connectors/slack/connector.py index d7a23714a37..22ace603bd4 100644 --- a/backend/danswer/connectors/slack/connector.py +++ b/backend/danswer/connectors/slack/connector.py @@ -13,13 +13,15 @@ from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.constants import DocumentSource from danswer.connectors.interfaces import GenerateDocumentsOutput -from 
danswer.connectors.interfaces import IdConnector +from danswer.connectors.interfaces import GenerateSlimDocumentOutput from danswer.connectors.interfaces import PollConnector from danswer.connectors.interfaces import SecondsSinceUnixEpoch +from danswer.connectors.interfaces import SlimConnector from danswer.connectors.models import BasicExpertInfo from danswer.connectors.models import ConnectorMissingCredentialError from danswer.connectors.models import Document from danswer.connectors.models import Section +from danswer.connectors.models import SlimDocument from danswer.connectors.slack.utils import expert_info_from_slack_id from danswer.connectors.slack.utils import get_message_link from danswer.connectors.slack.utils import make_paginated_slack_api_call_w_retries @@ -205,12 +207,17 @@ def thread_to_doc( "group_leave", "group_archive", "group_unarchive", + "channel_leave", + "channel_name", + "channel_join", } -def _default_msg_filter(message: MessageType) -> bool: +def default_msg_filter(message: MessageType) -> bool: # Don't keep messages from bots if message.get("bot_id") or message.get("app_id"): + if message.get("bot_profile", {}).get("name") == "DanswerConnector": + return False return True # Uninformative @@ -261,7 +268,7 @@ def _get_all_docs( channel_name_regex_enabled: bool = False, oldest: str | None = None, latest: str | None = None, - msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter, + msg_filter_func: Callable[[MessageType], bool] = default_msg_filter, ) -> Generator[Document, None, None]: """Get all documents in the workspace, channel by channel""" slack_cleaner = SlackTextCleaner(client=client) @@ -320,8 +327,8 @@ def _get_all_doc_ids( client: WebClient, channels: list[str] | None = None, channel_name_regex_enabled: bool = False, - msg_filter_func: Callable[[MessageType], bool] = _default_msg_filter, -) -> set[str]: + msg_filter_func: Callable[[MessageType], bool] = default_msg_filter, +) -> GenerateSlimDocumentOutput: """ Get all document ids in the workspace, channel by channel This is pretty identical to get_all_docs, but it returns a set of ids instead of documents @@ -333,13 +340,14 @@ def _get_all_doc_ids( all_channels, channels, channel_name_regex_enabled ) - all_doc_ids = set() for channel in filtered_channels: + channel_id = channel["id"] channel_message_batches = get_channel_messages( client=client, channel=channel, ) + message_ts_set: set[str] = set() for message_batch in channel_message_batches: for message in message_batch: if msg_filter_func(message): @@ -348,12 +356,21 @@ def _get_all_doc_ids( # The document id is the channel id and the ts of the first message in the thread # Since we already have the first message of the thread, we dont have to # fetch the thread for id retrieval, saving time and API calls - all_doc_ids.add(f"{channel['id']}__{message['ts']}") + message_ts_set.add(message["ts"]) + + channel_metadata_list: list[SlimDocument] = [] + for message_ts in message_ts_set: + channel_metadata_list.append( + SlimDocument( + id=f"{channel_id}__{message_ts}", + perm_sync_data={"channel_id": channel_id}, + ) + ) - return all_doc_ids + yield channel_metadata_list -class SlackPollConnector(PollConnector, IdConnector): +class SlackPollConnector(PollConnector, SlimConnector): def __init__( self, workspace: str, @@ -374,7 +391,11 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None self.client = WebClient(token=bot_token) return None - def retrieve_all_source_ids(self) -> set[str]: + def 
retrieve_all_slim_documents( + self, + start: SecondsSinceUnixEpoch | None = None, + end: SecondsSinceUnixEpoch | None = None, + ) -> GenerateSlimDocumentOutput: if self.client is None: raise ConnectorMissingCredentialError("Slack") @@ -424,6 +445,7 @@ def poll_source( current = time.time() one_day_ago = current - 24 * 60 * 60 # 1 day + document_batches = connector.poll_source(one_day_ago, current) print(next(document_batches)) diff --git a/backend/danswer/connectors/slack/load_connector.py b/backend/danswer/connectors/slack/load_connector.py index ebcfce5b845..7350ac6284d 100644 --- a/backend/danswer/connectors/slack/load_connector.py +++ b/backend/danswer/connectors/slack/load_connector.py @@ -16,6 +16,7 @@ from danswer.connectors.slack.utils import get_message_link from danswer.utils.logger import setup_logger + logger = setup_logger() diff --git a/backend/danswer/connectors/slack/utils.py b/backend/danswer/connectors/slack/utils.py index 20e859a6d44..78bc42a0926 100644 --- a/backend/danswer/connectors/slack/utils.py +++ b/backend/danswer/connectors/slack/utils.py @@ -10,9 +10,9 @@ from slack_sdk.errors import SlackApiError from slack_sdk.web import SlackResponse -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder from danswer.connectors.models import BasicExpertInfo from danswer.utils.logger import setup_logger +from danswer.utils.retry_wrapper import retry_builder logger = setup_logger() diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py index bb1f64efdfe..9e406b71674 100644 --- a/backend/danswer/connectors/web/connector.py +++ b/backend/danswer/connectors/web/connector.py @@ -128,6 +128,9 @@ def get_internal_links( if not href: continue + # Account for malformed backslashes in URLs + href = href.replace("\\", "/") + if should_ignore_pound and "#" in href: href = href.split("#")[0] @@ -370,7 +373,7 @@ def load_from_state(self) -> GenerateDocumentsOutput: page.close() except Exception as e: last_error = f"Failed to fetch '{current_url}': {e}" - logger.error(last_error) + logger.exception(last_error) playwright.stop() restart_playwright = True continue diff --git a/backend/danswer/connectors/zendesk/connector.py b/backend/danswer/connectors/zendesk/connector.py index f85f2efff57..195fe45af0f 100644 --- a/backend/danswer/connectors/zendesk/connector.py +++ b/backend/danswer/connectors/zendesk/connector.py @@ -1,10 +1,7 @@ +from collections.abc import Iterator from typing import Any import requests -from retry import retry -from zenpy import Zenpy # type: ignore -from zenpy.lib.api_objects import Ticket # type: ignore -from zenpy.lib.api_objects.help_centre_objects import Article # type: ignore from danswer.configs.app_configs import INDEX_BATCH_SIZE from danswer.configs.app_configs import ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS @@ -20,43 +17,244 @@ from danswer.connectors.models import Document from danswer.connectors.models import Section from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.utils.retry_wrapper import retry_builder -def _article_to_document(article: Article, content_tags: dict[str, str]) -> Document: - author = BasicExpertInfo( - display_name=article.author.name, email=article.author.email +MAX_PAGE_SIZE = 30 # Zendesk API maximum + + +class ZendeskCredentialsNotSetUpError(PermissionError): + def __init__(self) -> None: + super().__init__( + "Zendesk Credentials are not set up, was load_credentials called?" 
+ ) + + +class ZendeskClient: + def __init__(self, subdomain: str, email: str, token: str): + self.base_url = f"https://{subdomain}.zendesk.com/api/v2" + self.auth = (f"{email}/token", token) + + @retry_builder() + def make_request(self, endpoint: str, params: dict[str, Any]) -> dict[str, Any]: + response = requests.get( + f"{self.base_url}/{endpoint}", auth=self.auth, params=params + ) + response.raise_for_status() + return response.json() + + +def _get_content_tag_mapping(client: ZendeskClient) -> dict[str, str]: + content_tags: dict[str, str] = {} + params = {"page[size]": MAX_PAGE_SIZE} + + try: + while True: + data = client.make_request("guide/content_tags", params) + + for tag in data.get("records", []): + content_tags[tag["id"]] = tag["name"] + + # Check if there are more pages + if data.get("meta", {}).get("has_more", False): + params["page[after]"] = data["meta"]["after_cursor"] + else: + break + + return content_tags + except Exception as e: + raise Exception(f"Error fetching content tags: {str(e)}") + + +def _get_articles( + client: ZendeskClient, start_time: int | None = None, page_size: int = MAX_PAGE_SIZE +) -> Iterator[dict[str, Any]]: + params = ( + {"start_time": start_time, "page[size]": page_size} + if start_time + else {"page[size]": page_size} + ) + + while True: + data = client.make_request("help_center/articles", params) + for article in data["articles"]: + yield article + + if not data.get("meta", {}).get("has_more"): + break + params["page[after]"] = data["meta"]["after_cursor"] + + +def _get_tickets( + client: ZendeskClient, start_time: int | None = None +) -> Iterator[dict[str, Any]]: + params = {"start_time": start_time} if start_time else {"start_time": 0} + + while True: + data = client.make_request("incremental/tickets.json", params) + for ticket in data["tickets"]: + yield ticket + + if not data.get("end_of_stream", False): + params["start_time"] = data["end_time"] + else: + break + + +def _fetch_author(client: ZendeskClient, author_id: str) -> BasicExpertInfo | None: + author_data = client.make_request(f"users/{author_id}", {}) + user = author_data.get("user") + return ( + BasicExpertInfo(display_name=user.get("name"), email=user.get("email")) + if user and user.get("name") and user.get("email") + else None ) - update_time = time_str_to_utc(article.updated_at) - # build metadata + +def _article_to_document( + article: dict[str, Any], + content_tags: dict[str, str], + author_map: dict[str, BasicExpertInfo], + client: ZendeskClient, +) -> tuple[dict[str, BasicExpertInfo] | None, Document]: + author_id = article.get("author_id") + if not author_id: + author = None + else: + author = ( + author_map.get(author_id) + if author_id in author_map + else _fetch_author(client, author_id) + ) + + new_author_mapping = {author_id: author} if author_id and author else None + + updated_at = article.get("updated_at") + update_time = time_str_to_utc(updated_at) if updated_at else None + + # Build metadata metadata: dict[str, str | list[str]] = { - "labels": [str(label) for label in article.label_names if label], + "labels": [str(label) for label in article.get("label_names", []) if label], "content_tags": [ content_tags[tag_id] - for tag_id in article.content_tag_ids + for tag_id in article.get("content_tag_ids", []) if tag_id in content_tags ], } - # remove empty values + # Remove empty values metadata = {k: v for k, v in metadata.items() if v} - return Document( - id=f"article:{article.id}", + return new_author_mapping, Document( + id=f"article:{article['id']}", sections=[ 
- Section(link=article.html_url, text=parse_html_page_basic(article.body)) + Section( + link=article.get("html_url"), + text=parse_html_page_basic(article["body"]), + ) ], source=DocumentSource.ZENDESK, - semantic_identifier=article.title, + semantic_identifier=article["title"], doc_updated_at=update_time, - primary_owners=[author], + primary_owners=[author] if author else None, metadata=metadata, ) -class ZendeskClientNotSetUpError(PermissionError): - def __init__(self) -> None: - super().__init__("Zendesk Client is not set up, was load_credentials called?") +def _get_comment_text( + comment: dict[str, Any], + author_map: dict[str, BasicExpertInfo], + client: ZendeskClient, +) -> tuple[dict[str, BasicExpertInfo] | None, str]: + author_id = comment.get("author_id") + if not author_id: + author = None + else: + author = ( + author_map.get(author_id) + if author_id in author_map + else _fetch_author(client, author_id) + ) + + new_author_mapping = {author_id: author} if author_id and author else None + + comment_text = f"Comment{' by ' + author.display_name if author and author.display_name else ''}" + comment_text += f"{' at ' + comment['created_at'] if comment.get('created_at') else ''}:\n{comment['body']}" + + return new_author_mapping, comment_text + + +def _ticket_to_document( + ticket: dict[str, Any], + author_map: dict[str, BasicExpertInfo], + client: ZendeskClient, + default_subdomain: str, +) -> tuple[dict[str, BasicExpertInfo] | None, Document]: + submitter_id = ticket.get("submitter") + if not submitter_id: + submitter = None + else: + submitter = ( + author_map.get(submitter_id) + if submitter_id in author_map + else _fetch_author(client, submitter_id) + ) + + new_author_mapping = ( + {submitter_id: submitter} if submitter_id and submitter else None + ) + + updated_at = ticket.get("updated_at") + update_time = time_str_to_utc(updated_at) if updated_at else None + + metadata: dict[str, str | list[str]] = {} + if status := ticket.get("status"): + metadata["status"] = status + if priority := ticket.get("priority"): + metadata["priority"] = priority + if tags := ticket.get("tags"): + metadata["tags"] = tags + if ticket_type := ticket.get("type"): + metadata["ticket_type"] = ticket_type + + # Fetch comments for the ticket + comments_data = client.make_request(f"tickets/{ticket.get('id')}/comments", {}) + comments = comments_data.get("comments", []) + + comment_texts = [] + for comment in comments: + new_author_mapping, comment_text = _get_comment_text( + comment, author_map, client + ) + if new_author_mapping: + author_map.update(new_author_mapping) + comment_texts.append(comment_text) + + comments_text = "\n\n".join(comment_texts) + + subject = ticket.get("subject") + full_text = f"Ticket Subject:\n{subject}\n\nComments:\n{comments_text}" + + ticket_url = ticket.get("url") + subdomain = ( + ticket_url.split("//")[1].split(".zendesk.com")[0] + if ticket_url + else default_subdomain + ) + + ticket_display_url = ( + f"https://{subdomain}.zendesk.com/agent/tickets/{ticket.get('id')}" + ) + + return new_author_mapping, Document( + id=f"zendesk_ticket_{ticket['id']}", + sections=[Section(link=ticket_display_url, text=full_text)], + source=DocumentSource.ZENDESK, + semantic_identifier=f"Ticket #{ticket['id']}: {subject or 'No Subject'}", + doc_updated_at=update_time, + primary_owners=[submitter] if submitter else None, + metadata=metadata, + ) class ZendeskConnector(LoadConnector, PollConnector): @@ -66,44 +264,10 @@ def __init__( content_type: str = "articles", ) -> None: self.batch_size = 
batch_size - self.zendesk_client: Zenpy | None = None - self.content_tags: dict[str, str] = {} self.content_type = content_type - - @retry(tries=3, delay=2, backoff=2) - def _set_content_tags( - self, subdomain: str, email: str, token: str, page_size: int = 30 - ) -> None: - # Construct the base URL - base_url = f"https://{subdomain}.zendesk.com/api/v2/guide/content_tags" - - # Set up authentication - auth = (f"{email}/token", token) - - # Set up pagination parameters - params = {"page[size]": page_size} - - try: - while True: - # Make the GET request - response = requests.get(base_url, auth=auth, params=params) - - # Check if the request was successful - if response.status_code == 200: - data = response.json() - content_tag_list = data.get("records", []) - for tag in content_tag_list: - self.content_tags[tag["id"]] = tag["name"] - - # Check if there are more pages - if data.get("meta", {}).get("has_more", False): - params["page[after]"] = data["meta"]["after_cursor"] - else: - break - else: - raise Exception(f"Error: {response.status_code}\n{response.text}") - except Exception as e: - raise Exception(f"Error fetching content tags: {str(e)}") + self.subdomain = "" + # Fetch all tags ahead of time + self.content_tags: dict[str, str] = {} def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None: # Subdomain is actually the whole URL @@ -112,87 +276,23 @@ def load_credentials(self, credentials: dict[str, Any]) -> dict[str, Any] | None .replace("https://", "") .split(".zendesk.com")[0] ) + self.subdomain = subdomain - self.zendesk_client = Zenpy( - subdomain=subdomain, - email=credentials["zendesk_email"], - token=credentials["zendesk_token"], - ) - self._set_content_tags( - subdomain, - credentials["zendesk_email"], - credentials["zendesk_token"], + self.client = ZendeskClient( + subdomain, credentials["zendesk_email"], credentials["zendesk_token"] ) return None def load_from_state(self) -> GenerateDocumentsOutput: return self.poll_source(None, None) - def _ticket_to_document(self, ticket: Ticket) -> Document: - if self.zendesk_client is None: - raise ZendeskClientNotSetUpError() - - owner = None - if ticket.requester and ticket.requester.name and ticket.requester.email: - owner = [ - BasicExpertInfo( - display_name=ticket.requester.name, email=ticket.requester.email - ) - ] - update_time = time_str_to_utc(ticket.updated_at) if ticket.updated_at else None - - metadata: dict[str, str | list[str]] = {} - if ticket.status is not None: - metadata["status"] = ticket.status - if ticket.priority is not None: - metadata["priority"] = ticket.priority - if ticket.tags: - metadata["tags"] = ticket.tags - if ticket.type is not None: - metadata["ticket_type"] = ticket.type - - # Fetch comments for the ticket - comments = self.zendesk_client.tickets.comments(ticket=ticket) - - # Combine all comments into a single text - comments_text = "\n\n".join( - [ - f"Comment{f' by {comment.author.name}' if comment.author and comment.author.name else ''}" - f"{f' at {comment.created_at}' if comment.created_at else ''}:\n{comment.body}" - for comment in comments - if comment.body - ] - ) - - # Combine ticket description and comments - description = ( - ticket.description - if hasattr(ticket, "description") and ticket.description - else "" - ) - full_text = f"Ticket Description:\n{description}\n\nComments:\n{comments_text}" - - # Extract subdomain from ticket.url - subdomain = ticket.url.split("//")[1].split(".zendesk.com")[0] - - # Build the html url for the ticket - ticket_url = 
f"https://{subdomain}.zendesk.com/agent/tickets/{ticket.id}" - - return Document( - id=f"zendesk_ticket_{ticket.id}", - sections=[Section(link=ticket_url, text=full_text)], - source=DocumentSource.ZENDESK, - semantic_identifier=f"Ticket #{ticket.id}: {ticket.subject or 'No Subject'}", - doc_updated_at=update_time, - primary_owners=owner, - metadata=metadata, - ) - def poll_source( self, start: SecondsSinceUnixEpoch | None, end: SecondsSinceUnixEpoch | None ) -> GenerateDocumentsOutput: - if self.zendesk_client is None: - raise ZendeskClientNotSetUpError() + if self.client is None: + raise ZendeskCredentialsNotSetUpError() + + self.content_tags = _get_content_tag_mapping(self.client) if self.content_type == "articles": yield from self._poll_articles(start) @@ -204,26 +304,30 @@ def poll_source( def _poll_articles( self, start: SecondsSinceUnixEpoch | None ) -> GenerateDocumentsOutput: - articles = ( - self.zendesk_client.help_center.articles(cursor_pagination=True) # type: ignore - if start is None - else self.zendesk_client.help_center.articles.incremental( # type: ignore - start_time=int(start) - ) - ) + articles = _get_articles(self.client, start_time=int(start) if start else None) + + # This one is built on the fly as there may be more many more authors than tags + author_map: dict[str, BasicExpertInfo] = {} + doc_batch = [] for article in articles: if ( - article.body is None - or article.draft + article.get("body") is None + or article.get("draft") or any( label in ZENDESK_CONNECTOR_SKIP_ARTICLE_LABELS - for label in article.label_names + for label in article.get("label_names", []) ) ): continue - doc_batch.append(_article_to_document(article, self.content_tags)) + new_author_map, documents = _article_to_document( + article, self.content_tags, author_map, self.client + ) + if new_author_map: + author_map.update(new_author_map) + + doc_batch.append(documents) if len(doc_batch) >= self.batch_size: yield doc_batch doc_batch.clear() @@ -234,10 +338,14 @@ def _poll_articles( def _poll_tickets( self, start: SecondsSinceUnixEpoch | None ) -> GenerateDocumentsOutput: - if self.zendesk_client is None: - raise ZendeskClientNotSetUpError() + if self.client is None: + raise ZendeskCredentialsNotSetUpError() - ticket_generator = self.zendesk_client.tickets.incremental(start_time=start) + author_map: dict[str, BasicExpertInfo] = {} + + ticket_generator = _get_tickets( + self.client, start_time=int(start) if start else None + ) while True: doc_batch = [] @@ -246,10 +354,20 @@ def _poll_tickets( ticket = next(ticket_generator) # Check if the ticket status is deleted and skip it if so - if ticket.status == "deleted": + if ticket.get("status") == "deleted": continue - doc_batch.append(self._ticket_to_document(ticket)) + new_author_map, documents = _ticket_to_document( + ticket=ticket, + author_map=author_map, + client=self.client, + default_subdomain=self.subdomain, + ) + + if new_author_map: + author_map.update(new_author_map) + + doc_batch.append(documents) if len(doc_batch) >= self.batch_size: yield doc_batch @@ -267,7 +385,6 @@ def _poll_tickets( if __name__ == "__main__": import os - import time connector = ZendeskConnector() diff --git a/backend/danswer/danswerbot/slack/formatting.py b/backend/danswer/danswerbot/slack/formatting.py new file mode 100644 index 00000000000..604c879df27 --- /dev/null +++ b/backend/danswer/danswerbot/slack/formatting.py @@ -0,0 +1,66 @@ +from mistune import Markdown # type: ignore +from mistune import Renderer # type: ignore + + +def format_slack_message(message: str 
| None) -> str: + renderer = Markdown(renderer=SlackRenderer()) + return renderer.render(message) + + +class SlackRenderer(Renderer): + SPECIALS: dict[str, str] = {"&": "&", "<": "<", ">": ">"} + + def escape_special(self, text: str) -> str: + for special, replacement in self.SPECIALS.items(): + text = text.replace(special, replacement) + return text + + def header(self, text: str, level: int, raw: str | None = None) -> str: + return f"*{text}*\n" + + def emphasis(self, text: str) -> str: + return f"_{text}_" + + def double_emphasis(self, text: str) -> str: + return f"*{text}*" + + def strikethrough(self, text: str) -> str: + return f"~{text}~" + + def list(self, body: str, ordered: bool = True) -> str: + lines = body.split("\n") + count = 0 + for i, line in enumerate(lines): + if line.startswith("li: "): + count += 1 + prefix = f"{count}. " if ordered else "• " + lines[i] = f"{prefix}{line[4:]}" + return "\n".join(lines) + + def list_item(self, text: str) -> str: + return f"li: {text}\n" + + def link(self, link: str, title: str | None, content: str | None) -> str: + escaped_link = self.escape_special(link) + if content: + return f"<{escaped_link}|{content}>" + if title: + return f"<{escaped_link}|{title}>" + return f"<{escaped_link}>" + + def image(self, src: str, title: str | None, text: str | None) -> str: + escaped_src = self.escape_special(src) + display_text = title or text + return f"<{escaped_src}|{display_text}>" if display_text else f"<{escaped_src}>" + + def codespan(self, text: str) -> str: + return f"`{text}`" + + def block_code(self, text: str, lang: str | None) -> str: + return f"```\n{text}\n```\n" + + def paragraph(self, text: str) -> str: + return f"{text}\n" + + def autolink(self, link: str, is_email: bool) -> str: + return link if is_email else self.link(link, None, None) diff --git a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py index 9e1c171ee4f..f379e6af4ca 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_buttons.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_buttons.py @@ -4,9 +4,7 @@ from slack_sdk import WebClient from slack_sdk.models.blocks import SectionBlock from slack_sdk.models.views import View -from slack_sdk.socket_mode import SocketModeClient from slack_sdk.socket_mode.request import SocketModeRequest -from sqlalchemy.orm import Session from danswer.configs.constants import MessageType from danswer.configs.constants import SearchFeedbackType @@ -35,20 +33,22 @@ from danswer.danswerbot.slack.utils import get_feedback_visibility from danswer.danswerbot.slack.utils import read_slack_thread from danswer.danswerbot.slack.utils import respond_in_thread +from danswer.danswerbot.slack.utils import TenantSocketModeClient from danswer.danswerbot.slack.utils import update_emote_react -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.db.feedback import create_chat_message_feedback from danswer.db.feedback import create_doc_retrieval_feedback from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index from danswer.utils.logger import setup_logger + logger = setup_logger() def handle_doc_feedback_button( req: SocketModeRequest, - client: SocketModeClient, + client: TenantSocketModeClient, ) -> None: if not (actions := req.payload.get("actions")): logger.error("Missing actions. 
Unable to build the source feedback view") @@ -81,7 +81,7 @@ def handle_doc_feedback_button( def handle_generate_answer_button( req: SocketModeRequest, - client: SocketModeClient, + client: TenantSocketModeClient, ) -> None: channel_id = req.payload["channel"]["id"] channel_name = req.payload["channel"]["name"] @@ -116,7 +116,7 @@ def handle_generate_answer_button( thread_ts=thread_ts, ) - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(client.tenant_id) as db_session: slack_bot_config = get_slack_bot_config_for_channel( channel_name=channel_name, db_session=db_session ) @@ -136,6 +136,7 @@ def handle_generate_answer_button( slack_bot_config=slack_bot_config, receiver_ids=None, client=client.web_client, + tenant_id=client.tenant_id, channel=channel_id, logger=logger, feedback_reminder_id=None, @@ -150,12 +151,11 @@ def handle_slack_feedback( user_id_to_post_confirmation: str, channel_id_to_post_confirmation: str, thread_ts_to_post_confirmation: str, + tenant_id: str | None, ) -> None: - engine = get_sqlalchemy_engine() - message_id, doc_id, doc_rank = decompose_action_id(feedback_id) - with Session(engine) as db_session: + with get_session_with_tenant(tenant_id) as db_session: if feedback_type in [LIKE_BLOCK_ACTION_ID, DISLIKE_BLOCK_ACTION_ID]: create_chat_message_feedback( is_positive=feedback_type == LIKE_BLOCK_ACTION_ID, @@ -232,7 +232,7 @@ def handle_slack_feedback( def handle_followup_button( req: SocketModeRequest, - client: SocketModeClient, + client: TenantSocketModeClient, ) -> None: action_id = None if actions := req.payload.get("actions"): @@ -252,7 +252,7 @@ def handle_followup_button( tag_ids: list[str] = [] group_ids: list[str] = [] - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(client.tenant_id) as db_session: channel_name, is_dm = get_channel_name_from_id( client=client.web_client, channel_id=channel_id ) @@ -295,7 +295,7 @@ def handle_followup_button( def get_clicker_name( req: SocketModeRequest, - client: SocketModeClient, + client: TenantSocketModeClient, ) -> str: clicker_name = req.payload.get("user", {}).get("name", "Someone") clicker_real_name = None @@ -316,7 +316,7 @@ def get_clicker_name( def handle_followup_resolved_button( req: SocketModeRequest, - client: SocketModeClient, + client: TenantSocketModeClient, immediate: bool = False, ) -> None: channel_id = req.payload["container"]["channel_id"] diff --git a/backend/danswer/danswerbot/slack/handlers/handle_message.py b/backend/danswer/danswerbot/slack/handlers/handle_message.py index 0882796204d..ffbe902c5ec 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_message.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_message.py @@ -2,7 +2,6 @@ from slack_sdk import WebClient from slack_sdk.errors import SlackApiError -from sqlalchemy.orm import Session from danswer.configs.danswerbot_configs import DANSWER_BOT_FEEDBACK_REMINDER from danswer.configs.danswerbot_configs import DANSWER_REACT_EMOJI @@ -19,7 +18,7 @@ from danswer.danswerbot.slack.utils import respond_in_thread from danswer.danswerbot.slack.utils import slack_usage_report from danswer.danswerbot.slack.utils import update_emote_react -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.db.models import SlackBotConfig from danswer.db.users import add_non_web_user_if_not_exists from danswer.utils.logger import setup_logger @@ -110,6 +109,7 @@ def handle_message( slack_bot_config: SlackBotConfig | 
None, client: WebClient, feedback_reminder_id: str | None, + tenant_id: str | None, ) -> bool: """Potentially respond to the user message depending on filters and if an answer was generated @@ -135,7 +135,9 @@ def handle_message( action = "slack_tag_message" elif is_bot_dm: action = "slack_dm_message" - slack_usage_report(action=action, sender_id=sender_id, client=client) + slack_usage_report( + action=action, sender_id=sender_id, client=client, tenant_id=tenant_id + ) document_set_names: list[str] | None = None persona = slack_bot_config.persona if slack_bot_config else None @@ -209,7 +211,7 @@ def handle_message( except SlackApiError as e: logger.error(f"Was not able to react to user message due to: {e}") - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: if message_info.email: add_non_web_user_if_not_exists(db_session, message_info.email) @@ -235,5 +237,6 @@ def handle_message( channel=channel, logger=logger, feedback_reminder_id=feedback_reminder_id, + tenant_id=tenant_id, ) return issue_with_regular_answer diff --git a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py index f1c9bd077cf..9dadf5614cb 100644 --- a/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py +++ b/backend/danswer/danswerbot/slack/handlers/handle_regular_answer.py @@ -5,12 +5,10 @@ from typing import Optional from typing import TypeVar -from fastapi import HTTPException from retry import retry from slack_sdk import WebClient from slack_sdk.models.blocks import DividerBlock from slack_sdk.models.blocks import SectionBlock -from sqlalchemy.orm import Session from danswer.configs.app_configs import DISABLE_GENERATIVE_AI from danswer.configs.danswerbot_configs import DANSWER_BOT_ANSWER_GENERATION_TIMEOUT @@ -28,12 +26,13 @@ from danswer.danswerbot.slack.blocks import build_qa_response_blocks from danswer.danswerbot.slack.blocks import build_sources_blocks from danswer.danswerbot.slack.blocks import get_restate_blocks +from danswer.danswerbot.slack.formatting import format_slack_message from danswer.danswerbot.slack.handlers.utils import send_team_member_message from danswer.danswerbot.slack.models import SlackMessageInfo from danswer.danswerbot.slack.utils import respond_in_thread from danswer.danswerbot.slack.utils import SlackRateLimiter from danswer.danswerbot.slack.utils import update_emote_react -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.db.models import Persona from danswer.db.models import SlackBotConfig from danswer.db.models import SlackBotResponseType @@ -88,6 +87,7 @@ def handle_regular_answer( channel: str, logger: DanswerLoggingAdapter, feedback_reminder_id: str | None, + tenant_id: str | None, num_retries: int = DANSWER_BOT_NUM_RETRIES, answer_generation_timeout: int = DANSWER_BOT_ANSWER_GENERATION_TIMEOUT, thread_context_percent: float = DANSWER_BOT_TARGET_CHUNK_PERCENTAGE, @@ -104,8 +104,7 @@ def handle_regular_answer( user = None if message_info.is_bot_dm: if message_info.email: - engine = get_sqlalchemy_engine() - with Session(engine) as db_session: + with get_session_with_tenant(tenant_id) as db_session: user = get_user_by_email(message_info.email, db_session) document_set_names: list[str] | None = None @@ -152,15 +151,11 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non max_document_tokens: int | None = None max_history_tokens: 
int | None = None - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: if len(new_message_request.messages) > 1: if new_message_request.persona_config: - raise HTTPException( - status_code=403, - detail="Slack bot does not support persona config", - ) - - elif new_message_request.persona_id: + raise RuntimeError("Slack bot does not support persona config") + elif new_message_request.persona_id is not None: persona = cast( Persona, fetch_persona_by_id( @@ -170,6 +165,10 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non get_editable=False, ), ) + else: + raise RuntimeError( + "No persona id provided, this should never happen." + ) llm, _ = get_llms_for_persona(persona) @@ -212,6 +211,7 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non use_citations=use_citations, danswerbot_flow=True, ) + if not answer.error_msg: return answer else: @@ -246,7 +246,7 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non ) # Always apply reranking settings if it exists, this is the non-streaming flow - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: saved_search_settings = get_current_search_settings(db_session) # This includes throwing out answer via reflexion @@ -413,10 +413,11 @@ def _get_answer(new_message_request: DirectQARequest) -> OneShotQAResponse | Non # If called with the DanswerBot slash command, the question is lost so we have to reshow it restate_question_block = get_restate_blocks(messages[-1].message, is_bot_msg) + formatted_answer = format_slack_message(answer.answer) if answer.answer else None answer_blocks = build_qa_response_blocks( message_id=answer.chat_message_id, - answer=answer.answer, + answer=formatted_answer, quotes=answer.quotes.quotes if answer.quotes else None, source_filters=retrieval_info.applied_source_filters, time_cutoff=retrieval_info.applied_time_cutoff, diff --git a/backend/danswer/danswerbot/slack/listener.py b/backend/danswer/danswerbot/slack/listener.py index c430f1b31b7..2078d621325 100644 --- a/backend/danswer/danswerbot/slack/listener.py +++ b/backend/danswer/danswerbot/slack/listener.py @@ -4,10 +4,8 @@ from typing import cast from slack_sdk import WebClient -from slack_sdk.socket_mode import SocketModeClient from slack_sdk.socket_mode.request import SocketModeRequest from slack_sdk.socket_mode.response import SocketModeResponse -from sqlalchemy.orm import Session from danswer.configs.constants import MessageType from danswer.configs.danswerbot_configs import DANSWER_BOT_REPHRASE_MESSAGE @@ -47,9 +45,11 @@ from danswer.danswerbot.slack.utils import remove_danswer_bot_tag from danswer.danswerbot.slack.utils import rephrase_slack_message from danswer.danswerbot.slack.utils import respond_in_thread -from danswer.db.engine import get_sqlalchemy_engine +from danswer.danswerbot.slack.utils import TenantSocketModeClient +from danswer.db.engine import get_all_tenant_ids +from danswer.db.engine import get_session_with_tenant from danswer.db.search_settings import get_current_search_settings -from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.key_value_store.interface import KvKeyNotFoundError from danswer.natural_language_processing.search_nlp_models import EmbeddingModel from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder from danswer.one_shot_answer.models import ThreadMessage @@ -60,6 +60,7 @@ 
from shared_configs.configs import MODEL_SERVER_HOST from shared_configs.configs import MODEL_SERVER_PORT from shared_configs.configs import SLACK_CHANNEL_ID +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logger = setup_logger() @@ -80,7 +81,7 @@ _OFFICIAL_SLACKBOT_USER_ID = "USLACKBOT" -def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool: +def prefilter_requests(req: SocketModeRequest, client: TenantSocketModeClient) -> bool: """True to keep going, False to ignore this Slack request""" if req.type == "events_api": # Verify channel is valid @@ -131,9 +132,8 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool ) return False + bot_tag_id = get_danswer_bot_app_id(client.web_client) if event_type == "message": - bot_tag_id = get_danswer_bot_app_id(client.web_client) - is_dm = event.get("channel_type") == "im" is_tagged = bot_tag_id and bot_tag_id in msg is_danswer_bot_msg = bot_tag_id and bot_tag_id in event.get("user", "") @@ -154,13 +154,14 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool client=client.web_client, channel_id=channel ) - engine = get_sqlalchemy_engine() - with Session(engine) as db_session: + with get_session_with_tenant(client.tenant_id) as db_session: slack_bot_config = get_slack_bot_config_for_channel( channel_name=channel_name, db_session=db_session ) - if not slack_bot_config or not slack_bot_config.channel_config.get( - "respond_to_bots" + # If DanswerBot is not specifically tagged and the channel is not set to respond to bots, ignore the message + if (not bot_tag_id or bot_tag_id not in msg) and ( + not slack_bot_config + or not slack_bot_config.channel_config.get("respond_to_bots") ): channel_specific_logger.info("Ignoring message from bot") return False @@ -220,7 +221,7 @@ def prefilter_requests(req: SocketModeRequest, client: SocketModeClient) -> bool return True -def process_feedback(req: SocketModeRequest, client: SocketModeClient) -> None: +def process_feedback(req: SocketModeRequest, client: TenantSocketModeClient) -> None: if actions := req.payload.get("actions"): action = cast(dict[str, Any], actions[0]) feedback_type = cast(str, action.get("action_id")) @@ -242,6 +243,7 @@ def process_feedback(req: SocketModeRequest, client: SocketModeClient) -> None: user_id_to_post_confirmation=user_id, channel_id_to_post_confirmation=channel_id, thread_ts_to_post_confirmation=thread_ts, + tenant_id=client.tenant_id, ) query_event_id, _, _ = decompose_action_id(feedback_id) @@ -249,7 +251,7 @@ def process_feedback(req: SocketModeRequest, client: SocketModeClient) -> None: def build_request_details( - req: SocketModeRequest, client: SocketModeClient + req: SocketModeRequest, client: TenantSocketModeClient ) -> SlackMessageInfo: if req.type == "events_api": event = cast(dict[str, Any], req.payload["event"]) @@ -328,7 +330,7 @@ def build_request_details( def apologize_for_fail( details: SlackMessageInfo, - client: SocketModeClient, + client: TenantSocketModeClient, ) -> None: respond_in_thread( client=client.web_client, @@ -340,11 +342,13 @@ def apologize_for_fail( def process_message( req: SocketModeRequest, - client: SocketModeClient, + client: TenantSocketModeClient, respond_every_channel: bool = DANSWER_BOT_RESPOND_EVERY_CHANNEL, notify_no_answer: bool = NOTIFY_SLACKBOT_NO_ANSWER, ) -> None: - logger.debug(f"Received Slack request of type: '{req.type}'") + logger.debug( + f"Received Slack request of type: '{req.type}' for tenant, {client.tenant_id}" + ) # 
Throw out requests that can't or shouldn't be handled if not prefilter_requests(req, client): @@ -356,59 +360,67 @@ def process_message( client=client.web_client, channel_id=channel ) - engine = get_sqlalchemy_engine() - with Session(engine) as db_session: - slack_bot_config = get_slack_bot_config_for_channel( - channel_name=channel_name, db_session=db_session - ) - - # Be careful about this default, don't want to accidentally spam every channel - # Users should be able to DM slack bot in their private channels though - if ( - slack_bot_config is None - and not respond_every_channel - # Can't have configs for DMs so don't toss them out - and not is_dm - # If /DanswerBot (is_bot_msg) or @DanswerBot (bypass_filters) - # always respond with the default configs - and not (details.is_bot_msg or details.bypass_filters) - ): - return - - follow_up = bool( - slack_bot_config - and slack_bot_config.channel_config - and slack_bot_config.channel_config.get("follow_up_tags") is not None - ) - feedback_reminder_id = schedule_feedback_reminder( - details=details, client=client.web_client, include_followup=follow_up - ) + # Set the current tenant ID at the beginning for all DB calls within this thread + if client.tenant_id: + logger.info(f"Setting tenant ID to {client.tenant_id}") + token = CURRENT_TENANT_ID_CONTEXTVAR.set(client.tenant_id) + try: + with get_session_with_tenant(client.tenant_id) as db_session: + slack_bot_config = get_slack_bot_config_for_channel( + channel_name=channel_name, db_session=db_session + ) - failed = handle_message( - message_info=details, - slack_bot_config=slack_bot_config, - client=client.web_client, - feedback_reminder_id=feedback_reminder_id, - ) + # Be careful about this default, don't want to accidentally spam every channel + # Users should be able to DM slack bot in their private channels though + if ( + slack_bot_config is None + and not respond_every_channel + # Can't have configs for DMs so don't toss them out + and not is_dm + # If /DanswerBot (is_bot_msg) or @DanswerBot (bypass_filters) + # always respond with the default configs + and not (details.is_bot_msg or details.bypass_filters) + ): + return - if failed: - if feedback_reminder_id: - remove_scheduled_feedback_reminder( - client=client.web_client, - channel=details.sender, - msg_id=feedback_reminder_id, - ) - # Skipping answering due to pre-filtering is not considered a failure - if notify_no_answer: - apologize_for_fail(details, client) + follow_up = bool( + slack_bot_config + and slack_bot_config.channel_config + and slack_bot_config.channel_config.get("follow_up_tags") is not None + ) + feedback_reminder_id = schedule_feedback_reminder( + details=details, client=client.web_client, include_followup=follow_up + ) + failed = handle_message( + message_info=details, + slack_bot_config=slack_bot_config, + client=client.web_client, + feedback_reminder_id=feedback_reminder_id, + tenant_id=client.tenant_id, + ) -def acknowledge_message(req: SocketModeRequest, client: SocketModeClient) -> None: + if failed: + if feedback_reminder_id: + remove_scheduled_feedback_reminder( + client=client.web_client, + channel=details.sender, + msg_id=feedback_reminder_id, + ) + # Skipping answering due to pre-filtering is not considered a failure + if notify_no_answer: + apologize_for_fail(details, client) + finally: + if client.tenant_id: + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) + + +def acknowledge_message(req: SocketModeRequest, client: TenantSocketModeClient) -> None: response = 
SocketModeResponse(envelope_id=req.envelope_id) client.send_socket_mode_response(response) -def action_routing(req: SocketModeRequest, client: SocketModeClient) -> None: +def action_routing(req: SocketModeRequest, client: TenantSocketModeClient) -> None: if actions := req.payload.get("actions"): action = cast(dict[str, Any], actions[0]) @@ -428,13 +440,13 @@ def action_routing(req: SocketModeRequest, client: SocketModeClient) -> None: return handle_generate_answer_button(req, client) -def view_routing(req: SocketModeRequest, client: SocketModeClient) -> None: +def view_routing(req: SocketModeRequest, client: TenantSocketModeClient) -> None: if view := req.payload.get("view"): if view["callback_id"] == VIEW_DOC_FEEDBACK_ID: return process_feedback(req, client) -def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> None: +def process_slack_event(client: TenantSocketModeClient, req: SocketModeRequest) -> None: # Always respond right away, if Slack doesn't receive these frequently enough # it will assume the Bot is DEAD!!! :( acknowledge_message(req, client) @@ -447,25 +459,29 @@ def process_slack_event(client: SocketModeClient, req: SocketModeRequest) -> Non return view_routing(req, client) elif req.type == "events_api" or req.type == "slash_commands": return process_message(req, client) - except Exception: - logger.exception("Failed to process slack event") + except Exception as e: + logger.exception(f"Failed to process slack event. Error: {e}") + logger.error(f"Slack request payload: {req.payload}") -def _get_socket_client(slack_bot_tokens: SlackBotTokens) -> SocketModeClient: +def _get_socket_client( + slack_bot_tokens: SlackBotTokens, tenant_id: str | None +) -> TenantSocketModeClient: # For more info on how to set this up, checkout the docs: # https://docs.danswer.dev/slack_bot_setup - return SocketModeClient( + return TenantSocketModeClient( # This app-level token will be used only for establishing a connection app_token=slack_bot_tokens.app_token, web_client=WebClient(token=slack_bot_tokens.bot_token), + tenant_id=tenant_id, ) -def _initialize_socket_client(socket_client: SocketModeClient) -> None: +def _initialize_socket_client(socket_client: TenantSocketModeClient) -> None: socket_client.socket_mode_request_listeners.append(process_slack_event) # type: ignore # Establish a WebSocket connection to the Socket Mode servers - logger.notice("Listening for messages from Slack...") + logger.notice(f"Listening for messages from Slack {socket_client.tenant_id }...") socket_client.connect() @@ -479,8 +495,8 @@ def _initialize_socket_client(socket_client: SocketModeClient) -> None: # NOTE: we are using Web Sockets so that you can run this from within a firewalled VPC # without issue. 
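# Illustrative sketch of the tenant contextvar pattern relied on below and in
# process_message: set() returns a token and reset(token) restores the previous
# value, so per-tenant work never leaks its tenant ID into other handlers.
#
#   token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id or "public")
#   try:
#       ...  # fetch_tokens() and other DB calls resolve against this tenant
#   finally:
#       CURRENT_TENANT_ID_CONTEXTVAR.reset(token)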
if __name__ == "__main__": - slack_bot_tokens: SlackBotTokens | None = None - socket_client: SocketModeClient | None = None + slack_bot_tokens: dict[str | None, SlackBotTokens] = {} + socket_clients: dict[str | None, TenantSocketModeClient] = {} set_is_ee_based_on_env_variable() @@ -489,46 +505,66 @@ def _initialize_socket_client(socket_client: SocketModeClient) -> None: while True: try: - latest_slack_bot_tokens = fetch_tokens() - - if latest_slack_bot_tokens != slack_bot_tokens: - if slack_bot_tokens is not None: - logger.notice("Slack Bot tokens have changed - reconnecting") - else: - # This happens on the very first time the listener process comes up - # or the tokens have updated (set up for the first time) - with Session(get_sqlalchemy_engine()) as db_session: - search_settings = get_current_search_settings(db_session) - embedding_model = EmbeddingModel.from_db_model( - search_settings=search_settings, - server_host=MODEL_SERVER_HOST, - server_port=MODEL_SERVER_PORT, - ) - - warm_up_bi_encoder( - embedding_model=embedding_model, - ) - - slack_bot_tokens = latest_slack_bot_tokens - # potentially may cause a message to be dropped, but it is complicated - # to avoid + (1) if the user is changing tokens, they are likely okay with some - # "migration downtime" and (2) if a single message is lost it is okay - # as this should be a very rare occurrence - if socket_client: - socket_client.close() - - socket_client = _get_socket_client(slack_bot_tokens) - _initialize_socket_client(socket_client) - - # Let the handlers run in the background + re-check for token updates every 60 seconds + tenant_ids = get_all_tenant_ids() # Function to retrieve all tenant IDs + + for tenant_id in tenant_ids: + with get_session_with_tenant(tenant_id) as db_session: + try: + token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id or "public") + latest_slack_bot_tokens = fetch_tokens() + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) + + if ( + tenant_id not in slack_bot_tokens + or latest_slack_bot_tokens != slack_bot_tokens[tenant_id] + ): + if tenant_id in slack_bot_tokens: + logger.notice( + f"Slack Bot tokens have changed for tenant {tenant_id} - reconnecting" + ) + else: + # Initial setup for this tenant + search_settings = get_current_search_settings( + db_session + ) + embedding_model = EmbeddingModel.from_db_model( + search_settings=search_settings, + server_host=MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + ) + warm_up_bi_encoder(embedding_model=embedding_model) + + slack_bot_tokens[tenant_id] = latest_slack_bot_tokens + + # potentially may cause a message to be dropped, but it is complicated + # to avoid + (1) if the user is changing tokens, they are likely okay with some + # "migration downtime" and (2) if a single message is lost it is okay + # as this should be a very rare occurrence + if tenant_id in socket_clients: + socket_clients[tenant_id].close() + + socket_client = _get_socket_client( + latest_slack_bot_tokens, tenant_id + ) + + # Initialize socket client for this tenant. Each tenant has its own + # socket client, allowing for multiple concurrent connections (one + # per tenant) with the tenant ID wrapped in the socket model client. + # Each `connect` stores websocket connection in a separate thread. 
+ _initialize_socket_client(socket_client) + + socket_clients[tenant_id] = socket_client + + except KvKeyNotFoundError: + logger.debug(f"Missing Slack Bot tokens for tenant {tenant_id}") + if tenant_id in socket_clients: + socket_clients[tenant_id].disconnect() + del socket_clients[tenant_id] + del slack_bot_tokens[tenant_id] + + # Wait before checking for updates Event().wait(timeout=60) - except ConfigNotFoundError: - # try again every 30 seconds. This is needed since the user may add tokens - # via the UI at any point in the programs lifecycle - if we just allow it to - # fail, then the user will need to restart the containers after adding tokens - logger.debug( - "Missing Slack Bot tokens - waiting 60 seconds and trying again" - ) - if socket_client: - socket_client.disconnect() + + except Exception: + logger.exception("An error occurred outside of main event loop") time.sleep(60) diff --git a/backend/danswer/danswerbot/slack/tokens.py b/backend/danswer/danswerbot/slack/tokens.py index 5de3a6a0135..3f67e4649fc 100644 --- a/backend/danswer/danswerbot/slack/tokens.py +++ b/backend/danswer/danswerbot/slack/tokens.py @@ -2,7 +2,7 @@ from typing import cast from danswer.configs.constants import KV_SLACK_BOT_TOKENS_CONFIG_KEY -from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.key_value_store.factory import get_kv_store from danswer.server.manage.models import SlackBotTokens @@ -13,7 +13,7 @@ def fetch_tokens() -> SlackBotTokens: if app_token and bot_token: return SlackBotTokens(app_token=app_token, bot_token=bot_token) - dynamic_config_store = get_dynamic_config_store() + dynamic_config_store = get_kv_store() return SlackBotTokens( **cast(dict, dynamic_config_store.load(key=KV_SLACK_BOT_TOKENS_CONFIG_KEY)) ) @@ -22,7 +22,7 @@ def fetch_tokens() -> SlackBotTokens: def save_tokens( tokens: SlackBotTokens, ) -> None: - dynamic_config_store = get_dynamic_config_store() + dynamic_config_store = get_kv_store() dynamic_config_store.store( key=KV_SLACK_BOT_TOKENS_CONFIG_KEY, val=dict(tokens), encrypt=True ) diff --git a/backend/danswer/danswerbot/slack/utils.py b/backend/danswer/danswerbot/slack/utils.py index d762dde7826..345f3605bd5 100644 --- a/backend/danswer/danswerbot/slack/utils.py +++ b/backend/danswer/danswerbot/slack/utils.py @@ -12,7 +12,7 @@ from slack_sdk.errors import SlackApiError from slack_sdk.models.blocks import Block from slack_sdk.models.metadata import Metadata -from sqlalchemy.orm import Session +from slack_sdk.socket_mode import SocketModeClient from danswer.configs.app_configs import DISABLE_TELEMETRY from danswer.configs.constants import ID_SEPARATOR @@ -31,7 +31,7 @@ from danswer.connectors.slack.utils import SlackTextCleaner from danswer.danswerbot.slack.constants import FeedbackVisibility from danswer.danswerbot.slack.tokens import fetch_tokens -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.db.users import get_user_by_email from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llms @@ -430,35 +430,58 @@ def read_slack_thread( replies = cast(dict, response.data).get("messages", []) for reply in replies: if "user" in reply and "bot_id" not in reply: - message = remove_danswer_bot_tag(reply["text"], client=client) - user_sem_id = fetch_user_semantic_id_from_id(reply["user"], client) + message = reply["text"] + user_sem_id = ( + fetch_user_semantic_id_from_id(reply.get("user"), client) + or "Unknown User" + ) message_type 
= MessageType.USER
         else:
             self_app_id = get_danswer_bot_app_id(client)
-            # Only include bot messages from Danswer, other bots are not taken in as context
-            if self_app_id != reply.get("user"):
-                continue
-
-            blocks = reply["blocks"]
-            if len(blocks) <= 1:
-                continue
-
-            # For the old flow, the useful block is the second one after the header block that says AI Answer
-            if reply["blocks"][0]["text"]["text"] == "AI Answer":
-                message = reply["blocks"][1]["text"]["text"]
-            else:
-                # for the new flow, the answer is the first block
-                message = reply["blocks"][0]["text"]["text"]
-
-            if message.startswith("_Filters"):
-                if len(blocks) <= 2:
+            if reply.get("user") == self_app_id:
+                # DanswerBot response
+                message_type = MessageType.ASSISTANT
+                user_sem_id = "Assistant"
+
+                # DanswerBot responses have both text and blocks
+                # The useful content is in the blocks, specifically the first block unless there are
+                # auto-detected filters
+                blocks = reply.get("blocks")
+                if not blocks:
+                    logger.warning(f"DanswerBot response has no blocks: {reply}")
                     continue
-                message = reply["blocks"][2]["text"]["text"]
-            user_sem_id = "Assistant"
-            message_type = MessageType.ASSISTANT
+                message = blocks[0].get("text", {}).get("text")
+
+                # If auto-detected filters are on, use the second block for the actual answer
+                # The first block is the auto-detected filters
+                if message.startswith("_Filters"):
+                    if len(blocks) < 2:
+                        logger.warning(f"Only filter blocks found: {reply}")
+                        continue
+                    # This is the DanswerBot answer format, if there is a change to how we respond,
+                    # this will need to be updated to get the correct "answer" portion
+                    message = reply["blocks"][1].get("text", {}).get("text")
+            else:
+                # Other bots are not counted as the LLM response which only comes from Danswer
+                message_type = MessageType.USER
+                bot_user_name = fetch_user_semantic_id_from_id(
+                    reply.get("user"), client
+                )
+                user_sem_id = (bot_user_name or "Unknown") + " Bot"
+
+                # For other bots, just use the text as we have no way of knowing what the
+                # useful portion is
+                message = reply.get("text")
+                if not message:
+                    message = blocks[0].get("text", {}).get("text")
+
+            if not message:
+                logger.warning("Skipping Slack thread message, no text found")
+                continue
+
+            message = remove_danswer_bot_tag(message, client=client)
         thread_messages.append(
             ThreadMessage(message=message, sender=user_sem_id, role=message_type)
         )
@@ -466,7 +489,9 @@
     return thread_messages
-def slack_usage_report(action: str, sender_id: str | None, client: WebClient) -> None:
+def slack_usage_report(
+    action: str, sender_id: str | None, client: WebClient, tenant_id: str | None
+) -> None:
     if DISABLE_TELEMETRY:
         return
@@ -478,7 +503,7 @@ def slack_usage_report(action: str, sender_id: str | None, client: WebClient) ->
         logger.warning("Unable to find sender email")
     if sender_email is not None:
-        with Session(get_sqlalchemy_engine()) as db_session:
+        with get_session_with_tenant(tenant_id) as db_session:
             danswer_user = get_user_by_email(email=sender_email, db_session=db_session)
     optional_telemetry(
@@ -554,3 +579,9 @@ def get_feedback_visibility() -> FeedbackVisibility:
         return FeedbackVisibility(DANSWER_BOT_FEEDBACK_VISIBILITY.lower())
     except ValueError:
         return FeedbackVisibility.PRIVATE
+
+
+class TenantSocketModeClient(SocketModeClient):
+    def __init__(self, tenant_id: str | None, *args: Any, **kwargs: Any):
+        super().__init__(*args, **kwargs)
+        self.tenant_id = tenant_id
diff --git a/backend/danswer/db/auth.py b/backend/danswer/db/auth.py index 
9b54e82cc1f..14eafbd1147 100644 --- a/backend/danswer/db/auth.py +++ b/backend/danswer/db/auth.py @@ -10,16 +10,19 @@ from sqlalchemy import func from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.future import select +from sqlalchemy.orm import Session +from danswer.auth.invited_users import get_invited_users from danswer.auth.schemas import UserRole from danswer.db.engine import get_async_session -from danswer.db.engine import get_sqlalchemy_async_engine +from danswer.db.engine import get_async_session_with_tenant from danswer.db.models import AccessToken from danswer.db.models import OAuthAccount from danswer.db.models import User from danswer.utils.variable_functionality import ( fetch_versioned_implementation_with_fallback, ) +from ee.danswer.db.api_key import get_api_key_email_pattern def get_default_admin_user_emails() -> list[str]: @@ -33,10 +36,24 @@ def get_default_admin_user_emails() -> list[str]: return get_default_admin_user_emails_fn() +def get_total_users_count(db_session: Session) -> int: + """ + Returns the total number of users in the system. + This is the sum of users and invited users. + """ + user_count = ( + db_session.query(User) + .filter(~User.email.endswith(get_api_key_email_pattern())) # type: ignore + .count() + ) + invited_users = len(get_invited_users()) + return user_count + invited_users + + async def get_user_count() -> int: - async with AsyncSession(get_sqlalchemy_async_engine()) as asession: + async with get_async_session_with_tenant() as session: stmt = select(func.count(User.id)) - result = await asession.execute(stmt) + result = await session.execute(stmt) user_count = result.scalar() if user_count is None: raise RuntimeError("Was not able to fetch the user count.") diff --git a/backend/danswer/db/chat.py b/backend/danswer/db/chat.py index feb2e2b4b51..4aaee092972 100644 --- a/backend/danswer/db/chat.py +++ b/backend/danswer/db/chat.py @@ -43,7 +43,7 @@ def get_chat_session_by_id( - chat_session_id: int, + chat_session_id: UUID, user_id: UUID | None, db_session: Session, include_deleted: bool = False, @@ -87,9 +87,9 @@ def get_chat_sessions_by_slack_thread_id( def get_valid_messages_from_query_sessions( - chat_session_ids: list[int], + chat_session_ids: list[UUID], db_session: Session, -) -> dict[int, str]: +) -> dict[UUID, str]: user_message_subquery = ( select( ChatMessage.chat_session_id, func.min(ChatMessage.id).label("user_msg_id") @@ -196,7 +196,7 @@ def delete_orphaned_search_docs(db_session: Session) -> None: def delete_messages_and_files_from_chat_session( - chat_session_id: int, db_session: Session + chat_session_id: UUID, db_session: Session ) -> None: # Select messages older than cutoff_time with files messages_with_files = db_session.execute( @@ -253,7 +253,7 @@ def create_chat_session( def update_chat_session( db_session: Session, user_id: UUID | None, - chat_session_id: int, + chat_session_id: UUID, description: str | None = None, sharing_status: ChatSessionSharedStatus | None = None, ) -> ChatSession: @@ -276,7 +276,7 @@ def update_chat_session( def delete_chat_session( user_id: UUID | None, - chat_session_id: int, + chat_session_id: UUID, db_session: Session, hard_delete: bool = HARD_DELETE_CHATS, ) -> None: @@ -337,7 +337,7 @@ def get_chat_message( def get_chat_messages_by_sessions( - chat_session_ids: list[int], + chat_session_ids: list[UUID], user_id: UUID | None, db_session: Session, skip_permission_check: bool = False, @@ -370,7 +370,7 @@ def get_search_docs_for_chat_message( def get_chat_messages_by_session( - 
chat_session_id: int, + chat_session_id: UUID, user_id: UUID | None, db_session: Session, skip_permission_check: bool = False, @@ -388,7 +388,7 @@ def get_chat_messages_by_session( ) if prefetch_tool_calls: - stmt = stmt.options(joinedload(ChatMessage.tool_calls)) + stmt = stmt.options(joinedload(ChatMessage.tool_call)) result = db_session.scalars(stmt).unique().all() else: result = db_session.scalars(stmt).all() @@ -397,7 +397,7 @@ def get_chat_messages_by_session( def get_or_create_root_message( - chat_session_id: int, + chat_session_id: UUID, db_session: Session, ) -> ChatMessage: try: @@ -433,7 +433,7 @@ def get_or_create_root_message( def reserve_message_id( db_session: Session, - chat_session_id: int, + chat_session_id: UUID, parent_message: int, message_type: MessageType, ) -> int: @@ -460,7 +460,7 @@ def reserve_message_id( def create_new_chat_message( - chat_session_id: int, + chat_session_id: UUID, parent_message: ChatMessage, message: str, prompt_id: int | None, @@ -474,7 +474,7 @@ def create_new_chat_message( alternate_assistant_id: int | None = None, # Maps the citation number [n] to the DB SearchDoc citations: dict[int, int] | None = None, - tool_calls: list[ToolCall] | None = None, + tool_call: ToolCall | None = None, commit: bool = True, reserved_message_id: int | None = None, overridden_model: str | None = None, @@ -494,7 +494,7 @@ def create_new_chat_message( existing_message.message_type = message_type existing_message.citations = citations existing_message.files = files - existing_message.tool_calls = tool_calls if tool_calls else [] + existing_message.tool_call = tool_call existing_message.error = error existing_message.alternate_assistant_id = alternate_assistant_id existing_message.overridden_model = overridden_model @@ -513,7 +513,7 @@ def create_new_chat_message( message_type=message_type, citations=citations, files=files, - tool_calls=tool_calls if tool_calls else [], + tool_call=tool_call, error=error, alternate_assistant_id=alternate_assistant_id, overridden_model=overridden_model, @@ -749,14 +749,13 @@ def translate_db_message_to_chat_message_detail( time_sent=chat_message.time_sent, citations=chat_message.citations, files=chat_message.files or [], - tool_calls=[ - ToolCallFinalResult( - tool_name=tool_call.tool_name, - tool_args=tool_call.tool_arguments, - tool_result=tool_call.tool_result, - ) - for tool_call in chat_message.tool_calls - ], + tool_call=ToolCallFinalResult( + tool_name=chat_message.tool_call.tool_name, + tool_args=chat_message.tool_call.tool_arguments, + tool_result=chat_message.tool_call.tool_result, + ) + if chat_message.tool_call + else None, alternate_assistant_id=chat_message.alternate_assistant_id, overridden_model=chat_message.overridden_model, ) diff --git a/backend/danswer/db/connector.py b/backend/danswer/db/connector.py index 89e6977103e..835f74d437c 100644 --- a/backend/danswer/db/connector.py +++ b/backend/danswer/db/connector.py @@ -1,3 +1,5 @@ +from datetime import datetime +from datetime import timezone from typing import cast from sqlalchemy import and_ @@ -246,7 +248,7 @@ def create_initial_default_connector(db_session: Session) -> None: logger.warning( "Default connector does not have expected values. Updating to proper state." 
) - # Ensure default connector has correct valuesg + # Ensure default connector has correct values default_connector.source = DocumentSource.INGESTION_API default_connector.input_type = InputType.LOAD_STATE default_connector.refresh_freq = None @@ -268,3 +270,15 @@ def create_initial_default_connector(db_session: Session) -> None: ) db_session.add(connector) db_session.commit() + + +def mark_ccpair_as_pruned(cc_pair_id: int, db_session: Session) -> None: + stmt = select(ConnectorCredentialPair).where( + ConnectorCredentialPair.id == cc_pair_id + ) + cc_pair = db_session.scalar(stmt) + if cc_pair is None: + raise ValueError(f"No cc_pair with ID: {cc_pair_id}") + + cc_pair.last_pruned = datetime.now(timezone.utc) + db_session.commit() diff --git a/backend/danswer/db/connector_credential_pair.py b/backend/danswer/db/connector_credential_pair.py index f9d79df96ae..b333dd78603 100644 --- a/backend/danswer/db/connector_credential_pair.py +++ b/backend/danswer/db/connector_credential_pair.py @@ -341,6 +341,8 @@ def add_credential_to_connector( access_type: AccessType, groups: list[int] | None, auto_sync_options: dict | None = None, + initial_status: ConnectorCredentialPairStatus = ConnectorCredentialPairStatus.ACTIVE, + last_successful_index_time: datetime | None = None, ) -> StatusResponse: connector = fetch_connector_by_id(connector_id, db_session) credential = fetch_credential_by_id(credential_id, user, db_session) @@ -384,12 +386,14 @@ def add_credential_to_connector( connector_id=connector_id, credential_id=credential_id, name=cc_pair_name, - status=ConnectorCredentialPairStatus.ACTIVE, + status=initial_status, access_type=access_type, auto_sync_options=auto_sync_options, + last_successful_index_time=last_successful_index_time, ) db_session.add(association) db_session.flush() # make sure the association has an id + db_session.refresh(association) if groups and access_type != AccessType.SYNC: _relate_groups_to_cc_pair__no_commit( diff --git a/backend/danswer/db/credentials.py b/backend/danswer/db/credentials.py index abab904cc48..86532dfc542 100644 --- a/backend/danswer/db/credentials.py +++ b/backend/danswer/db/credentials.py @@ -10,10 +10,8 @@ from danswer.auth.schemas import UserRole from danswer.configs.constants import DocumentSource -from danswer.connectors.gmail.constants import ( - GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, -) -from danswer.connectors.google_drive.constants import ( +from danswer.configs.constants import KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY +from danswer.connectors.google_utils.shared_constants import ( DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, ) from danswer.db.models import ConnectorCredentialPair @@ -40,6 +38,8 @@ DocumentSource.MEDIAWIKI, } +PUBLIC_CREDENTIAL_ID = 0 + def _add_user_filters( stmt: Select, @@ -242,7 +242,6 @@ def create_credential( ) db_session.add(credential) db_session.flush() # This ensures the credential gets an ID - _relate_credential_to_user_groups__no_commit( db_session=db_session, credential_id=credential.id, @@ -385,12 +384,11 @@ def delete_credential( def create_initial_public_credential(db_session: Session) -> None: - public_cred_id = 0 error_msg = ( "DB is not in a valid initial state." "There must exist an empty public credential for data connectors that do not require additional Auth." 
) - first_credential = fetch_credential_by_id(public_cred_id, None, db_session) + first_credential = fetch_credential_by_id(PUBLIC_CREDENTIAL_ID, None, db_session) if first_credential is not None: if first_credential.credential_json != {} or first_credential.user is not None: @@ -398,7 +396,7 @@ def create_initial_public_credential(db_session: Session) -> None: return credential = Credential( - id=public_cred_id, + id=PUBLIC_CREDENTIAL_ID, credential_json={}, user_id=None, ) @@ -406,14 +404,30 @@ def create_initial_public_credential(db_session: Session) -> None: db_session.commit() +def cleanup_gmail_credentials(db_session: Session) -> None: + gmail_credentials = fetch_credentials_by_source( + db_session=db_session, user=None, document_source=DocumentSource.GMAIL + ) + for credential in gmail_credentials: + db_session.delete(credential) + db_session.commit() + + +def cleanup_google_drive_credentials(db_session: Session) -> None: + google_drive_credentials = fetch_credentials_by_source( + db_session=db_session, user=None, document_source=DocumentSource.GOOGLE_DRIVE + ) + for credential in google_drive_credentials: + db_session.delete(credential) + db_session.commit() + + def delete_gmail_service_account_credentials( user: User | None, db_session: Session ) -> None: credentials = fetch_credentials(db_session=db_session, user=user) for credential in credentials: - if credential.credential_json.get( - GMAIL_DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY - ): + if credential.credential_json.get(DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY): db_session.delete(credential) db_session.commit() @@ -424,7 +438,7 @@ def delete_google_drive_service_account_credentials( ) -> None: credentials = fetch_credentials(db_session=db_session, user=user) for credential in credentials: - if credential.credential_json.get(DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY): + if credential.credential_json.get(KV_GOOGLE_DRIVE_SERVICE_ACCOUNT_KEY): db_session.delete(credential) db_session.commit() diff --git a/backend/danswer/db/document.py b/backend/danswer/db/document.py index 8aee28aef05..2e142a2c0b5 100644 --- a/backend/danswer/db/document.py +++ b/backend/danswer/db/document.py @@ -375,6 +375,20 @@ def update_docs_last_modified__no_commit( doc.last_modified = now +def mark_document_as_modified( + document_id: str, + db_session: Session, +) -> None: + stmt = select(DbDocument).where(DbDocument.id == document_id) + doc = db_session.scalar(stmt) + if doc is None: + raise ValueError(f"No document with ID: {document_id}") + + # update last_synced + doc.last_modified = datetime.now(timezone.utc) + db_session.commit() + + def mark_document_as_synced(document_id: str, db_session: Session) -> None: stmt = select(DbDocument).where(DbDocument.id == document_id) doc = db_session.scalar(stmt) diff --git a/backend/danswer/db/document_set.py b/backend/danswer/db/document_set.py index 0ba6c4e9ab3..b5af99b22d4 100644 --- a/backend/danswer/db/document_set.py +++ b/backend/danswer/db/document_set.py @@ -398,7 +398,7 @@ def mark_document_set_as_to_be_deleted( def delete_document_set_cc_pair_relationship__no_commit( connector_id: int, credential_id: int, db_session: Session -) -> None: +) -> int: """Deletes all rows from DocumentSet__ConnectorCredentialPair where the connector_credential_pair_id matches the given cc_pair_id.""" delete_stmt = delete(DocumentSet__ConnectorCredentialPair).where( @@ -409,7 +409,8 @@ def delete_document_set_cc_pair_relationship__no_commit( == ConnectorCredentialPair.id, ) ) - db_session.execute(delete_stmt) + result = 
db_session.execute(delete_stmt) + return result.rowcount # type: ignore def fetch_document_sets( diff --git a/backend/danswer/db/engine.py b/backend/danswer/db/engine.py index af44498be24..511424e214e 100644 --- a/backend/danswer/db/engine.py +++ b/backend/danswer/db/engine.py @@ -1,12 +1,18 @@ import contextlib +import re import threading import time from collections.abc import AsyncGenerator from collections.abc import Generator +from contextlib import asynccontextmanager +from contextlib import contextmanager from datetime import datetime from typing import Any from typing import ContextManager +import jwt +from fastapi import HTTPException +from fastapi import Request from sqlalchemy import event from sqlalchemy import text from sqlalchemy.engine import create_engine @@ -19,15 +25,23 @@ from danswer.configs.app_configs import LOG_POSTGRES_CONN_COUNTS from danswer.configs.app_configs import LOG_POSTGRES_LATENCY +from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW +from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE from danswer.configs.app_configs import POSTGRES_DB from danswer.configs.app_configs import POSTGRES_HOST +from danswer.configs.app_configs import POSTGRES_IDLE_SESSIONS_TIMEOUT from danswer.configs.app_configs import POSTGRES_PASSWORD from danswer.configs.app_configs import POSTGRES_POOL_PRE_PING from danswer.configs.app_configs import POSTGRES_POOL_RECYCLE from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER +from danswer.configs.app_configs import USER_AUTH_SECRET from danswer.configs.constants import POSTGRES_UNKNOWN_APP_NAME from danswer.utils.logger import setup_logger +from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA +from shared_configs.configs import TENANT_ID_PREFIX +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logger = setup_logger() @@ -37,11 +51,10 @@ # global so we don't create more than one engine per process # outside of being best practice, this is needed so we can properly pool # connections and not create a new pool on every request -_ASYNC_ENGINE: AsyncEngine | None = None +_ASYNC_ENGINE: AsyncEngine | None = None SessionFactory: sessionmaker[Session] | None = None - if LOG_POSTGRES_LATENCY: # Function to log before query execution @event.listens_for(Engine, "before_cursor_execute") @@ -105,10 +118,19 @@ def get_db_current_time(db_session: Session) -> datetime: return result +# Regular expression to validate schema names to prevent SQL injection +SCHEMA_NAME_REGEX = re.compile(r"^[a-zA-Z0-9_-]+$") + + +def is_valid_schema_name(name: str) -> bool: + return SCHEMA_NAME_REGEX.match(name) is not None + + class SqlEngine: - """Class to manage a global sql alchemy engine (needed for proper resource control) + """Class to manage a global SQLAlchemy engine (needed for proper resource control). Will eventually subsume most of the standalone functions in this file. - Sync only for now""" + Sync only for now. 
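+
+    Illustrative usage (the pool sizes shown are example values only):
+
+        SqlEngine.init_engine(pool_size=20, max_overflow=5)
+        engine = SqlEngine.get_engine()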
+ """ _engine: Engine | None = None _lock: threading.Lock = threading.Lock() @@ -116,8 +138,8 @@ class SqlEngine: # Default parameters for engine creation DEFAULT_ENGINE_KWARGS = { - "pool_size": 40, - "max_overflow": 10, + "pool_size": 20, + "max_overflow": 5, "pool_pre_ping": POSTGRES_POOL_PRE_PING, "pool_recycle": POSTGRES_POOL_RECYCLE, } @@ -137,16 +159,18 @@ def _init_engine(cls, **engine_kwargs: Any) -> Engine: @classmethod def init_engine(cls, **engine_kwargs: Any) -> None: """Allow the caller to init the engine with extra params. Different clients - such as the API server and different celery workers and tasks - need different settings.""" + such as the API server and different Celery workers and tasks + need different settings. + """ with cls._lock: if not cls._engine: cls._engine = cls._init_engine(**engine_kwargs) @classmethod def get_engine(cls) -> Engine: - """Gets the sql alchemy engine. Will init a default engine if init hasn't - already been called. You probably want to init first!""" + """Gets the SQLAlchemy engine. Will init a default engine if init hasn't + already been called. You probably want to init first! + """ if not cls._engine: with cls._lock: if not cls._engine: @@ -166,6 +190,29 @@ def get_app_name(cls) -> str: return cls._app_name +def get_all_tenant_ids() -> list[str] | list[None]: + if not MULTI_TENANT: + return [None] + with get_session_with_tenant(tenant_id=POSTGRES_DEFAULT_SCHEMA) as session: + result = session.execute( + text( + f""" + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name NOT IN ('pg_catalog', 'information_schema', '{POSTGRES_DEFAULT_SCHEMA}')""" + ) + ) + tenant_ids = [row[0] for row in result] + + valid_tenants = [ + tenant + for tenant in tenant_ids + if tenant is None or tenant.startswith(TENANT_ID_PREFIX) + ] + + return valid_tenants + + def build_connection_string( *, db_api: str = ASYNC_DB_API, @@ -178,14 +225,9 @@ def build_connection_string( ) -> str: if app_name: return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}?application_name={app_name}" - return f"postgresql+{db_api}://{user}:{password}@{host}:{port}/{db}" -def init_sqlalchemy_engine(app_name: str) -> None: - SqlEngine.set_app_name(app_name) - - def get_sqlalchemy_engine() -> Engine: return SqlEngine.get_engine() @@ -193,7 +235,7 @@ def get_sqlalchemy_engine() -> Engine: def get_sqlalchemy_async_engine() -> AsyncEngine: global _ASYNC_ENGINE if _ASYNC_ENGINE is None: - # underlying asyncpg cannot accept application_name directly in the connection string + # Underlying asyncpg cannot accept application_name directly in the connection string # https://github.com/MagicStack/asyncpg/issues/798 connection_string = build_connection_string() _ASYNC_ENGINE = create_async_engine( @@ -203,33 +245,201 @@ def get_sqlalchemy_async_engine() -> AsyncEngine: "application_name": SqlEngine.get_app_name() + "_async" } }, - pool_size=40, - max_overflow=10, + # async engine is only used by API server, so we can use those values + # here as well + pool_size=POSTGRES_API_SERVER_POOL_SIZE, + max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW, pool_pre_ping=POSTGRES_POOL_PRE_PING, pool_recycle=POSTGRES_POOL_RECYCLE, ) return _ASYNC_ENGINE -def get_session_context_manager() -> ContextManager[Session]: - return contextlib.contextmanager(get_session)() +# Dependency to get the current tenant ID +# If no token is present, uses the default schema for this use case +def get_current_tenant_id(request: Request) -> str: + """Dependency that extracts the tenant ID from the 
JWT token in the request and sets the context variable."""
+    if not MULTI_TENANT:
+        tenant_id = POSTGRES_DEFAULT_SCHEMA
+        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+        return tenant_id
+
+    token = request.cookies.get("fastapiusersauth")
+    if not token:
+        current_value = CURRENT_TENANT_ID_CONTEXTVAR.get()
+        # If no token is present, fall back to the tenant ID already set in the context
+        return current_value
+
+    try:
+        payload = jwt.decode(
+            token,
+            USER_AUTH_SECRET,
+            audience=["fastapi-users:auth"],
+            algorithms=["HS256"],
+        )
+        tenant_id = payload.get("tenant_id", POSTGRES_DEFAULT_SCHEMA)
+        if not is_valid_schema_name(tenant_id):
+            raise HTTPException(status_code=400, detail="Invalid tenant ID format")
+        CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id)
+
+        return tenant_id
+    except jwt.InvalidTokenError:
+        return CURRENT_TENANT_ID_CONTEXTVAR.get()
+    except Exception as e:
+        logger.error(f"Unexpected error in get_current_tenant_id: {str(e)}")
+        raise HTTPException(status_code=500, detail="Internal server error")
+
+
+@asynccontextmanager
+async def get_async_session_with_tenant(
+    tenant_id: str | None = None,
+) -> AsyncGenerator[AsyncSession, None]:
+    if tenant_id is None:
+        tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get()
+
+    if not is_valid_schema_name(tenant_id):
+        logger.error(f"Invalid tenant ID: {tenant_id}")
+        raise Exception("Invalid tenant ID")
+
+    engine = get_sqlalchemy_async_engine()
+    async_session_factory = sessionmaker(
+        bind=engine, expire_on_commit=False, class_=AsyncSession
+    )  # type: ignore
+
+    async with async_session_factory() as session:
+        try:
+            # Set the search_path to the tenant's schema
+            await session.execute(text(f'SET search_path = "{tenant_id}"'))
+            if POSTGRES_IDLE_SESSIONS_TIMEOUT:
+                await session.execute(
+                    text(
+                        f"SET SESSION idle_in_transaction_session_timeout = {POSTGRES_IDLE_SESSIONS_TIMEOUT}"
+                    )
+                )
+        except Exception:
+            logger.exception("Error setting search_path.")
+            # Re-raise rather than proceed with a session bound to the wrong schema
+            raise
+        else:
+            yield session
+
+
+@contextmanager
+def get_session_with_tenant(
+    tenant_id: str | None = None,
+) -> Generator[Session, None, None]:
+    """
+    Generate a database session bound to a connection with the appropriate tenant schema set.
+    This preserves the tenant ID across the session and reverts to the previous tenant ID
+    after the session is closed.
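+
+    Illustrative usage:
+
+        with get_session_with_tenant(tenant_id) as db_session:
+            db_session.execute(text("SELECT 1"))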
+ """ + engine = get_sqlalchemy_engine() + + # Store the previous tenant ID + previous_tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + + if tenant_id is None: + tenant_id = previous_tenant_id + else: + CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) + + event.listen(engine, "checkout", set_search_path_on_checkout) + + if not is_valid_schema_name(tenant_id): + raise HTTPException(status_code=400, detail="Invalid tenant ID") + + try: + # Establish a raw connection + with engine.connect() as connection: + # Access the raw DBAPI connection and set the search_path + dbapi_connection = connection.connection + + # Set the search_path outside of any transaction + cursor = dbapi_connection.cursor() + try: + cursor.execute(f'SET search_path = "{tenant_id}"') + if POSTGRES_IDLE_SESSIONS_TIMEOUT: + cursor.execute( + f"SET SESSION idle_in_transaction_session_timeout = {POSTGRES_IDLE_SESSIONS_TIMEOUT}" + ) + finally: + cursor.close() + + # Bind the session to the connection + with Session(bind=connection, expire_on_commit=False) as session: + try: + yield session + finally: + # Reset search_path to default after the session is used + if MULTI_TENANT: + cursor = dbapi_connection.cursor() + try: + cursor.execute('SET search_path TO "$user", public') + finally: + cursor.close() + + finally: + # Restore the previous tenant ID + CURRENT_TENANT_ID_CONTEXTVAR.set(previous_tenant_id) + + +def set_search_path_on_checkout( + dbapi_conn: Any, connection_record: Any, connection_proxy: Any +) -> None: + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + if tenant_id and is_valid_schema_name(tenant_id): + with dbapi_conn.cursor() as cursor: + cursor.execute(f'SET search_path TO "{tenant_id}"') + + +def get_session_generator_with_tenant() -> Generator[Session, None, None]: + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + with get_session_with_tenant(tenant_id) as session: + yield session def get_session() -> Generator[Session, None, None]: - # The line below was added to monitor the latency caused by Postgres connections - # during API calls. 
- # with tracer.trace("db.get_session"): - with Session(get_sqlalchemy_engine(), expire_on_commit=False) as session: + """Generate a database session with the appropriate tenant schema set.""" + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + if tenant_id == POSTGRES_DEFAULT_SCHEMA and MULTI_TENANT: + raise HTTPException(status_code=401, detail="User must authenticate") + + engine = get_sqlalchemy_engine() + + with Session(engine, expire_on_commit=False) as session: + if MULTI_TENANT: + if not is_valid_schema_name(tenant_id): + raise HTTPException(status_code=400, detail="Invalid tenant ID") + # Set the search_path to the tenant's schema + session.execute(text(f'SET search_path = "{tenant_id}"')) yield session async def get_async_session() -> AsyncGenerator[AsyncSession, None]: - async with AsyncSession( - get_sqlalchemy_async_engine(), expire_on_commit=False - ) as async_session: + """Generate an async database session with the appropriate tenant schema set.""" + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + engine = get_sqlalchemy_async_engine() + async with AsyncSession(engine, expire_on_commit=False) as async_session: + if MULTI_TENANT: + if not is_valid_schema_name(tenant_id): + raise HTTPException(status_code=400, detail="Invalid tenant ID") + # Set the search_path to the tenant's schema + await async_session.execute(text(f'SET search_path = "{tenant_id}"')) yield async_session +def get_session_context_manager() -> ContextManager[Session]: + """Context manager for database sessions.""" + return contextlib.contextmanager(get_session_generator_with_tenant)() + + +def get_session_factory() -> sessionmaker[Session]: + """Get a session factory.""" + global SessionFactory + if SessionFactory is None: + SessionFactory = sessionmaker(bind=get_sqlalchemy_engine()) + return SessionFactory + + async def warm_up_connections( sync_connections_to_warm_up: int = 20, async_connections_to_warm_up: int = 20 ) -> None: @@ -251,10 +461,3 @@ async def warm_up_connections( await async_conn.execute(text("SELECT 1")) for async_conn in async_connections: await async_conn.close() - - -def get_session_factory() -> sessionmaker[Session]: - global SessionFactory - if SessionFactory is None: - SessionFactory = sessionmaker(bind=get_sqlalchemy_engine()) - return SessionFactory diff --git a/backend/danswer/db/index_attempt.py b/backend/danswer/db/index_attempt.py index 32e20d065c0..b9c3d9d4ca2 100644 --- a/backend/danswer/db/index_attempt.py +++ b/backend/danswer/db/index_attempt.py @@ -1,4 +1,7 @@ from collections.abc import Sequence +from datetime import datetime +from datetime import timedelta +from datetime import timezone from sqlalchemy import and_ from sqlalchemy import delete @@ -19,8 +22,6 @@ from danswer.server.documents.models import ConnectorCredentialPair from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.utils.logger import setup_logger -from danswer.utils.telemetry import optional_telemetry -from danswer.utils.telemetry import RecordType logger = setup_logger() @@ -66,7 +67,33 @@ def create_index_attempt( return new_attempt.id -def get_inprogress_index_attempts( +def mock_successful_index_attempt( + connector_credential_pair_id: int, + search_settings_id: int, + docs_indexed: int, + db_session: Session, +) -> int: + """Should not be used in any user triggered flows""" + db_time = func.now() + new_attempt = IndexAttempt( + connector_credential_pair_id=connector_credential_pair_id, + search_settings_id=search_settings_id, + from_beginning=True, + 
status=IndexingStatus.SUCCESS, + total_docs_indexed=docs_indexed, + new_docs_indexed=docs_indexed, + # Need this to be some convincing random looking value and it can't be 0 + # or the indexing rate would calculate out to infinity + time_started=db_time - timedelta(seconds=1.92), + time_updated=db_time, + ) + db_session.add(new_attempt) + db_session.commit() + + return new_attempt.id + + +def get_in_progress_index_attempts( connector_id: int | None, db_session: Session, ) -> list[IndexAttempt]: @@ -81,13 +108,15 @@ def get_inprogress_index_attempts( return list(incomplete_attempts.all()) -def get_not_started_index_attempts(db_session: Session) -> list[IndexAttempt]: +def get_all_index_attempts_by_status( + status: IndexingStatus, db_session: Session +) -> list[IndexAttempt]: """This eagerly loads the connector and credential so that the db_session can be expired before running long-living indexing jobs, which causes increasing memory usage. Results are ordered by time_created (oldest to newest).""" stmt = select(IndexAttempt) - stmt = stmt.where(IndexAttempt.status == IndexingStatus.NOT_STARTED) + stmt = stmt.where(IndexAttempt.status == status) stmt = stmt.order_by(IndexAttempt.time_created) stmt = stmt.options( joinedload(IndexAttempt.connector_credential_pair).joinedload( @@ -101,47 +130,116 @@ def get_not_started_index_attempts(db_session: Session) -> list[IndexAttempt]: return list(new_attempts.all()) +def transition_attempt_to_in_progress( + index_attempt_id: int, + db_session: Session, +) -> IndexAttempt: + """Locks the row when we try to update""" + try: + attempt = db_session.execute( + select(IndexAttempt) + .where(IndexAttempt.id == index_attempt_id) + .with_for_update() + ).scalar_one() + + if attempt is None: + raise RuntimeError( + f"Unable to find IndexAttempt for ID '{index_attempt_id}'" + ) + + if attempt.status != IndexingStatus.NOT_STARTED: + raise RuntimeError( + f"Indexing attempt with ID '{index_attempt_id}' is not in NOT_STARTED status. " + f"Current status is '{attempt.status}'." 
+ ) + + attempt.status = IndexingStatus.IN_PROGRESS + attempt.time_started = attempt.time_started or func.now() # type: ignore + db_session.commit() + return attempt + except Exception: + db_session.rollback() + logger.exception("transition_attempt_to_in_progress exceptioned.") + raise + + def mark_attempt_in_progress( index_attempt: IndexAttempt, db_session: Session, ) -> None: - index_attempt.status = IndexingStatus.IN_PROGRESS - index_attempt.time_started = index_attempt.time_started or func.now() # type: ignore - db_session.commit() + try: + attempt = db_session.execute( + select(IndexAttempt) + .where(IndexAttempt.id == index_attempt.id) + .with_for_update() + ).scalar_one() + + attempt.status = IndexingStatus.IN_PROGRESS + attempt.time_started = index_attempt.time_started or func.now() # type: ignore + db_session.commit() + except Exception: + db_session.rollback() + raise def mark_attempt_succeeded( index_attempt: IndexAttempt, db_session: Session, ) -> None: - index_attempt.status = IndexingStatus.SUCCESS - db_session.add(index_attempt) - db_session.commit() + try: + attempt = db_session.execute( + select(IndexAttempt) + .where(IndexAttempt.id == index_attempt.id) + .with_for_update() + ).scalar_one() + + attempt.status = IndexingStatus.SUCCESS + db_session.commit() + except Exception: + db_session.rollback() + raise def mark_attempt_partially_succeeded( index_attempt: IndexAttempt, db_session: Session, ) -> None: - index_attempt.status = IndexingStatus.COMPLETED_WITH_ERRORS - db_session.add(index_attempt) - db_session.commit() + try: + attempt = db_session.execute( + select(IndexAttempt) + .where(IndexAttempt.id == index_attempt.id) + .with_for_update() + ).scalar_one() + + attempt.status = IndexingStatus.COMPLETED_WITH_ERRORS + db_session.commit() + except Exception: + db_session.rollback() + raise def mark_attempt_failed( - index_attempt: IndexAttempt, + index_attempt_id: int, db_session: Session, failure_reason: str = "Unknown", full_exception_trace: str | None = None, ) -> None: - index_attempt.status = IndexingStatus.FAILED - index_attempt.error_msg = failure_reason - index_attempt.full_exception_trace = full_exception_trace - db_session.add(index_attempt) - db_session.commit() - - source = index_attempt.connector_credential_pair.connector.source - optional_telemetry(record_type=RecordType.FAILURE, data={"connector": source}) + try: + attempt = db_session.execute( + select(IndexAttempt) + .where(IndexAttempt.id == index_attempt_id) + .with_for_update() + ).scalar_one() + + if not attempt.time_started: + attempt.time_started = datetime.now(timezone.utc) + attempt.status = IndexingStatus.FAILED + attempt.error_msg = failure_reason + attempt.full_exception_trace = full_exception_trace + db_session.commit() + except Exception: + db_session.rollback() + raise def update_docs_indexed( @@ -435,14 +533,13 @@ def cancel_indexing_attempts_for_ccpair( db_session.execute(stmt) - db_session.commit() - def cancel_indexing_attempts_past_model( db_session: Session, ) -> None: """Stops all indexing attempts that are in progress or not started for any embedding model that not present/future""" + db_session.execute( update(IndexAttempt) .where( @@ -455,8 +552,6 @@ def cancel_indexing_attempts_past_model( .values(status=IndexingStatus.FAILED) ) - db_session.commit() - def count_unique_cc_pairs_with_successful_index_attempts( search_settings_id: int | None, diff --git a/backend/danswer/db/llm.py b/backend/danswer/db/llm.py index af2ded9562a..b01fd81079c 100644 --- a/backend/danswer/db/llm.py +++ 
b/backend/danswer/db/llm.py @@ -83,6 +83,7 @@ def upsert_llm_provider( existing_llm_provider.model_names = llm_provider.model_names existing_llm_provider.is_public = llm_provider.is_public existing_llm_provider.display_model_names = llm_provider.display_model_names + existing_llm_provider.deployment_name = llm_provider.deployment_name if not existing_llm_provider.id: # If its not already in the db, we need to generate an ID by flushing @@ -94,10 +95,11 @@ def upsert_llm_provider( group_ids=llm_provider.groups, db_session=db_session, ) + full_llm_provider = FullLLMProvider.from_model(existing_llm_provider) db_session.commit() - return FullLLMProvider.from_model(existing_llm_provider) + return full_llm_provider def fetch_existing_embedding_providers( diff --git a/backend/danswer/db/models.py b/backend/danswer/db/models.py index fff6b12336d..3ff2133155b 100644 --- a/backend/danswer/db/models.py +++ b/backend/danswer/db/models.py @@ -5,9 +5,12 @@ from typing import Literal from typing import NotRequired from typing import Optional +from uuid import uuid4 from typing_extensions import TypedDict # noreorder from uuid import UUID +from sqlalchemy.dialects.postgresql import UUID as PGUUID + from fastapi_users_db_sqlalchemy import SQLAlchemyBaseOAuthAccountTableUUID from fastapi_users_db_sqlalchemy import SQLAlchemyBaseUserTableUUID from fastapi_users_db_sqlalchemy.access_token import SQLAlchemyBaseAccessTokenTableUUID @@ -50,13 +53,14 @@ from danswer.db.enums import IndexModelStatus from danswer.db.enums import TaskStatus from danswer.db.pydantic_type import PydanticType -from danswer.dynamic_configs.interface import JSON_ro +from danswer.key_value_store.interface import JSON_ro from danswer.file_store.models import FileDescriptor from danswer.llm.override_models import LLMOverride from danswer.llm.override_models import PromptOverride from danswer.search.enums import RecencyBiasSetting from danswer.utils.encryption import decrypt_bytes_to_string from danswer.utils.encryption import encrypt_string_to_bytes +from danswer.utils.headers import HeaderItemDict from shared_configs.enums import EmbeddingProvider from shared_configs.enums import RerankerProvider @@ -231,6 +235,9 @@ class Notification(Base): first_shown: Mapped[datetime.datetime] = mapped_column(DateTime(timezone=True)) user: Mapped[User] = relationship("User", back_populates="notifications") + additional_data: Mapped[dict | None] = mapped_column( + postgresql.JSONB(), nullable=True + ) """ @@ -414,6 +421,12 @@ class ConnectorCredentialPair(Base): last_successful_index_time: Mapped[datetime.datetime | None] = mapped_column( DateTime(timezone=True), default=None ) + + # last successful prune + last_pruned: Mapped[datetime.datetime | None] = mapped_column( + DateTime(timezone=True), nullable=True, index=True + ) + total_docs_indexed: Mapped[int] = mapped_column(Integer, default=0) connector: Mapped["Connector"] = relationship( @@ -609,6 +622,7 @@ class SearchSettings(Base): normalize: Mapped[bool] = mapped_column(Boolean) query_prefix: Mapped[str | None] = mapped_column(String, nullable=True) passage_prefix: Mapped[str | None] = mapped_column(String, nullable=True) + status: Mapped[IndexModelStatus] = mapped_column( Enum(IndexModelStatus, native_enum=False) ) @@ -664,6 +678,20 @@ def __repr__(self) -> str: return f"" + @property + def api_version(self) -> str | None: + return ( + self.cloud_provider.api_version if self.cloud_provider is not None else None + ) + + @property + def deployment_name(self) -> str | None: + return ( + 
self.cloud_provider.deployment_name + if self.cloud_provider is not None + else None + ) + @property def api_url(self) -> str | None: return self.cloud_provider.api_url if self.cloud_provider is not None else None @@ -706,9 +734,10 @@ class IndexAttempt(Base): full_exception_trace: Mapped[str | None] = mapped_column(Text, default=None) # Nullable because in the past, we didn't allow swapping out embedding models live search_settings_id: Mapped[int] = mapped_column( - ForeignKey("search_settings.id"), - nullable=False, + ForeignKey("search_settings.id", ondelete="SET NULL"), + nullable=True, ) + time_created: Mapped[datetime.datetime] = mapped_column( DateTime(timezone=True), server_default=func.now(), @@ -728,7 +757,7 @@ class IndexAttempt(Base): "ConnectorCredentialPair", back_populates="index_attempts" ) - search_settings: Mapped[SearchSettings] = relationship( + search_settings: Mapped[SearchSettings | None] = relationship( "SearchSettings", back_populates="index_attempts" ) @@ -889,17 +918,24 @@ class ToolCall(Base): tool_arguments: Mapped[dict[str, JSON_ro]] = mapped_column(postgresql.JSONB()) tool_result: Mapped[JSON_ro] = mapped_column(postgresql.JSONB()) - message_id: Mapped[int] = mapped_column(ForeignKey("chat_message.id")) + message_id: Mapped[int | None] = mapped_column( + ForeignKey("chat_message.id"), nullable=False + ) + # Update the relationship message: Mapped["ChatMessage"] = relationship( - "ChatMessage", back_populates="tool_calls" + "ChatMessage", + back_populates="tool_call", + uselist=False, ) class ChatSession(Base): __tablename__ = "chat_session" - id: Mapped[int] = mapped_column(primary_key=True) + id: Mapped[UUID] = mapped_column( + PGUUID(as_uuid=True), primary_key=True, default=uuid4 + ) user_id: Mapped[UUID | None] = mapped_column( ForeignKey("user.id", ondelete="CASCADE"), nullable=True ) @@ -969,7 +1005,9 @@ class ChatMessage(Base): __tablename__ = "chat_message" id: Mapped[int] = mapped_column(primary_key=True) - chat_session_id: Mapped[int] = mapped_column(ForeignKey("chat_session.id")) + chat_session_id: Mapped[UUID] = mapped_column( + PGUUID(as_uuid=True), ForeignKey("chat_session.id") + ) alternate_assistant_id = mapped_column( Integer, ForeignKey("persona.id"), nullable=True @@ -1019,12 +1057,13 @@ class ChatMessage(Base): secondary=ChatMessage__SearchDoc.__table__, back_populates="chat_messages", ) - # NOTE: Should always be attached to the `assistant` message. 
- # represents the tool calls used to generate this message - tool_calls: Mapped[list["ToolCall"]] = relationship( + + tool_call: Mapped["ToolCall"] = relationship( "ToolCall", back_populates="message", + uselist=False, ) + standard_answers: Mapped[list["StandardAnswer"]] = relationship( "StandardAnswer", secondary=ChatMessage__StandardAnswer.__table__, @@ -1137,6 +1176,8 @@ class LLMProvider(Base): postgresql.ARRAY(String), nullable=True ) + deployment_name: Mapped[str | None] = mapped_column(String, nullable=True) + # should only be set for a single provider is_default_provider: Mapped[bool | None] = mapped_column(Boolean, unique=True) # EE only @@ -1156,6 +1197,9 @@ class CloudEmbeddingProvider(Base): ) api_url: Mapped[str | None] = mapped_column(String, nullable=True) api_key: Mapped[str | None] = mapped_column(EncryptedString()) + api_version: Mapped[str | None] = mapped_column(String, nullable=True) + deployment_name: Mapped[str | None] = mapped_column(String, nullable=True) + search_settings: Mapped[list["SearchSettings"]] = relationship( "SearchSettings", back_populates="cloud_provider", @@ -1255,7 +1299,7 @@ class Tool(Base): openapi_schema: Mapped[dict[str, Any] | None] = mapped_column( postgresql.JSONB(), nullable=True ) - custom_headers: Mapped[list[dict[str, str]] | None] = mapped_column( + custom_headers: Mapped[list[HeaderItemDict] | None] = mapped_column( postgresql.JSONB(), nullable=True ) # user who created / owns the tool. Will be None for built-in tools. @@ -1755,3 +1799,23 @@ class UsageReport(Base): requestor = relationship("User") file = relationship("PGFileStore") + + +""" +Multi-tenancy related tables +""" + + +class PublicBase(DeclarativeBase): + __abstract__ = True + + +class UserTenantMapping(Base): + __tablename__ = "user_tenant_mapping" + __table_args__ = ( + UniqueConstraint("email", "tenant_id", name="uq_user_tenant"), + {"schema": "public"}, + ) + + email: Mapped[str] = mapped_column(String, nullable=False, primary_key=True) + tenant_id: Mapped[str] = mapped_column(String, nullable=False) diff --git a/backend/danswer/db/notification.py b/backend/danswer/db/notification.py index 61586208c69..a6cdf989177 100644 --- a/backend/danswer/db/notification.py +++ b/backend/danswer/db/notification.py @@ -1,23 +1,47 @@ +from uuid import UUID + from sqlalchemy import select from sqlalchemy.orm import Session from sqlalchemy.sql import func +from danswer.auth.schemas import UserRole from danswer.configs.constants import NotificationType from danswer.db.models import Notification from danswer.db.models import User def create_notification( - user: User | None, + user_id: UUID | None, notif_type: NotificationType, db_session: Session, + additional_data: dict | None = None, ) -> Notification: + # Check if an undismissed notification of the same type and data exists + existing_notification = ( + db_session.query(Notification) + .filter_by( + user_id=user_id, + notif_type=notif_type, + dismissed=False, + ) + .filter(Notification.additional_data == additional_data) + .first() + ) + + if existing_notification: + # Update the last_shown timestamp + existing_notification.last_shown = func.now() + db_session.commit() + return existing_notification + + # Create a new notification if none exists notification = Notification( - user_id=user.id if user else None, + user_id=user_id, notif_type=notif_type, dismissed=False, last_shown=func.now(), first_shown=func.now(), + additional_data=additional_data, ) db_session.add(notification) db_session.commit() @@ -31,7 +55,9 @@ def 
get_notification_by_id( notif = db_session.get(Notification, notification_id) if not notif: raise ValueError(f"No notification found with id {notification_id}") - if notif.user_id != user_id: + if notif.user_id != user_id and not ( + notif.user_id is None and user is not None and user.role == UserRole.ADMIN + ): raise PermissionError( f"User {user_id} is not authorized to access notification {notification_id}" ) diff --git a/backend/danswer/db/persona.py b/backend/danswer/db/persona.py index 36d2d25c402..f23b3f6d071 100644 --- a/backend/danswer/db/persona.py +++ b/backend/danswer/db/persona.py @@ -328,7 +328,6 @@ def update_all_personas_display_priority( for persona in personas: persona.display_priority = display_priority_map[persona.id] - db_session.commit() diff --git a/backend/danswer/db/search_settings.py b/backend/danswer/db/search_settings.py index e3f35e31007..5392ec23411 100644 --- a/backend/danswer/db/search_settings.py +++ b/backend/danswer/db/search_settings.py @@ -12,7 +12,7 @@ from danswer.configs.model_configs import OLD_DEFAULT_DOCUMENT_ENCODER_MODEL from danswer.configs.model_configs import OLD_DEFAULT_MODEL_DOC_EMBEDDING_DIM from danswer.configs.model_configs import OLD_DEFAULT_MODEL_NORMALIZE_EMBEDDINGS -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.db.llm import fetch_embedding_provider from danswer.db.models import CloudEmbeddingProvider from danswer.db.models import IndexAttempt @@ -152,7 +152,7 @@ def get_all_search_settings(db_session: Session) -> list[SearchSettings]: def get_multilingual_expansion(db_session: Session | None = None) -> list[str]: if db_session is None: - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant() as db_session: search_settings = get_current_search_settings(db_session) else: search_settings = get_current_search_settings(db_session) diff --git a/backend/danswer/db/swap_index.py b/backend/danswer/db/swap_index.py index 8f6d1718924..8b583bd2e4b 100644 --- a/backend/danswer/db/swap_index.py +++ b/backend/danswer/db/swap_index.py @@ -8,26 +8,35 @@ from danswer.db.index_attempt import ( count_unique_cc_pairs_with_successful_index_attempts, ) +from danswer.db.models import SearchSettings from danswer.db.search_settings import get_current_search_settings from danswer.db.search_settings import get_secondary_search_settings from danswer.db.search_settings import update_search_settings_status -from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.key_value_store.factory import get_kv_store from danswer.utils.logger import setup_logger + logger = setup_logger() -def check_index_swap(db_session: Session) -> None: +def check_index_swap(db_session: Session) -> SearchSettings | None: """Get count of cc-pairs and count of successful index_attempts for the new model grouped by connector + credential, if it's the same, then assume - new index is done building. If so, swap the indices and expire the old one.""" + new index is done building. If so, swap the indices and expire the old one. + + Returns None if search settings did not change, or the old search settings if they + did change. 
+ """ + + old_search_settings = None + # Default CC-pair created for Ingestion API unused here all_cc_pairs = get_connector_credential_pairs(db_session) cc_pair_count = max(len(all_cc_pairs) - 1, 0) search_settings = get_secondary_search_settings(db_session) if not search_settings: - return + return None unique_cc_indexings = count_unique_cc_pairs_with_successful_index_attempts( search_settings_id=search_settings.id, db_session=db_session @@ -40,9 +49,9 @@ def check_index_swap(db_session: Session) -> None: if cc_pair_count == 0 or cc_pair_count == unique_cc_indexings: # Swap indices - now_old_search_settings = get_current_search_settings(db_session) + current_search_settings = get_current_search_settings(db_session) update_search_settings_status( - search_settings=now_old_search_settings, + search_settings=current_search_settings, new_status=IndexModelStatus.PAST, db_session=db_session, ) @@ -54,7 +63,7 @@ def check_index_swap(db_session: Session) -> None: ) if cc_pair_count > 0: - kv_store = get_dynamic_config_store() + kv_store = get_kv_store() kv_store.store(KV_REINDEX_KEY, False) # Expire jobs for the now past index/embedding model @@ -63,3 +72,7 @@ def check_index_swap(db_session: Session) -> None: # Recount aggregates for cc_pair in all_cc_pairs: resync_cc_pair(cc_pair, db_session=db_session) + + old_search_settings = current_search_settings + + return old_search_settings diff --git a/backend/danswer/db/tools.py b/backend/danswer/db/tools.py index 248744b5639..0fd126d0065 100644 --- a/backend/danswer/db/tools.py +++ b/backend/danswer/db/tools.py @@ -1,4 +1,5 @@ from typing import Any +from typing import cast from uuid import UUID from sqlalchemy import select @@ -6,6 +7,7 @@ from danswer.db.models import Tool from danswer.server.features.tool.models import Header +from danswer.utils.headers import HeaderItemDict from danswer.utils.logger import setup_logger logger = setup_logger() @@ -67,7 +69,9 @@ def update_tool( if user_id is not None: tool.user_id = user_id if custom_headers is not None: - tool.custom_headers = [header.dict() for header in custom_headers] + tool.custom_headers = [ + cast(HeaderItemDict, header.model_dump()) for header in custom_headers + ] db_session.commit() return tool diff --git a/backend/danswer/document_index/factory.py b/backend/danswer/document_index/factory.py index aedaec147d0..92dde3dda43 100644 --- a/backend/danswer/document_index/factory.py +++ b/backend/danswer/document_index/factory.py @@ -3,6 +3,7 @@ from danswer.db.search_settings import get_current_search_settings from danswer.document_index.interfaces import DocumentIndex from danswer.document_index.vespa.index import VespaIndex +from shared_configs.configs import MULTI_TENANT def get_default_document_index( @@ -14,7 +15,9 @@ def get_default_document_index( index both need to be updated, updates are applied to both indices""" # Currently only supporting Vespa return VespaIndex( - index_name=primary_index_name, secondary_index_name=secondary_index_name + index_name=primary_index_name, + secondary_index_name=secondary_index_name, + multitenant=MULTI_TENANT, ) diff --git a/backend/danswer/document_index/interfaces.py b/backend/danswer/document_index/interfaces.py index b499d696743..07c1b24ab2e 100644 --- a/backend/danswer/document_index/interfaces.py +++ b/backend/danswer/document_index/interfaces.py @@ -55,6 +55,21 @@ class DocumentMetadata: from_ingestion_api: bool = False +@dataclass +class VespaDocumentFields: + """ + Specifies fields in Vespa for a document. Fields set to None will be ignored. 
+ Perhaps we should name this in an implementation agnostic fashion, but it's more + understandable like this for now. + """ + + # all other fields except these 4 will always be left alone by the update request + access: DocumentAccess | None = None + document_sets: set[str] | None = None + boost: float | None = None + hidden: bool | None = None + + @dataclass class UpdateRequest: """ @@ -112,6 +127,17 @@ def ensure_indices_exist( """ raise NotImplementedError + @staticmethod + @abc.abstractmethod + def register_multitenant_indices( + indices: list[str], + embedding_dims: list[int], + ) -> None: + """ + Register multitenant indices with the document index. + """ + raise NotImplementedError + class Indexable(abc.ABC): """ @@ -157,7 +183,7 @@ class Deletable(abc.ABC): """ @abc.abstractmethod - def delete_single(self, doc_id: str) -> None: + def delete_single(self, doc_id: str) -> int: """ Given a single document id, hard delete it from the document index @@ -188,11 +214,9 @@ class Updatable(abc.ABC): """ @abc.abstractmethod - def update_single(self, update_request: UpdateRequest) -> None: + def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int: """ - Updates some set of chunks for a document. The document and fields to update - are specified in the update request. Each update request in the list applies - its changes to a list of document ids. + Updates all chunks for a document with the specified fields. None values mean that the field does not need an update. The rationale for a single update function is that it allows retries and parallelism @@ -200,14 +224,10 @@ def update_single(self, update_request: UpdateRequest) -> None: us to individually handle error conditions per document. Parameters: - - update_request: for a list of document ids in the update request, apply the same updates - to all of the documents with those ids. + - fields: the fields to update in the document. Any field set to None will not be changed. Return: - - an HTTPStatus code. The code can used to decide whether to fail immediately, - retry, etc. Although this method likely hits an HTTP API behind the - scenes, the usage of HTTPStatus is a convenience and the interface is not - actually HTTP specific. 
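A minimal usage sketch of the reworked per-document update; the document_index variable, doc id, and logger are illustrative assumptions, not part of this patch:

    from danswer.document_index.interfaces import VespaDocumentFields

    # document_index: any Updatable implementation, e.g. a VespaIndex instance (assumed)
    fields = VespaDocumentFields(hidden=True, boost=0.0)  # fields left as None are not touched
    chunks_updated = document_index.update_single("example-doc-id", fields=fields)
    logger.debug(f"updated {chunks_updated} chunks")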
+ None """ raise NotImplementedError diff --git a/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd b/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd index be279f6a611..b98c9343f3f 100644 --- a/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd +++ b/backend/danswer/document_index/vespa/app_config/schemas/danswer_chunk.sd @@ -1,5 +1,6 @@ schema DANSWER_CHUNK_NAME { document DANSWER_CHUNK_NAME { + TENANT_ID_REPLACEMENT # Not to be confused with the UUID generated for this chunk which is called documentid by default field document_id type string { indexing: summary | attribute diff --git a/backend/danswer/document_index/vespa/chunk_retrieval.py b/backend/danswer/document_index/vespa/chunk_retrieval.py index e4b2ad83ce2..ce95ff6fecb 100644 --- a/backend/danswer/document_index/vespa/chunk_retrieval.py +++ b/backend/danswer/document_index/vespa/chunk_retrieval.py @@ -7,11 +7,12 @@ from typing import Any from typing import cast -import requests +import httpx from retry import retry from danswer.configs.app_configs import LOG_VESPA_TIMING_INFORMATION from danswer.document_index.interfaces import VespaChunkRequest +from danswer.document_index.vespa.shared_utils.utils import get_vespa_http_client from danswer.document_index.vespa.shared_utils.vespa_request_builders import ( build_vespa_filters, ) @@ -192,20 +193,21 @@ def _get_chunks_via_visit_api( document_chunks: list[dict] = [] while True: - response = requests.get(url, params=params) try: - response.raise_for_status() - except requests.HTTPError as e: - request_info = f"Headers: {response.request.headers}\nPayload: {params}" - response_info = f"Status Code: {response.status_code}\nResponse Content: {response.text}" - error_base = f"Error occurred getting chunk by Document ID {chunk_request.document_id}" + filtered_params = {k: v for k, v in params.items() if v is not None} + with get_vespa_http_client() as http_client: + response = http_client.get(url, params=filtered_params) + response.raise_for_status() + except httpx.HTTPError as e: + error_base = "Failed to query Vespa" logger.error( f"{error_base}:\n" - f"{request_info}\n" - f"{response_info}\n" - f"Exception: {e}" + f"Request URL: {e.request.url}\n" + f"Request Headers: {e.request.headers}\n" + f"Request Payload: {params}\n" + f"Exception: {str(e)}" ) - raise requests.HTTPError(error_base) from e + raise httpx.HTTPError(error_base) from e # Check if the response contains any documents response_data = response.json() @@ -229,6 +231,7 @@ def _get_chunks_via_visit_api( return document_chunks +@retry(tries=10, delay=1, backoff=2) def get_all_vespa_ids_for_document_id( document_id: str, index_name: str, @@ -293,28 +296,24 @@ def query_vespa( if LOG_VESPA_TIMING_INFORMATION else {}, ) + try: - response = requests.post( - SEARCH_ENDPOINT, - json=params, - ) - response.raise_for_status() - except requests.HTTPError as e: - request_info = f"Headers: {response.request.headers}\nPayload: {params}" - response_info = ( - f"Status Code: {response.status_code}\n" - f"Response Content: {response.text}" - ) + with get_vespa_http_client() as http_client: + response = http_client.post(SEARCH_ENDPOINT, json=params) + response.raise_for_status() + except httpx.HTTPError as e: error_base = "Failed to query Vespa" logger.error( f"{error_base}:\n" - f"{request_info}\n" - f"{response_info}\n" - f"Exception: {e}" + f"Request URL: {e.request.url}\n" + f"Request Headers: {e.request.headers}\n" + f"Request Payload: {params}\n" + f"Exception: 
{str(e)}" ) - raise requests.HTTPError(error_base) from e + raise httpx.HTTPError(error_base) from e response_json: dict[str, Any] = response.json() + if LOG_VESPA_TIMING_INFORMATION: logger.debug("Vespa timing info: %s", response_json.get("timing")) hits = response_json["root"].get("children", []) diff --git a/backend/danswer/document_index/vespa/index.py b/backend/danswer/document_index/vespa/index.py index 972841bd636..c48f7bdedee 100644 --- a/backend/danswer/document_index/vespa/index.py +++ b/backend/danswer/document_index/vespa/index.py @@ -1,17 +1,20 @@ import concurrent.futures import io +import logging import os import re import time +import urllib import zipfile from dataclasses import dataclass from datetime import datetime from datetime import timedelta from typing import BinaryIO from typing import cast +from typing import List -import httpx -import requests +import httpx # type: ignore +import requests # type: ignore from danswer.configs.app_configs import DOCUMENT_INDEX_NAME from danswer.configs.chat_configs import DOC_TIME_DECAY @@ -23,6 +26,7 @@ from danswer.document_index.interfaces import DocumentInsertionRecord from danswer.document_index.interfaces import UpdateRequest from danswer.document_index.interfaces import VespaChunkRequest +from danswer.document_index.interfaces import VespaDocumentFields from danswer.document_index.vespa.chunk_retrieval import batch_search_api_retrieval from danswer.document_index.vespa.chunk_retrieval import ( get_all_vespa_ids_for_document_id, @@ -37,6 +41,7 @@ from danswer.document_index.vespa.indexing_utils import ( get_existing_documents_from_chunks, ) +from danswer.document_index.vespa.shared_utils.utils import get_vespa_http_client from danswer.document_index.vespa.shared_utils.utils import ( replace_invalid_doc_id_characters, ) @@ -55,20 +60,28 @@ from danswer.document_index.vespa_constants import HIDDEN from danswer.document_index.vespa_constants import NUM_THREADS from danswer.document_index.vespa_constants import SEARCH_THREAD_NUMBER_PAT +from danswer.document_index.vespa_constants import TENANT_ID_PAT +from danswer.document_index.vespa_constants import TENANT_ID_REPLACEMENT from danswer.document_index.vespa_constants import VESPA_APPLICATION_ENDPOINT from danswer.document_index.vespa_constants import VESPA_DIM_REPLACEMENT_PAT from danswer.document_index.vespa_constants import VESPA_TIMEOUT from danswer.document_index.vespa_constants import YQL_BASE -from danswer.dynamic_configs.factory import get_dynamic_config_store from danswer.indexing.models import DocMetadataAwareIndexChunk +from danswer.key_value_store.factory import get_kv_store from danswer.search.models import IndexFilters from danswer.search.models import InferenceChunkUncleaned from danswer.utils.batching import batch_generator from danswer.utils.logger import setup_logger +from shared_configs.configs import MULTI_TENANT from shared_configs.model_server_models import Embedding + logger = setup_logger() +# Set the logging level to WARNING to ignore INFO and DEBUG logs +httpx_logger = logging.getLogger("httpx") +httpx_logger.setLevel(logging.WARNING) + @dataclass class _VespaUpdateRequest: @@ -86,7 +99,7 @@ def in_memory_zip_from_file_bytes(file_contents: dict[str, bytes]) -> BinaryIO: return zip_buffer -def _create_document_xml_lines(doc_names: list[str | None]) -> str: +def _create_document_xml_lines(doc_names: list[str | None] | list[str]) -> str: doc_lines = [ f'' for doc_name in doc_names @@ -111,15 +124,28 @@ def add_ngrams_to_schema(schema_content: str) -> str: 
class VespaIndex(DocumentIndex): - def __init__(self, index_name: str, secondary_index_name: str | None) -> None: + def __init__( + self, + index_name: str, + secondary_index_name: str | None, + multitenant: bool = False, + ) -> None: self.index_name = index_name self.secondary_index_name = secondary_index_name + self.multitenant = multitenant + self.http_client = get_vespa_http_client() def ensure_indices_exist( self, index_embedding_dim: int, secondary_index_embedding_dim: int | None, ) -> None: + if MULTI_TENANT: + logger.info( + "Skipping Vespa index seup for multitenant (would wipe all indices)" + ) + return None + deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate" logger.info(f"Deploying Vespa application package to {deploy_url}") @@ -141,7 +167,7 @@ def ensure_indices_exist( SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS) ) - kv_store = get_dynamic_config_store() + kv_store = get_kv_store() needs_reindexing = False try: @@ -167,10 +193,14 @@ def ensure_indices_exist( with open(schema_file, "r") as schema_f: schema_template = schema_f.read() + schema_template = schema_template.replace(TENANT_ID_PAT, "") + schema = schema_template.replace( DANSWER_CHUNK_REPLACEMENT_PAT, self.index_name ).replace(VESPA_DIM_REPLACEMENT_PAT, str(index_embedding_dim)) + schema = add_ngrams_to_schema(schema) if needs_reindexing else schema + schema = schema.replace(TENANT_ID_PAT, "") zip_dict[f"schemas/{schema_names[0]}.sd"] = schema.encode("utf-8") if self.secondary_index_name: @@ -188,6 +218,91 @@ def ensure_indices_exist( f"Failed to prepare Vespa Danswer Index. Response: {response.text}" ) + @staticmethod + def register_multitenant_indices( + indices: list[str], + embedding_dims: list[int], + ) -> None: + if not MULTI_TENANT: + raise ValueError("Multi-tenant is not enabled") + + deploy_url = f"{VESPA_APPLICATION_ENDPOINT}/tenant/default/prepareandactivate" + logger.info(f"Deploying Vespa application package to {deploy_url}") + + vespa_schema_path = os.path.join( + os.getcwd(), "danswer", "document_index", "vespa", "app_config" + ) + schema_file = os.path.join(vespa_schema_path, "schemas", "danswer_chunk.sd") + services_file = os.path.join(vespa_schema_path, "services.xml") + overrides_file = os.path.join(vespa_schema_path, "validation-overrides.xml") + + with open(services_file, "r") as services_f: + services_template = services_f.read() + + # Generate schema names from index settings + schema_names = [index_name for index_name in indices] + + full_schemas = schema_names + + doc_lines = _create_document_xml_lines(full_schemas) + + services = services_template.replace(DOCUMENT_REPLACEMENT_PAT, doc_lines) + services = services.replace( + SEARCH_THREAD_NUMBER_PAT, str(VESPA_SEARCHER_THREADS) + ) + + kv_store = get_kv_store() + + needs_reindexing = False + try: + needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY)) + except Exception: + logger.debug("Could not load the reindexing flag. 
Using ngrams") + + with open(overrides_file, "r") as overrides_f: + overrides_template = overrides_f.read() + + # Vespa requires an override to erase data including the indices we're no longer using + # It also has a 30 day cap from current so we set it to 7 dynamically + now = datetime.now() + date_in_7_days = now + timedelta(days=7) + formatted_date = date_in_7_days.strftime("%Y-%m-%d") + + overrides = overrides_template.replace(DATE_REPLACEMENT, formatted_date) + + zip_dict = { + "services.xml": services.encode("utf-8"), + "validation-overrides.xml": overrides.encode("utf-8"), + } + + with open(schema_file, "r") as schema_f: + schema_template = schema_f.read() + + for i, index_name in enumerate(indices): + embedding_dim = embedding_dims[i] + logger.info( + f"Creating index: {index_name} with embedding dimension: {embedding_dim}" + ) + + schema = schema_template.replace( + DANSWER_CHUNK_REPLACEMENT_PAT, index_name + ).replace(VESPA_DIM_REPLACEMENT_PAT, str(embedding_dim)) + schema = schema.replace( + TENANT_ID_PAT, TENANT_ID_REPLACEMENT if MULTI_TENANT else "" + ) + schema = add_ngrams_to_schema(schema) if needs_reindexing else schema + zip_dict[f"schemas/{index_name}.sd"] = schema.encode("utf-8") + + zip_file = in_memory_zip_from_file_bytes(zip_dict) + + headers = {"Content-Type": "application/zip"} + response = requests.post(deploy_url, headers=headers, data=zip_file) + + if response.status_code != 200: + raise RuntimeError( + f"Failed to prepare Vespa Danswer Indexes. Response: {response.text}" + ) + def index( self, chunks: list[DocMetadataAwareIndexChunk], @@ -205,7 +320,7 @@ def index( # indexing / updates / deletes since we have to make a large volume of requests. with ( concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor, - httpx.Client(http2=True) as http_client, + get_vespa_http_client() as http_client, ): # Check for existing documents, existing documents need to have all of their chunks deleted # prior to indexing as the document size (num chunks) may have shrunk @@ -233,6 +348,7 @@ def index( chunks=chunk_batch, index_name=self.index_name, http_client=http_client, + multitenant=self.multitenant, executor=executor, ) @@ -267,9 +383,10 @@ def _update_chunk( # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficient for # indexing / updates / deletes since we have to make a large volume of requests. + with ( concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor, - httpx.Client(http2=True) as http_client, + get_vespa_http_client() as http_client, ): for update_batch in batch_generator(updates, batch_size): future_to_document_id = { @@ -378,90 +495,89 @@ def update(self, update_requests: list[UpdateRequest]) -> None: time.monotonic() - update_start, ) - def update_single(self, update_request: UpdateRequest) -> None: + def update_single(self, doc_id: str, fields: VespaDocumentFields) -> int: """Note: if the document id does not exist, the update will be a no-op and the function will complete with no errors or exceptions. 
Handle other exceptions if you wish to implement retry behavior """ - if len(update_request.document_ids) != 1: - raise ValueError("update_request must contain a single document id") + + total_chunks_updated = 0 # Handle Vespa character limitations # Mutating update_request but it's not used later anyway - update_request.document_ids = [ - replace_invalid_doc_id_characters(doc_id) - for doc_id in update_request.document_ids - ] - - # update_start = time.monotonic() - - # Fetch all chunks for each document ahead of time - index_names = [self.index_name] - if self.secondary_index_name: - index_names.append(self.secondary_index_name) - - chunk_id_start_time = time.monotonic() - all_doc_chunk_ids: list[str] = [] - for index_name in index_names: - for document_id in update_request.document_ids: - # this calls vespa and can raise http exceptions - doc_chunk_ids = get_all_vespa_ids_for_document_id( - document_id=document_id, - index_name=index_name, - filters=None, - get_large_chunks=True, - ) - all_doc_chunk_ids.extend(doc_chunk_ids) - logger.debug( - f"Took {time.monotonic() - chunk_id_start_time:.2f} seconds to fetch all Vespa chunk IDs" - ) + normalized_doc_id = replace_invalid_doc_id_characters(doc_id) # Build the _VespaUpdateRequest objects update_dict: dict[str, dict] = {"fields": {}} - if update_request.boost is not None: - update_dict["fields"][BOOST] = {"assign": update_request.boost} - if update_request.document_sets is not None: + if fields.boost is not None: + update_dict["fields"][BOOST] = {"assign": fields.boost} + if fields.document_sets is not None: update_dict["fields"][DOCUMENT_SETS] = { - "assign": { - document_set: 1 for document_set in update_request.document_sets - } + "assign": {document_set: 1 for document_set in fields.document_sets} } - if update_request.access is not None: + if fields.access is not None: update_dict["fields"][ACCESS_CONTROL_LIST] = { - "assign": {acl_entry: 1 for acl_entry in update_request.access.to_acl()} + "assign": {acl_entry: 1 for acl_entry in fields.access.to_acl()} } - if update_request.hidden is not None: - update_dict["fields"][HIDDEN] = {"assign": update_request.hidden} + if fields.hidden is not None: + update_dict["fields"][HIDDEN] = {"assign": fields.hidden} if not update_dict["fields"]: logger.error("Update request received but nothing to update") - return + return 0 - processed_update_requests: list[_VespaUpdateRequest] = [] - for document_id in update_request.document_ids: - for doc_chunk_id in all_doc_chunk_ids: - processed_update_requests.append( - _VespaUpdateRequest( - document_id=document_id, - url=f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}/{doc_chunk_id}", - update_request=update_dict, - ) - ) + index_names = [self.index_name] + if self.secondary_index_name: + index_names.append(self.secondary_index_name) - with httpx.Client(http2=True) as http_client: - for update in processed_update_requests: - http_client.put( - update.url, - headers={"Content-Type": "application/json"}, - json=update.update_request, + with get_vespa_http_client() as http_client: + for index_name in index_names: + params = httpx.QueryParams( + { + "selection": f"{index_name}.document_id=='{normalized_doc_id}'", + "cluster": DOCUMENT_INDEX_NAME, + } ) - # logger.debug( - # "Finished updating Vespa documents in %.2f seconds", - # time.monotonic() - update_start, - # ) + while True: + try: + resp = http_client.put( + f"{DOCUMENT_ID_ENDPOINT.format(index_name=self.index_name)}", + params=params, + headers={"Content-Type": "application/json"}, + 
json=update_dict, + ) + + resp.raise_for_status() + except httpx.HTTPStatusError as e: + logger.error( + f"Failed to update chunks, details: {e.response.text}" + ) + raise - return + resp_data = resp.json() + + if "documentCount" in resp_data: + chunks_updated = resp_data["documentCount"] + total_chunks_updated += chunks_updated + + # Check for continuation token to handle pagination + if "continuation" not in resp_data: + break # Exit loop if no continuation token + + if not resp_data["continuation"]: + break # Exit loop if continuation token is empty + + params = params.set("continuation", resp_data["continuation"]) + + logger.debug( + f"VespaIndex.update_single: " + f"index={index_name} " + f"doc={normalized_doc_id} " + f"chunks_updated={total_chunks_updated}" + ) + + return total_chunks_updated def delete(self, doc_ids: list[str]) -> None: logger.info(f"Deleting {len(doc_ids)} documents from Vespa") @@ -470,7 +586,7 @@ def delete(self, doc_ids: list[str]) -> None: # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for # indexing / updates / deletes since we have to make a large volume of requests. - with httpx.Client(http2=True) as http_client: + with get_vespa_http_client() as http_client: index_names = [self.index_name] if self.secondary_index_name: index_names.append(self.secondary_index_name) @@ -479,6 +595,70 @@ def delete(self, doc_ids: list[str]) -> None: delete_vespa_docs( document_ids=doc_ids, index_name=index_name, http_client=http_client ) + return + + def delete_single(self, doc_id: str) -> int: + """Possibly faster overall than the delete method due to using a single + delete call with a selection query.""" + + total_chunks_deleted = 0 + + # Vespa deletion is poorly documented ... luckily we found this + # https://docs.vespa.ai/en/operations/batch-delete.html#example + + doc_id = replace_invalid_doc_id_characters(doc_id) + + # NOTE: using `httpx` here since `requests` doesn't support HTTP2. This is beneficial for + # indexing / updates / deletes since we have to make a large volume of requests. 
+ index_names = [self.index_name] + if self.secondary_index_name: + index_names.append(self.secondary_index_name) + + with get_vespa_http_client() as http_client: + for index_name in index_names: + params = httpx.QueryParams( + { + "selection": f"{index_name}.document_id=='{doc_id}'", + "cluster": DOCUMENT_INDEX_NAME, + } + ) + + while True: + try: + resp = http_client.delete( + f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}", + params=params, + ) + resp.raise_for_status() + except httpx.HTTPStatusError as e: + logger.error( + f"Failed to delete chunk, details: {e.response.text}" + ) + raise + + resp_data = resp.json() + + if "documentCount" in resp_data: + chunks_deleted = resp_data["documentCount"] + total_chunks_deleted += chunks_deleted + + # Check for continuation token to handle pagination + if "continuation" not in resp_data: + break # Exit loop if no continuation token + + if not resp_data["continuation"]: + break # Exit loop if continuation token is empty + + params = params.set("continuation", resp_data["continuation"]) + + logger.debug( + f"VespaIndex.delete_single: " + f"index={index_name} " + f"doc={doc_id} " + f"chunks_deleted={total_chunks_deleted}" + ) + + return total_chunks_deleted def delete_single(self, doc_id: str) -> None: """Possibly faster overall than the delete method due to using a single @@ -634,3 +814,158 @@ def admin_retrieval( } return query_vespa(params) + + @classmethod + def delete_entries_by_tenant_id(cls, tenant_id: str, index_name: str) -> None: + """ + Deletes all entries in the specified index with the given tenant_id. + + Parameters: + tenant_id (str): The tenant ID whose documents are to be deleted. + index_name (str): The name of the index from which to delete documents. + """ + logger.info( + f"Deleting entries with tenant_id: {tenant_id} from index: {index_name}" + ) + + # Step 1: Retrieve all document IDs with the given tenant_id + document_ids = cls._get_all_document_ids_by_tenant_id(tenant_id, index_name) + + if not document_ids: + logger.info( + f"No documents found with tenant_id: {tenant_id} in index: {index_name}" + ) + return + + # Step 2: Delete documents in batches + delete_requests = [ + _VespaDeleteRequest(document_id=doc_id, index_name=index_name) + for doc_id in document_ids + ] + + cls._apply_deletes_batched(delete_requests) + + @classmethod + def _get_all_document_ids_by_tenant_id( + cls, tenant_id: str, index_name: str + ) -> List[str]: + """ + Retrieves all document IDs with the specified tenant_id, handling pagination. + + Parameters: + tenant_id (str): The tenant ID to search for. + index_name (str): The name of the index to search in. + + Returns: + List[str]: A list of document IDs matching the tenant_id. 
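The selection-based update and delete paths above both page through Vespa's /document/v1 API using a continuation token. The shared pattern, sketched standalone with placeholder names and a delete call for concreteness:

    import httpx

    def _visit_selection(http_client: httpx.Client, url: str, params: httpx.QueryParams) -> int:
        # issue selection-based requests page by page until no continuation token is returned
        total = 0
        while True:
            resp = http_client.delete(url, params=params)
            resp.raise_for_status()
            data = resp.json()
            total += data.get("documentCount", 0)
            continuation = data.get("continuation")
            if not continuation:
                break
            params = params.set("continuation", continuation)
        return total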
+ """ + offset = 0 + limit = 1000 # Vespa's maximum hits per query + document_ids = [] + + logger.debug( + f"Starting document ID retrieval for tenant_id: {tenant_id} in index: {index_name}" + ) + + while True: + # Construct the query to fetch document IDs + query_params = { + "yql": f'select id from sources * where tenant_id contains "{tenant_id}";', + "offset": str(offset), + "hits": str(limit), + "timeout": "10s", + "format": "json", + "summary": "id", + } + + url = f"{VESPA_APPLICATION_ENDPOINT}/search/" + + logger.debug( + f"Querying for document IDs with tenant_id: {tenant_id}, offset: {offset}" + ) + + with get_vespa_http_client(no_timeout=True) as http_client: + response = http_client.get(url, params=query_params) + response.raise_for_status() + + search_result = response.json() + hits = search_result.get("root", {}).get("children", []) + + if not hits: + break + + for hit in hits: + doc_id = hit.get("id") + if doc_id: + document_ids.append(doc_id) + + offset += limit # Move to the next page + + logger.debug( + f"Retrieved {len(document_ids)} document IDs for tenant_id: {tenant_id}" + ) + return document_ids + + @classmethod + def _apply_deletes_batched( + cls, + delete_requests: List["_VespaDeleteRequest"], + batch_size: int = BATCH_SIZE, + ) -> None: + """ + Deletes documents in batches using multiple threads. + + Parameters: + delete_requests (List[_VespaDeleteRequest]): The list of delete requests. + batch_size (int): The number of documents to delete in each batch. + """ + + def _delete_document( + delete_request: "_VespaDeleteRequest", http_client: httpx.Client + ) -> None: + logger.debug(f"Deleting document with ID {delete_request.document_id}") + response = http_client.delete( + delete_request.url, + headers={"Content-Type": "application/json"}, + ) + response.raise_for_status() + + logger.debug(f"Starting batch deletion for {len(delete_requests)} documents") + + with concurrent.futures.ThreadPoolExecutor(max_workers=NUM_THREADS) as executor: + with get_vespa_http_client(no_timeout=True) as http_client: + for batch_start in range(0, len(delete_requests), batch_size): + batch = delete_requests[batch_start : batch_start + batch_size] + + future_to_document_id = { + executor.submit( + _delete_document, + delete_request, + http_client, + ): delete_request.document_id + for delete_request in batch + } + + for future in concurrent.futures.as_completed( + future_to_document_id + ): + doc_id = future_to_document_id[future] + try: + future.result() + logger.debug(f"Successfully deleted document: {doc_id}") + except httpx.HTTPError as e: + logger.error(f"Failed to delete document {doc_id}: {e}") + # Optionally, implement retry logic or error handling here + + logger.info("Batch deletion completed") + + +class _VespaDeleteRequest: + def __init__(self, document_id: str, index_name: str) -> None: + self.document_id = document_id + # Encode the document ID to ensure it's safe for use in the URL + encoded_doc_id = urllib.parse.quote_plus(self.document_id) + self.url = ( + f"{VESPA_APPLICATION_ENDPOINT}/document/v1/" + f"{index_name}/{index_name}/docid/{encoded_doc_id}" + ) diff --git a/backend/danswer/document_index/vespa/indexing_utils.py b/backend/danswer/document_index/vespa/indexing_utils.py index 6b6ba8709d5..aafc6bf4efe 100644 --- a/backend/danswer/document_index/vespa/indexing_utils.py +++ b/backend/danswer/document_index/vespa/indexing_utils.py @@ -37,6 +37,7 @@ from danswer.document_index.vespa_constants import SKIP_TITLE_EMBEDDING from danswer.document_index.vespa_constants 
import SOURCE_LINKS from danswer.document_index.vespa_constants import SOURCE_TYPE +from danswer.document_index.vespa_constants import TENANT_ID from danswer.document_index.vespa_constants import TITLE from danswer.document_index.vespa_constants import TITLE_EMBEDDING from danswer.indexing.models import DocMetadataAwareIndexChunk @@ -56,7 +57,6 @@ def _does_document_exist( chunk. This checks for whether the chunk exists already in the index""" doc_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{doc_chunk_id}" doc_fetch_response = http_client.get(doc_url) - if doc_fetch_response.status_code == 404: return False @@ -65,6 +65,8 @@ def _does_document_exist( raise RuntimeError( f"Unexpected fetch document by ID value from Vespa " f"with error {doc_fetch_response.status_code}" + f"Index name: {index_name}" + f"Doc chunk id: {doc_chunk_id}" ) return True @@ -115,9 +117,12 @@ def get_existing_documents_from_chunks( return document_ids -@retry(tries=3, delay=1, backoff=2) +@retry(tries=5, delay=1, backoff=2) def _index_vespa_chunk( - chunk: DocMetadataAwareIndexChunk, index_name: str, http_client: httpx.Client + chunk: DocMetadataAwareIndexChunk, + index_name: str, + http_client: httpx.Client, + multitenant: bool, ) -> None: json_header = { "Content-Type": "application/json", @@ -174,6 +179,10 @@ def _index_vespa_chunk( BOOST: chunk.boost, } + if multitenant: + if chunk.tenant_id: + vespa_document_fields[TENANT_ID] = chunk.tenant_id + vespa_url = f"{DOCUMENT_ID_ENDPOINT.format(index_name=index_name)}/{vespa_chunk_id}" logger.debug(f'Indexing to URL "{vespa_url}"') res = http_client.post( @@ -192,6 +201,7 @@ def batch_index_vespa_chunks( chunks: list[DocMetadataAwareIndexChunk], index_name: str, http_client: httpx.Client, + multitenant: bool, executor: concurrent.futures.ThreadPoolExecutor | None = None, ) -> None: external_executor = True @@ -202,7 +212,9 @@ def batch_index_vespa_chunks( try: chunk_index_future = { - executor.submit(_index_vespa_chunk, chunk, index_name, http_client): chunk + executor.submit( + _index_vespa_chunk, chunk, index_name, http_client, multitenant + ): chunk for chunk in chunks } for future in concurrent.futures.as_completed(chunk_index_future): diff --git a/backend/danswer/document_index/vespa/shared_utils/utils.py b/backend/danswer/document_index/vespa/shared_utils/utils.py index c74afc9a629..49fdd680198 100644 --- a/backend/danswer/document_index/vespa/shared_utils/utils.py +++ b/backend/danswer/document_index/vespa/shared_utils/utils.py @@ -1,4 +1,12 @@ import re +from typing import cast + +import httpx + +from danswer.configs.app_configs import MANAGED_VESPA +from danswer.configs.app_configs import VESPA_CLOUD_CERT_PATH +from danswer.configs.app_configs import VESPA_CLOUD_KEY_PATH +from danswer.configs.app_configs import VESPA_REQUEST_TIMEOUT # NOTE: This does not seem to be used in reality despite the Vespa Docs pointing to this code # See here for reference: https://docs.vespa.ai/en/documents.html @@ -45,3 +53,19 @@ def remove_invalid_unicode_chars(text: str) -> str: "[\x00-\x08\x0b\x0c\x0e-\x1F\uD800-\uDFFF\uFFFE\uFFFF]" ) return _illegal_xml_chars_RE.sub("", text) + + +def get_vespa_http_client(no_timeout: bool = False) -> httpx.Client: + """ + Configure and return an HTTP client for communicating with Vespa, + including authentication if needed. 
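A short usage sketch of the client helper as call sites in this patch use it; the query body is a placeholder:

    from danswer.document_index.vespa.shared_utils.utils import get_vespa_http_client
    from danswer.document_index.vespa_constants import SEARCH_ENDPOINT

    with get_vespa_http_client() as http_client:
        response = http_client.post(SEARCH_ENDPOINT, json={"yql": "select * from sources * where true"})
        response.raise_for_status()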
+ """ + + return httpx.Client( + cert=cast(tuple[str, str], (VESPA_CLOUD_CERT_PATH, VESPA_CLOUD_KEY_PATH)) + if MANAGED_VESPA + else None, + verify=False if not MANAGED_VESPA else True, + timeout=None if no_timeout else VESPA_REQUEST_TIMEOUT, + http2=True, + ) diff --git a/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py b/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py index 65752aa09c1..e7b778f1c84 100644 --- a/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py +++ b/backend/danswer/document_index/vespa/shared_utils/vespa_request_builders.py @@ -12,6 +12,7 @@ from danswer.document_index.vespa_constants import HIDDEN from danswer.document_index.vespa_constants import METADATA_LIST from danswer.document_index.vespa_constants import SOURCE_TYPE +from danswer.document_index.vespa_constants import TENANT_ID from danswer.search.models import IndexFilters from danswer.utils.logger import setup_logger @@ -53,6 +54,9 @@ def _build_time_filter( filter_str = f"!({HIDDEN}=true) and " if not include_hidden else "" + if filters.tenant_id: + filter_str += f'({TENANT_ID} contains "{filters.tenant_id}") and ' + # CAREFUL touching this one, currently there is no second ACL double-check post retrieval if filters.access_control_list is not None: filter_str += _build_or_filters( diff --git a/backend/danswer/document_index/vespa_constants.py b/backend/danswer/document_index/vespa_constants.py index 8409efe1dea..30039922f1a 100644 --- a/backend/danswer/document_index/vespa_constants.py +++ b/backend/danswer/document_index/vespa_constants.py @@ -1,3 +1,4 @@ +from danswer.configs.app_configs import VESPA_CLOUD_URL from danswer.configs.app_configs import VESPA_CONFIG_SERVER_HOST from danswer.configs.app_configs import VESPA_HOST from danswer.configs.app_configs import VESPA_PORT @@ -9,17 +10,31 @@ DOCUMENT_REPLACEMENT_PAT = "DOCUMENT_REPLACEMENT" SEARCH_THREAD_NUMBER_PAT = "SEARCH_THREAD_NUMBER" DATE_REPLACEMENT = "DATE_REPLACEMENT" +SEARCH_THREAD_NUMBER_PAT = "SEARCH_THREAD_NUMBER" +TENANT_ID_PAT = "TENANT_ID_REPLACEMENT" +TENANT_ID_REPLACEMENT = """field tenant_id type string { + indexing: summary | attribute + rank: filter + attribute: fast-search + }""" # config server -VESPA_CONFIG_SERVER_URL = f"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}" + + +VESPA_CONFIG_SERVER_URL = ( + VESPA_CLOUD_URL or f"http://{VESPA_CONFIG_SERVER_HOST}:{VESPA_TENANT_PORT}" +) VESPA_APPLICATION_ENDPOINT = f"{VESPA_CONFIG_SERVER_URL}/application/v2" # main search application -VESPA_APP_CONTAINER_URL = f"http://{VESPA_HOST}:{VESPA_PORT}" +VESPA_APP_CONTAINER_URL = VESPA_CLOUD_URL or f"http://{VESPA_HOST}:{VESPA_PORT}" + + # danswer_chunk below is defined in vespa/app_configs/schemas/danswer_chunk.sd DOCUMENT_ID_ENDPOINT = ( f"{VESPA_APP_CONTAINER_URL}/document/v1/default/{{index_name}}/docid" ) + SEARCH_ENDPOINT = f"{VESPA_APP_CONTAINER_URL}/search/" NUM_THREADS = ( @@ -35,7 +50,7 @@ VESPA_TIMEOUT = "3s" BATCH_SIZE = 128 # Specific to Vespa - +TENANT_ID = "tenant_id" DOCUMENT_ID = "document_id" CHUNK_ID = "chunk_id" BLURB = "blurb" diff --git a/backend/danswer/dynamic_configs/factory.py b/backend/danswer/dynamic_configs/factory.py deleted file mode 100644 index 44b6e096b6d..00000000000 --- a/backend/danswer/dynamic_configs/factory.py +++ /dev/null @@ -1,15 +0,0 @@ -from danswer.configs.app_configs import DYNAMIC_CONFIG_STORE -from danswer.dynamic_configs.interface import DynamicConfigStore -from danswer.dynamic_configs.store import 
FileSystemBackedDynamicConfigStore -from danswer.dynamic_configs.store import PostgresBackedDynamicConfigStore - - -def get_dynamic_config_store() -> DynamicConfigStore: - dynamic_config_store_type = DYNAMIC_CONFIG_STORE - if dynamic_config_store_type == FileSystemBackedDynamicConfigStore.__name__: - raise NotImplementedError("File based config store no longer supported") - if dynamic_config_store_type == PostgresBackedDynamicConfigStore.__name__: - return PostgresBackedDynamicConfigStore() - - # TODO: change exception type - raise Exception("Unknown dynamic config store type") diff --git a/backend/danswer/dynamic_configs/store.py b/backend/danswer/dynamic_configs/store.py deleted file mode 100644 index cc53da938ad..00000000000 --- a/backend/danswer/dynamic_configs/store.py +++ /dev/null @@ -1,102 +0,0 @@ -import json -import os -from collections.abc import Iterator -from contextlib import contextmanager -from pathlib import Path -from typing import cast - -from filelock import FileLock -from sqlalchemy.orm import Session - -from danswer.db.engine import get_session_factory -from danswer.db.models import KVStore -from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.dynamic_configs.interface import DynamicConfigStore -from danswer.dynamic_configs.interface import JSON_ro - - -FILE_LOCK_TIMEOUT = 10 - - -def _get_file_lock(file_name: Path) -> FileLock: - return FileLock(file_name.with_suffix(".lock")) - - -class FileSystemBackedDynamicConfigStore(DynamicConfigStore): - def __init__(self, dir_path: str) -> None: - # TODO (chris): maybe require all possible keys to be passed in - # at app start somehow to prevent key overlaps - self.dir_path = Path(dir_path) - - def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: - file_path = self.dir_path / key - lock = _get_file_lock(file_path) - with lock.acquire(timeout=FILE_LOCK_TIMEOUT): - with open(file_path, "w+") as f: - json.dump(val, f) - - def load(self, key: str) -> JSON_ro: - file_path = self.dir_path / key - if not file_path.exists(): - raise ConfigNotFoundError - lock = _get_file_lock(file_path) - with lock.acquire(timeout=FILE_LOCK_TIMEOUT): - with open(self.dir_path / key) as f: - return cast(JSON_ro, json.load(f)) - - def delete(self, key: str) -> None: - file_path = self.dir_path / key - if not file_path.exists(): - raise ConfigNotFoundError - lock = _get_file_lock(file_path) - with lock.acquire(timeout=FILE_LOCK_TIMEOUT): - os.remove(file_path) - - -class PostgresBackedDynamicConfigStore(DynamicConfigStore): - @contextmanager - def get_session(self) -> Iterator[Session]: - factory = get_session_factory() - session: Session = factory() - try: - yield session - finally: - session.close() - - def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: - # The actual encryption/decryption is done in Postgres, we just need to choose - # which field to set - encrypted_val = val if encrypt else None - plain_val = val if not encrypt else None - with self.get_session() as session: - obj = session.query(KVStore).filter_by(key=key).first() - if obj: - obj.value = plain_val - obj.encrypted_value = encrypted_val - else: - obj = KVStore( - key=key, value=plain_val, encrypted_value=encrypted_val - ) # type: ignore - session.query(KVStore).filter_by(key=key).delete() # just in case - session.add(obj) - session.commit() - - def load(self, key: str) -> JSON_ro: - with self.get_session() as session: - obj = session.query(KVStore).filter_by(key=key).first() - if not obj: - raise ConfigNotFoundError - 
- if obj.value is not None: - return cast(JSON_ro, obj.value) - if obj.encrypted_value is not None: - return cast(JSON_ro, obj.encrypted_value) - - return None - - def delete(self, key: str) -> None: - with self.get_session() as session: - result = session.query(KVStore).filter_by(key=key).delete() # type: ignore - if result == 0: - raise ConfigNotFoundError - session.commit() diff --git a/backend/danswer/file_processing/extract_file_text.py b/backend/danswer/file_processing/extract_file_text.py index 36df08ac465..9effad5b4e0 100644 --- a/backend/danswer/file_processing/extract_file_text.py +++ b/backend/danswer/file_processing/extract_file_text.py @@ -20,6 +20,8 @@ from danswer.configs.constants import DANSWER_METADATA_FILENAME from danswer.file_processing.html_utils import parse_html_page_basic +from danswer.file_processing.unstructured import get_unstructured_api_key +from danswer.file_processing.unstructured import unstructured_to_text from danswer.utils.logger import setup_logger logger = setup_logger() @@ -206,8 +208,9 @@ def read_pdf_file( # By user request, keep files that are unreadable just so they # can be discoverable by title. return "", metadata - else: - logger.warning("No Password available to to decrypt pdf") + elif pdf_reader.is_encrypted: + logger.warning("No Password available to decrypt pdf, returning empty") + return "", metadata # Extract metadata from the PDF, removing leading '/' from keys if present # This standardizes the metadata keys for consistency @@ -331,9 +334,10 @@ def file_io_to_text(file: IO[Any]) -> str: def extract_file_text( - file_name: str | None, file: IO[Any], + file_name: str, break_on_unprocessable: bool = True, + extension: str | None = None, ) -> str: extension_to_function: dict[str, Callable[[IO[Any]], str]] = { ".pdf": pdf_to_text, @@ -345,22 +349,29 @@ def extract_file_text( ".html": parse_html_page_basic, } - def _process_file() -> str: - if file_name: - extension = get_file_ext(file_name) - if check_file_ext_is_valid(extension): - return extension_to_function.get(extension, file_io_to_text)(file) + try: + if get_unstructured_api_key(): + return unstructured_to_text(file, file_name) + + if file_name or extension: + if extension is not None: + final_extension = extension + elif file_name is not None: + final_extension = get_file_ext(file_name) - # Either the file somehow has no name or the extension is not one that we are familiar with + if check_file_ext_is_valid(final_extension): + return extension_to_function.get(final_extension, file_io_to_text)(file) + + # Either the file somehow has no name or the extension is not one that we recognize if is_text_file(file): return file_io_to_text(file) raise ValueError("Unknown file extension and unknown text encoding") - try: - return _process_file() except Exception as e: if break_on_unprocessable: - raise RuntimeError(f"Failed to process file: {str(e)}") from e - logger.warning(f"Failed to process file: {str(e)}") + raise RuntimeError( + f"Failed to process file {file_name or 'Unknown'}: {str(e)}" + ) from e + logger.warning(f"Failed to process file {file_name or 'Unknown'}: {str(e)}") return "" diff --git a/backend/danswer/file_processing/html_utils.py b/backend/danswer/file_processing/html_utils.py index 48782981f89..d1948d011f5 100644 --- a/backend/danswer/file_processing/html_utils.py +++ b/backend/danswer/file_processing/html_utils.py @@ -4,11 +4,17 @@ from typing import IO import bs4 +import trafilatura # type: ignore +from trafilatura.settings import use_config # type: ignore from 
danswer.configs.app_configs import HTML_BASED_CONNECTOR_TRANSFORM_LINKS_STRATEGY +from danswer.configs.app_configs import PARSE_WITH_TRAFILATURA from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_CLASSES from danswer.configs.app_configs import WEB_CONNECTOR_IGNORED_ELEMENTS from danswer.file_processing.enums import HtmlBasedConnectorTransformLinksStrategy +from danswer.utils.logger import setup_logger + +logger = setup_logger() MINTLIFY_UNWANTED = ["sticky", "hidden"] @@ -47,6 +53,18 @@ def format_element_text(element_text: str, link_href: str | None) -> str: return f"[{element_text_no_newlines}]({link_href})" +def parse_html_with_trafilatura(html_content: str) -> str: + """Parse HTML content using trafilatura.""" + config = use_config() + config.set("DEFAULT", "include_links", "True") + config.set("DEFAULT", "include_tables", "True") + config.set("DEFAULT", "include_images", "True") + config.set("DEFAULT", "include_formatting", "True") + + extracted_text = trafilatura.extract(html_content, config=config) + return strip_excessive_newlines_and_spaces(extracted_text) if extracted_text else "" + + def format_document_soup( document: bs4.BeautifulSoup, table_cell_separator: str = "\t" ) -> str: @@ -183,7 +201,21 @@ def web_html_cleanup( for undesired_tag in additional_element_types_to_discard: [tag.extract() for tag in soup.find_all(undesired_tag)] + soup_string = str(soup) + page_text = "" + + if PARSE_WITH_TRAFILATURA: + try: + page_text = parse_html_with_trafilatura(soup_string) + if not page_text: + raise ValueError("Empty content returned by trafilatura.") + except Exception as e: + logger.info(f"Trafilatura parsing failed: {e}. Falling back on bs4.") + page_text = format_document_soup(soup) + else: + page_text = format_document_soup(soup) + # 200B is ZeroWidthSpace which we don't care for - page_text = format_document_soup(soup).replace("\u200B", "") + cleaned_text = page_text.replace("\u200B", "") - return ParsedHTML(title=title, cleaned_text=page_text) + return ParsedHTML(title=title, cleaned_text=cleaned_text) diff --git a/backend/danswer/file_processing/unstructured.py b/backend/danswer/file_processing/unstructured.py new file mode 100644 index 00000000000..dc61869ee9c --- /dev/null +++ b/backend/danswer/file_processing/unstructured.py @@ -0,0 +1,67 @@ +from typing import Any +from typing import cast +from typing import IO + +from unstructured.staging.base import dict_to_elements +from unstructured_client import UnstructuredClient # type: ignore +from unstructured_client.models import operations # type: ignore +from unstructured_client.models import shared + +from danswer.configs.constants import KV_UNSTRUCTURED_API_KEY +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +def get_unstructured_api_key() -> str | None: + kv_store = get_kv_store() + try: + return cast(str, kv_store.load(KV_UNSTRUCTURED_API_KEY)) + except KvKeyNotFoundError: + return None + + +def update_unstructured_api_key(api_key: str) -> None: + kv_store = get_kv_store() + kv_store.store(KV_UNSTRUCTURED_API_KEY, api_key) + + +def delete_unstructured_api_key() -> None: + kv_store = get_kv_store() + kv_store.delete(KV_UNSTRUCTURED_API_KEY) + + +def _sdk_partition_request( + file: IO[Any], file_name: str, **kwargs: Any +) -> operations.PartitionRequest: + try: + request = operations.PartitionRequest( + partition_parameters=shared.PartitionParameters( + 
files=shared.Files(content=file.read(), file_name=file_name), + **kwargs, + ), + ) + return request + except Exception as e: + logger.error(f"Error creating partition request for file {file_name}: {str(e)}") + raise + + +def unstructured_to_text(file: IO[Any], file_name: str) -> str: + logger.debug(f"Starting to read file: {file_name}") + req = _sdk_partition_request(file, file_name, strategy="auto") + + unstructured_client = UnstructuredClient(api_key_auth=get_unstructured_api_key()) + + response = unstructured_client.general.partition(req) # type: ignore + elements = dict_to_elements(response.elements) + + if response.status_code != 200: + err = f"Received unexpected status code {response.status_code} from Unstructured API." + logger.error(err) + raise ValueError(err) + + return "\n\n".join(str(el) for el in elements) diff --git a/backend/danswer/file_store/models.py b/backend/danswer/file_store/models.py index d944a2fd270..5bf964287e3 100644 --- a/backend/danswer/file_store/models.py +++ b/backend/danswer/file_store/models.py @@ -13,6 +13,7 @@ class ChatFileType(str, Enum): DOC = "document" # Plain text only contain the text PLAIN_TEXT = "plain_text" + CSV = "csv" class FileDescriptor(TypedDict): diff --git a/backend/danswer/file_store/utils.py b/backend/danswer/file_store/utils.py index b71d20bbbb4..e9eea2c262d 100644 --- a/backend/danswer/file_store/utils.py +++ b/backend/danswer/file_store/utils.py @@ -8,12 +8,13 @@ from sqlalchemy.orm import Session from danswer.configs.constants import FileOrigin -from danswer.db.engine import get_session_context_manager +from danswer.db.engine import get_session_with_tenant from danswer.db.models import ChatMessage from danswer.file_store.file_store import get_default_file_store from danswer.file_store.models import FileDescriptor from danswer.file_store.models import InMemoryChatFile from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR def load_chat_file( @@ -52,11 +53,11 @@ def load_all_chat_files( return files -def save_file_from_url(url: str) -> str: +def save_file_from_url(url: str, tenant_id: str) -> str: """NOTE: using multiple sessions here, since this is often called using multithreading. 
In practice, sharing a session has resulted in weird errors.""" - with get_session_context_manager() as db_session: + with get_session_with_tenant(tenant_id) as db_session: response = requests.get(url) response.raise_for_status() @@ -75,7 +76,10 @@ def save_file_from_url(url: str) -> str: def save_files_from_urls(urls: list[str]) -> list[str]: + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + funcs: list[tuple[Callable[..., Any], tuple[Any, ...]]] = [ - (save_file_from_url, (url,)) for url in urls + (save_file_from_url, (url, tenant_id)) for url in urls ] + # Must pass in tenant_id here, since this is called by multithreading return run_functions_tuples_in_parallel(funcs) diff --git a/backend/danswer/indexing/chunker.py b/backend/danswer/indexing/chunker.py index a25cfc3d32b..57f05a66e1e 100644 --- a/backend/danswer/indexing/chunker.py +++ b/backend/danswer/indexing/chunker.py @@ -15,7 +15,7 @@ from danswer.natural_language_processing.utils import BaseTokenizer from danswer.utils.logger import setup_logger from danswer.utils.text_processing import shared_precompare_cleanup - +from shared_configs.configs import STRICT_CHUNK_TOKEN_LIMIT # Not supporting overlaps, we need a clean combination of chunks and it is unclear if overlaps # actually help quality at all @@ -27,6 +27,7 @@ MAX_METADATA_PERCENTAGE = 0.25 CHUNK_MIN_CONTENT = 256 + logger = setup_logger() @@ -157,6 +158,24 @@ def __init__( else None ) + def _split_oversized_chunk(self, text: str, content_token_limit: int) -> list[str]: + """ + Splits the text into smaller chunks based on token count to ensure + no chunk exceeds the content_token_limit. + """ + tokens = self.tokenizer.tokenize(text) + chunks = [] + start = 0 + total_tokens = len(tokens) + while start < total_tokens: + end = min(start + content_token_limit, total_tokens) + token_chunk = tokens[start:end] + # Join the tokens to reconstruct the text + chunk_text = " ".join(token_chunk) + chunks.append(chunk_text) + start = end + return chunks + def _extract_blurb(self, text: str) -> str: texts = self.blurb_splitter.split_text(text) if not texts: @@ -217,14 +236,42 @@ def _create_chunk( chunk_text = "" split_texts = self.chunk_splitter.split_text(section_text) + for i, split_text in enumerate(split_texts): - chunks.append( - _create_chunk( - text=split_text, - links={0: section_link_text}, - is_continuation=(i != 0), + split_token_count = len(self.tokenizer.tokenize(split_text)) + + if STRICT_CHUNK_TOKEN_LIMIT: + split_token_count = len(self.tokenizer.tokenize(split_text)) + if split_token_count > content_token_limit: + # Further split the oversized chunk + smaller_chunks = self._split_oversized_chunk( + split_text, content_token_limit + ) + for i, small_chunk in enumerate(smaller_chunks): + chunks.append( + _create_chunk( + text=small_chunk, + links={0: section_link_text}, + is_continuation=(i != 0), + ) + ) + else: + chunks.append( + _create_chunk( + text=split_text, + links={0: section_link_text}, + ) + ) + + else: + chunks.append( + _create_chunk( + text=split_text, + links={0: section_link_text}, + is_continuation=(i != 0), + ) ) - ) + continue current_token_count = len(self.tokenizer.tokenize(chunk_text)) diff --git a/backend/danswer/indexing/embedder.py b/backend/danswer/indexing/embedder.py index 259bebd3fd9..1c11a01b390 100644 --- a/backend/danswer/indexing/embedder.py +++ b/backend/danswer/indexing/embedder.py @@ -32,6 +32,8 @@ def __init__( provider_type: EmbeddingProvider | None, api_key: str | None, api_url: str | None, + api_version: str | None, + 
deployment_name: str | None, heartbeat: Heartbeat | None, ): self.model_name = model_name @@ -41,6 +43,8 @@ def __init__( self.provider_type = provider_type self.api_key = api_key self.api_url = api_url + self.api_version = api_version + self.deployment_name = deployment_name self.embedding_model = EmbeddingModel( model_name=model_name, @@ -50,6 +54,8 @@ def __init__( api_key=api_key, provider_type=provider_type, api_url=api_url, + api_version=api_version, + deployment_name=deployment_name, # The below are globally set, this flow always uses the indexing one server_host=INDEXING_MODEL_SERVER_HOST, server_port=INDEXING_MODEL_SERVER_PORT, @@ -75,6 +81,8 @@ def __init__( provider_type: EmbeddingProvider | None = None, api_key: str | None = None, api_url: str | None = None, + api_version: str | None = None, + deployment_name: str | None = None, heartbeat: Heartbeat | None = None, ): super().__init__( @@ -85,6 +93,8 @@ def __init__( provider_type, api_key, api_url, + api_version, + deployment_name, heartbeat, ) @@ -93,6 +103,9 @@ def embed_chunks( self, chunks: list[DocAwareChunk], ) -> list[IndexChunk]: + """Adds embeddings to the chunks, the title and metadata suffixes are added to the chunk as well + if they exist. If there is no space for it, it would have been thrown out at the chunking step. + """ # All chunks at this point must have some non-empty content flat_chunk_texts: list[str] = [] large_chunks_present = False @@ -111,6 +124,11 @@ def embed_chunks( flat_chunk_texts.append(chunk_text) if chunk.mini_chunk_texts: + if chunk.large_chunk_reference_ids: + # A large chunk does not contain mini chunks, if it matches the large chunk + # with a high score, then mini chunks would not be used anyway + # otherwise it should match the normal chunk + raise RuntimeError("Large chunk contains mini chunks") flat_chunk_texts.extend(chunk.mini_chunk_texts) embeddings = self.embedding_model.encode( @@ -193,5 +211,7 @@ def from_db_search_settings( provider_type=search_settings.provider_type, api_key=search_settings.api_key, api_url=search_settings.api_url, + api_version=search_settings.api_version, + deployment_name=search_settings.deployment_name, heartbeat=heartbeat, ) diff --git a/backend/danswer/indexing/indexing_pipeline.py b/backend/danswer/indexing/indexing_pipeline.py index 992bce2dccf..507956ff40f 100644 --- a/backend/danswer/indexing/indexing_pipeline.py +++ b/backend/danswer/indexing/indexing_pipeline.py @@ -137,6 +137,7 @@ def index_doc_batch_with_handler( attempt_id: int | None, db_session: Session, ignore_time_skip: bool = False, + tenant_id: str | None = None, ) -> tuple[int, int]: r = (0, 0) try: @@ -148,6 +149,7 @@ def index_doc_batch_with_handler( index_attempt_metadata=index_attempt_metadata, db_session=db_session, ignore_time_skip=ignore_time_skip, + tenant_id=tenant_id, ) except Exception as e: if INDEXING_EXCEPTION_LIMIT == 0: @@ -193,6 +195,8 @@ def index_doc_batch_prepare( db_session: Session, ignore_time_skip: bool = False, ) -> DocumentBatchPrepareContext | None: + """This sets up the documents in the relational DB (source of truth) for permissions, metadata, etc. 
+ This precedes indexing it into the actual document index.""" documents = [] for document in document_batch: empty_contents = not any(section.text.strip() for section in document.sections) @@ -261,6 +265,7 @@ def index_doc_batch( index_attempt_metadata: IndexAttemptMetadata, db_session: Session, ignore_time_skip: bool = False, + tenant_id: str | None = None, ) -> tuple[int, int]: """Takes different pieces of the indexing pipeline and applies it to a batch of documents Note that the documents should already be batched at this point so that it does not inflate the @@ -324,6 +329,7 @@ def index_doc_batch( if chunk.source_document.id in ctx.id_to_db_doc_map else DEFAULT_BOOST ), + tenant_id=tenant_id, ) for chunk in chunks_with_embeddings ] @@ -373,6 +379,7 @@ def build_indexing_pipeline( chunker: Chunker | None = None, ignore_time_skip: bool = False, attempt_id: int | None = None, + tenant_id: str | None = None, ) -> IndexingPipelineProtocol: """Builds a pipeline which takes in a list (batch) of docs and indexes them.""" search_settings = get_current_search_settings(db_session) @@ -416,4 +423,5 @@ def build_indexing_pipeline( ignore_time_skip=ignore_time_skip, attempt_id=attempt_id, db_session=db_session, + tenant_id=tenant_id, ) diff --git a/backend/danswer/indexing/models.py b/backend/danswer/indexing/models.py index c789a2b351b..39cfa2cca0c 100644 --- a/backend/danswer/indexing/models.py +++ b/backend/danswer/indexing/models.py @@ -75,6 +75,7 @@ class DocMetadataAwareIndexChunk(IndexChunk): negative -> ranked lower. """ + tenant_id: str | None = None access: "DocumentAccess" document_sets: set[str] boost: int @@ -86,6 +87,7 @@ def from_index_chunk( access: "DocumentAccess", document_sets: set[str], boost: int, + tenant_id: str | None, ) -> "DocMetadataAwareIndexChunk": index_chunk_data = index_chunk.model_dump() return cls( @@ -93,6 +95,7 @@ def from_index_chunk( access=access, document_sets=document_sets, boost=boost, + tenant_id=tenant_id, ) diff --git a/backend/danswer/key_value_store/__init__.py b/backend/danswer/key_value_store/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/danswer/key_value_store/factory.py b/backend/danswer/key_value_store/factory.py new file mode 100644 index 00000000000..142e9031b77 --- /dev/null +++ b/backend/danswer/key_value_store/factory.py @@ -0,0 +1,8 @@ +from danswer.key_value_store.interface import KeyValueStore +from danswer.key_value_store.store import PgRedisKVStore + + +def get_kv_store() -> KeyValueStore: + # In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in + # It's read from the global thread level variable + return PgRedisKVStore() diff --git a/backend/danswer/dynamic_configs/interface.py b/backend/danswer/key_value_store/interface.py similarity index 71% rename from backend/danswer/dynamic_configs/interface.py rename to backend/danswer/key_value_store/interface.py index 999ad939615..aa815171f06 100644 --- a/backend/danswer/dynamic_configs/interface.py +++ b/backend/danswer/key_value_store/interface.py @@ -9,11 +9,13 @@ ) -class ConfigNotFoundError(Exception): +class KvKeyNotFoundError(Exception): pass -class DynamicConfigStore: +class KeyValueStore: + # In the Multi Tenant case, the tenant context is picked up automatically, it does not need to be passed in + # It's read from the global thread level variable @abc.abstractmethod def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: raise NotImplementedError diff --git
a/backend/danswer/key_value_store/store.py b/backend/danswer/key_value_store/store.py new file mode 100644 index 00000000000..8e60dab88b1 --- /dev/null +++ b/backend/danswer/key_value_store/store.py @@ -0,0 +1,120 @@ +import json +from collections.abc import Iterator +from contextlib import contextmanager +from typing import cast + +from fastapi import HTTPException +from redis.client import Redis +from sqlalchemy import text +from sqlalchemy.orm import Session + +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import is_valid_schema_name +from danswer.db.models import KVStore +from danswer.key_value_store.interface import JSON_ro +from danswer.key_value_store.interface import KeyValueStore +from danswer.key_value_store.interface import KvKeyNotFoundError +from danswer.redis.redis_pool import get_redis_client +from danswer.utils.logger import setup_logger +from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR + +logger = setup_logger() + + +REDIS_KEY_PREFIX = "danswer_kv_store:" +KV_REDIS_KEY_EXPIRATION = 60 * 60 * 24 # 1 Day + + +class PgRedisKVStore(KeyValueStore): + def __init__( + self, redis_client: Redis | None = None, tenant_id: str | None = None + ) -> None: + # If no redis_client is provided, fall back to the context var + if redis_client is not None: + self.redis_client = redis_client + else: + tenant_id = tenant_id or CURRENT_TENANT_ID_CONTEXTVAR.get() + self.redis_client = get_redis_client(tenant_id=tenant_id) + + @contextmanager + def get_session(self) -> Iterator[Session]: + engine = get_sqlalchemy_engine() + with Session(engine, expire_on_commit=False) as session: + if MULTI_TENANT: + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + if tenant_id == POSTGRES_DEFAULT_SCHEMA: + raise HTTPException( + status_code=401, detail="User must authenticate" + ) + if not is_valid_schema_name(tenant_id): + raise HTTPException(status_code=400, detail="Invalid tenant ID") + # Set the search_path to the tenant's schema + session.execute(text(f'SET search_path = "{tenant_id}"')) + yield session + + def store(self, key: str, val: JSON_ro, encrypt: bool = False) -> None: + # Not encrypted in Redis, but encrypted in Postgres + try: + self.redis_client.set( + REDIS_KEY_PREFIX + key, json.dumps(val), ex=KV_REDIS_KEY_EXPIRATION + ) + except Exception as e: + # Fallback gracefully to Postgres if Redis fails + logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}") + + encrypted_val = val if encrypt else None + plain_val = val if not encrypt else None + with self.get_session() as session: + obj = session.query(KVStore).filter_by(key=key).first() + if obj: + obj.value = plain_val + obj.encrypted_value = encrypted_val + else: + obj = KVStore( + key=key, value=plain_val, encrypted_value=encrypted_val + ) # type: ignore + session.query(KVStore).filter_by(key=key).delete() # just in case + session.add(obj) + session.commit() + + def load(self, key: str) -> JSON_ro: + try: + redis_value = self.redis_client.get(REDIS_KEY_PREFIX + key) + if redis_value: + assert isinstance(redis_value, bytes) + return json.loads(redis_value.decode("utf-8")) + except Exception as e: + logger.error(f"Failed to get value from Redis for key '{key}': {str(e)}") + + with self.get_session() as session: + obj = session.query(KVStore).filter_by(key=key).first() + if not obj: + raise KvKeyNotFoundError + + if obj.value is not None: + value = obj.value + elif 
obj.encrypted_value is not None: + value = obj.encrypted_value + else: + value = None + + try: + self.redis_client.set(REDIS_KEY_PREFIX + key, json.dumps(value)) + except Exception as e: + logger.error(f"Failed to set value in Redis for key '{key}': {str(e)}") + + return cast(JSON_ro, value) + + def delete(self, key: str) -> None: + try: + self.redis_client.delete(REDIS_KEY_PREFIX + key) + except Exception as e: + logger.error(f"Failed to delete value from Redis for key '{key}': {str(e)}") + + with self.get_session() as session: + result = session.query(KVStore).filter_by(key=key).delete() # type: ignore + if result == 0: + raise KvKeyNotFoundError + session.commit() diff --git a/backend/danswer/llm/answering/answer.py b/backend/danswer/llm/answering/answer.py index 5cddadeb2db..c447db452ff 100644 --- a/backend/danswer/llm/answering/answer.py +++ b/backend/danswer/llm/answering/answer.py @@ -1,72 +1,44 @@ -import itertools from collections.abc import Callable from collections.abc import Iterator -from typing import Any -from typing import cast from uuid import uuid4 from langchain.schema.messages import BaseMessage from langchain_core.messages import AIMessageChunk -from langchain_core.messages import HumanMessage +from langchain_core.messages import ToolCall -from danswer.chat.chat_utils import llm_doc_from_inference_section from danswer.chat.models import AnswerQuestionPossibleReturn from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece -from danswer.chat.models import LlmDoc -from danswer.chat.models import StreamStopInfo -from danswer.chat.models import StreamStopReason -from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE from danswer.file_store.utils import InMemoryChatFile +from danswer.llm.answering.llm_response_handler import LLMCall +from danswer.llm.answering.llm_response_handler import LLMResponseHandlerManager from danswer.llm.answering.models import AnswerStyleConfig from danswer.llm.answering.models import PreviousMessage from danswer.llm.answering.models import PromptConfig -from danswer.llm.answering.models import StreamProcessor from danswer.llm.answering.prompts.build import AnswerPromptBuilder from danswer.llm.answering.prompts.build import default_build_system_message from danswer.llm.answering.prompts.build import default_build_user_message -from danswer.llm.answering.prompts.citations_prompt import ( - build_citations_system_message, +from danswer.llm.answering.stream_processing.answer_response_handler import ( + AnswerResponseHandler, ) -from danswer.llm.answering.prompts.citations_prompt import build_citations_user_message -from danswer.llm.answering.prompts.quotes_prompt import build_quotes_user_message -from danswer.llm.answering.stream_processing.citation_processing import ( - build_citation_processor, +from danswer.llm.answering.stream_processing.answer_response_handler import ( + CitationResponseHandler, ) -from danswer.llm.answering.stream_processing.quotes_processing import ( - build_quotes_processor, +from danswer.llm.answering.stream_processing.answer_response_handler import ( + DummyAnswerResponseHandler, +) +from danswer.llm.answering.stream_processing.answer_response_handler import ( + QuotesResponseHandler, ) -from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping from danswer.llm.answering.stream_processing.utils import map_document_id_order +from danswer.llm.answering.tool.tool_response_handler import ToolResponseHandler from danswer.llm.interfaces import LLM -from 
danswer.llm.interfaces import ToolChoiceOptions from danswer.natural_language_processing.utils import get_tokenizer -from danswer.tools.custom.custom_tool_prompt_builder import ( - build_user_message_for_custom_tool_for_non_tool_calling_llm, -) -from danswer.tools.force import filter_tools_for_force_tool_use from danswer.tools.force import ForceUseTool -from danswer.tools.images.image_generation_tool import IMAGE_GENERATION_RESPONSE_ID -from danswer.tools.images.image_generation_tool import ImageGenerationResponse -from danswer.tools.images.image_generation_tool import ImageGenerationTool -from danswer.tools.images.prompt import build_image_generation_user_prompt -from danswer.tools.internet_search.internet_search_tool import InternetSearchTool -from danswer.tools.message import build_tool_message -from danswer.tools.message import ToolCallSummary -from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID -from danswer.tools.search.search_tool import SEARCH_DOC_CONTENT_ID -from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID -from danswer.tools.search.search_tool import SearchResponseSummary -from danswer.tools.search.search_tool import SearchTool +from danswer.tools.models import ToolResponse from danswer.tools.tool import Tool -from danswer.tools.tool import ToolResponse -from danswer.tools.tool_runner import ( - check_which_tools_should_run_for_non_tool_calling_llm, -) -from danswer.tools.tool_runner import ToolCallFinalResult +from danswer.tools.tool_implementations.search.search_tool import SearchTool from danswer.tools.tool_runner import ToolCallKickoff -from danswer.tools.tool_runner import ToolRunner -from danswer.tools.tool_selection import select_single_tool_for_non_tool_calling_llm from danswer.tools.utils import explicit_tool_calling_supported from danswer.utils.logger import setup_logger @@ -74,29 +46,9 @@ logger = setup_logger() -def _get_answer_stream_processor( - context_docs: list[LlmDoc], - doc_id_to_rank_map: DocumentIdOrderMapping, - answer_style_configs: AnswerStyleConfig, -) -> StreamProcessor: - if answer_style_configs.citation_config: - return build_citation_processor( - context_docs=context_docs, doc_id_to_rank_map=doc_id_to_rank_map - ) - if answer_style_configs.quotes_config: - return build_quotes_processor( - context_docs=context_docs, is_json_prompt=not (QA_PROMPT_OVERRIDE == "weak") - ) - - raise RuntimeError("Not implemented yet") - - AnswerStream = Iterator[AnswerQuestionPossibleReturn | ToolCallKickoff | ToolResponse] -logger = setup_logger() - - class Answer: def __init__( self, @@ -136,8 +88,6 @@ def __init__( self.tools = tools or [] self.force_use_tool = force_use_tool - self.skip_explicit_tool_calling = skip_explicit_tool_calling - self.message_history = message_history or [] # used for QA flow where we only want to send a single message self.single_message_history = single_message_history @@ -162,327 +112,141 @@ def __init__( self.skip_gen_ai_answer_generation = skip_gen_ai_answer_generation self._is_cancelled = False - def _update_prompt_builder_for_search_tool( - self, prompt_builder: AnswerPromptBuilder, final_context_documents: list[LlmDoc] - ) -> None: - if self.answer_style_config.citation_config: - prompt_builder.update_system_prompt( - build_citations_system_message(self.prompt_config) - ) - prompt_builder.update_user_prompt( - build_citations_user_message( - question=self.question, - prompt_config=self.prompt_config, - context_docs=final_context_documents, - files=self.latest_query_files, - all_doc_useful=( - 
self.answer_style_config.citation_config.all_docs_useful - if self.answer_style_config.citation_config - else False - ), - history_message=self.single_message_history or "", - ) - ) - elif self.answer_style_config.quotes_config: - prompt_builder.update_user_prompt( - build_quotes_user_message( - question=self.question, - context_docs=final_context_documents, - history_str=self.single_message_history or "", - prompt=self.prompt_config, - ) - ) - - def _raw_output_for_explicit_tool_calling_llms( - self, - ) -> Iterator[ - str | StreamStopInfo | ToolCallKickoff | ToolResponse | ToolCallFinalResult - ]: - prompt_builder = AnswerPromptBuilder(self.message_history, self.llm.config) - - tool_call_chunk: AIMessageChunk | None = None - if self.force_use_tool.force_use and self.force_use_tool.args is not None: - # if we are forcing a tool WITH args specified, we don't need to check which tools to run - # / need to generate the args - tool_call_chunk = AIMessageChunk( - content="", - ) - tool_call_chunk.tool_calls = [ - { - "name": self.force_use_tool.tool_name, - "args": self.force_use_tool.args, - "id": str(uuid4()), - } - ] - else: - # if tool calling is supported, first try the raw message - # to see if we don't need to use any tools - prompt_builder.update_system_prompt( - default_build_system_message(self.prompt_config) - ) - prompt_builder.update_user_prompt( - default_build_user_message( - self.question, self.prompt_config, self.latest_query_files - ) - ) - prompt = prompt_builder.build() - final_tool_definitions = [ - tool.tool_definition() - for tool in filter_tools_for_force_tool_use( - self.tools, self.force_use_tool - ) - ] - - for message in self.llm.stream( - prompt=prompt, - tools=final_tool_definitions if final_tool_definitions else None, - tool_choice="required" if self.force_use_tool.force_use else None, - ): - if isinstance(message, AIMessageChunk) and ( - message.tool_call_chunks or message.tool_calls - ): - if tool_call_chunk is None: - tool_call_chunk = message - else: - tool_call_chunk += message # type: ignore - else: - if message.content: - if self.is_cancelled: - return - yield cast(str, message.content) - if ( - message.additional_kwargs.get("usage_metadata", {}).get("stop") - == "length" - ): - yield StreamStopInfo( - stop_reason=StreamStopReason.CONTEXT_LENGTH - ) - - if not tool_call_chunk: - return # no tool call needed - - # if we have a tool call, we need to call the tool - tool_call_requests = tool_call_chunk.tool_calls - for tool_call_request in tool_call_requests: - known_tools_by_name = [ - tool for tool in self.tools if tool.name == tool_call_request["name"] - ] - - if not known_tools_by_name: - logger.error( - "Tool call requested with unknown name field. 
\n" - f"self.tools: {self.tools}" - f"tool_call_request: {tool_call_request}" - ) - if self.tools: - tool = self.tools[0] - else: - continue - else: - tool = known_tools_by_name[0] - tool_args = ( - self.force_use_tool.args - if self.force_use_tool.tool_name == tool.name - and self.force_use_tool.args - else tool_call_request["args"] - ) - - tool_runner = ToolRunner(tool, tool_args) - yield tool_runner.kickoff() - yield from tool_runner.tool_responses() - - tool_call_summary = ToolCallSummary( - tool_call_request=tool_call_chunk, - tool_call_result=build_tool_message( - tool_call_request, tool_runner.tool_message_content() - ), + self.using_tool_calling_llm = ( + explicit_tool_calling_supported( + self.llm.config.model_provider, self.llm.config.model_name ) + and not skip_explicit_tool_calling + ) - if tool.name in {SearchTool._NAME, InternetSearchTool._NAME}: - self._update_prompt_builder_for_search_tool(prompt_builder, []) - elif tool.name == ImageGenerationTool._NAME: - img_urls = [ - img_generation_result["url"] - for img_generation_result in tool_runner.tool_final_result().tool_result - ] - prompt_builder.update_user_prompt( - build_image_generation_user_prompt( - query=self.question, img_urls=img_urls - ) - ) - yield tool_runner.tool_final_result() + def _get_tools_list(self) -> list[Tool]: + if not self.force_use_tool.force_use: + return self.tools - prompt = prompt_builder.build(tool_call_summary=tool_call_summary) + tool = next( + (t for t in self.tools if t.name == self.force_use_tool.tool_name), None + ) + if tool is None: + raise RuntimeError(f"Tool '{self.force_use_tool.tool_name}' not found") - yield from self._process_llm_stream( - prompt=prompt, - # as of now, we don't support multiple tool calls in sequence, which is why - # we don't need to pass this in here - # tools=[tool.tool_definition() for tool in self.tools], + logger.info( + f"Forcefully using tool='{tool.name}'" + + ( + f" with args='{self.force_use_tool.args}'" + if self.force_use_tool.args is not None + else "" ) + ) + return [tool] - return + def _handle_specified_tool_call( + self, llm_calls: list[LLMCall], tool: Tool, tool_args: dict + ) -> AnswerStream: + current_llm_call = llm_calls[-1] - # This method processes the LLM stream and yields the content or stop information - def _process_llm_stream( - self, - prompt: Any, - tools: list[dict] | None = None, - tool_choice: ToolChoiceOptions | None = None, - ) -> Iterator[str | StreamStopInfo]: - for message in self.llm.stream( - prompt=prompt, tools=tools, tool_choice=tool_choice - ): - if isinstance(message, AIMessageChunk): - if message.content: - if self.is_cancelled: - return StreamStopInfo(stop_reason=StreamStopReason.CANCELLED) - yield cast(str, message.content) - - if ( - message.additional_kwargs.get("usage_metadata", {}).get("stop") - == "length" - ): - yield StreamStopInfo(stop_reason=StreamStopReason.CONTEXT_LENGTH) - - def _raw_output_for_non_explicit_tool_calling_llms( - self, - ) -> Iterator[ - str | StreamStopInfo | ToolCallKickoff | ToolResponse | ToolCallFinalResult - ]: - prompt_builder = AnswerPromptBuilder(self.message_history, self.llm.config) - chosen_tool_and_args: tuple[Tool, dict] | None = None - - if self.force_use_tool.force_use: - # if we are forcing a tool, we don't need to check which tools to run - tool = next( - iter( - [ - tool - for tool in self.tools - if tool.name == self.force_use_tool.tool_name - ] - ), - None, - ) - if not tool: - raise RuntimeError(f"Tool '{self.force_use_tool.tool_name}' not found") + # make a dummy tool 
handler + tool_handler = ToolResponseHandler([tool]) - tool_args = ( - self.force_use_tool.args - if self.force_use_tool.args is not None - else tool.get_args_for_non_tool_calling_llm( - query=self.question, - history=self.message_history, - llm=self.llm, - force_run=True, - ) - ) + dummy_tool_call_chunk = AIMessageChunk(content="") + dummy_tool_call_chunk.tool_calls = [ + ToolCall(name=tool.name, args=tool_args, id=str(uuid4())) + ] - if tool_args is None: - raise RuntimeError(f"Tool '{tool.name}' did not return args") + response_handler_manager = LLMResponseHandlerManager( + tool_handler, DummyAnswerResponseHandler(), self.is_cancelled + ) + yield from response_handler_manager.handle_llm_response( + iter([dummy_tool_call_chunk]) + ) - chosen_tool_and_args = (tool, tool_args) + new_llm_call = response_handler_manager.next_llm_call(current_llm_call) + if new_llm_call: + yield from self._get_response(llm_calls + [new_llm_call]) else: - tool_options = check_which_tools_should_run_for_non_tool_calling_llm( - tools=self.tools, - query=self.question, - history=self.message_history, - llm=self.llm, - ) + raise RuntimeError("Tool call handler did not return a new LLM call") - available_tools_and_args = [ - (self.tools[ind], args) - for ind, args in enumerate(tool_options) - if args is not None - ] + def _get_response(self, llm_calls: list[LLMCall]) -> AnswerStream: + current_llm_call = llm_calls[-1] - logger.info( - f"Selecting single tool from tools: {[(tool.name, args) for tool, args in available_tools_and_args]}" + # handle the case where no decision has to be made; we simply run the tool + if ( + current_llm_call.force_use_tool.force_use + and current_llm_call.force_use_tool.args is not None + ): + tool_name, tool_args = ( + current_llm_call.force_use_tool.tool_name, + current_llm_call.force_use_tool.args, ) - - chosen_tool_and_args = ( - select_single_tool_for_non_tool_calling_llm( - tools_and_args=available_tools_and_args, - history=self.message_history, - query=self.question, - llm=self.llm, - ) - if available_tools_and_args - else None + tool = next( + (t for t in current_llm_call.tools if t.name == tool_name), None ) + if not tool: + raise RuntimeError(f"Tool '{tool_name}' not found") - logger.notice(f"Chosen tool: {chosen_tool_and_args}") + yield from self._handle_specified_tool_call(llm_calls, tool, tool_args) + return - if not chosen_tool_and_args: - prompt_builder.update_system_prompt( - default_build_system_message(self.prompt_config) - ) - prompt_builder.update_user_prompt( - default_build_user_message( - self.question, self.prompt_config, self.latest_query_files + # special pre-logic for non-tool calling LLM case + if not self.using_tool_calling_llm and current_llm_call.tools: + chosen_tool_and_args = ( + ToolResponseHandler.get_tool_call_for_non_tool_calling_llm( + current_llm_call, self.llm ) ) - prompt = prompt_builder.build() - yield from self._process_llm_stream( - prompt=prompt, - tools=None, - ) + if chosen_tool_and_args: + tool, tool_args = chosen_tool_and_args + yield from self._handle_specified_tool_call(llm_calls, tool, tool_args) + return + + # if we're skipping gen ai answer generation, we should break + # out unless we're forcing a tool call. If we don't, we might generate an + # answer, which is a no-no! 
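For orientation on the refactor above: the old _raw_output_for_* paths are replaced by a recursive _get_response over a list of LLMCalls, with a tool handler and an answer handler consuming the same LLM stream. The sketch below is a simplified, self-contained illustration of that loop using toy stand-in handlers and plain strings; the real LLMResponseHandlerManager, ToolResponseHandler, and AnswerResponseHandler carry prompts, tools, and typed response parts instead.

from collections.abc import Iterator


class DummyAnswerHandler:
    def handle_response_part(self, item: str | None, history: list[str]) -> Iterator[str]:
        if item is not None:
            yield item  # pass answer tokens straight through


class DummyToolHandler:
    def handle_response_part(self, item: str | None, history: list[str]) -> Iterator[str]:
        yield from ()  # buffers tool-call chunks; emits nothing until the end

    def next_llm_call(self, previous: list[str]) -> list[str] | None:
        return None  # no follow-up LLM call needed in this toy example


def get_response(stream: Iterator[str]) -> Iterator[str]:
    tool_handler, answer_handler = DummyToolHandler(), DummyAnswerHandler()
    seen: list[str] = []
    for message in stream:
        # tool handler is drained (it only acts once the stream completes),
        # answer handler streams processed tokens back out
        list(tool_handler.handle_response_part(message, seen))
        yield from answer_handler.handle_response_part(message, seen)
        seen.append(message)
    # a final None lets both handlers flush anything they buffered
    yield from tool_handler.handle_response_part(None, seen)
    yield from answer_handler.handle_response_part(None, seen)
    follow_up = tool_handler.next_llm_call(seen)
    if follow_up is not None:
        yield from get_response(iter(follow_up))  # recurse, as _get_response does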
+ if ( + self.skip_gen_ai_answer_generation + and not current_llm_call.force_use_tool.force_use + ): return - tool, tool_args = chosen_tool_and_args - tool_runner = ToolRunner(tool, tool_args) - yield tool_runner.kickoff() + # set up "handlers" to listen to the LLM response stream and + # feed back the processed results + handle tool call requests + # + figure out what the next LLM call should be + tool_call_handler = ToolResponseHandler(current_llm_call.tools) - if tool.name in {SearchTool._NAME, InternetSearchTool._NAME}: - final_context_documents = None - for response in tool_runner.tool_responses(): - if response.id == FINAL_CONTEXT_DOCUMENTS_ID: - final_context_documents = cast(list[LlmDoc], response.response) - yield response - - if final_context_documents is None: - raise RuntimeError( - f"{tool.name} did not return final context documents" - ) + search_result = SearchTool.get_search_result(current_llm_call) or [] - self._update_prompt_builder_for_search_tool( - prompt_builder, final_context_documents + answer_handler: AnswerResponseHandler + if self.answer_style_config.citation_config: + answer_handler = CitationResponseHandler( + context_docs=search_result, + doc_id_to_rank_map=map_document_id_order(search_result), ) - elif tool.name == ImageGenerationTool._NAME: - img_urls = [] - for response in tool_runner.tool_responses(): - if response.id == IMAGE_GENERATION_RESPONSE_ID: - img_generation_response = cast( - list[ImageGenerationResponse], response.response - ) - img_urls = [img.url for img in img_generation_response] - - yield response - - prompt_builder.update_user_prompt( - build_image_generation_user_prompt( - query=self.question, - img_urls=img_urls, - ) + elif self.answer_style_config.quotes_config: + answer_handler = QuotesResponseHandler( + context_docs=search_result, ) else: - prompt_builder.update_user_prompt( - HumanMessage( - content=build_user_message_for_custom_tool_for_non_tool_calling_llm( - self.question, - tool.name, - *tool_runner.tool_responses(), - ) - ) - ) - final = tool_runner.tool_final_result() + raise ValueError("No answer style config provided") - yield final + response_handler_manager = LLMResponseHandlerManager( + tool_call_handler, answer_handler, self.is_cancelled + ) - prompt = prompt_builder.build() + # DEBUG: good breakpoint + stream = self.llm.stream( + prompt=current_llm_call.prompt_builder.build(), + tools=[tool.tool_definition() for tool in current_llm_call.tools] or None, + tool_choice=( + "required" + if current_llm_call.tools and current_llm_call.force_use_tool.force_use + else None + ), + structured_response_format=self.answer_style_config.structured_response_format, + ) + yield from response_handler_manager.handle_llm_response(stream) - yield from self._process_llm_stream(prompt=prompt, tools=None) + new_llm_call = response_handler_manager.next_llm_call(current_llm_call) + if new_llm_call: + yield from self._get_response(llm_calls + [new_llm_call]) @property def processed_streamed_output(self) -> AnswerStream: @@ -490,94 +254,30 @@ def processed_streamed_output(self) -> AnswerStream: yield from self._processed_stream return - output_generator = ( - self._raw_output_for_explicit_tool_calling_llms() - if explicit_tool_calling_supported( - self.llm.config.model_provider, self.llm.config.model_name - ) - and not self.skip_explicit_tool_calling - else self._raw_output_for_non_explicit_tool_calling_llms() + prompt_builder = AnswerPromptBuilder( + user_message=default_build_user_message( + user_query=self.question, + 
prompt_config=self.prompt_config, + files=self.latest_query_files, + ), + message_history=self.message_history, + llm_config=self.llm.config, + single_message_history=self.single_message_history, + ) + prompt_builder.update_system_prompt( + default_build_system_message(self.prompt_config) + ) + llm_call = LLMCall( + prompt_builder=prompt_builder, + tools=self._get_tools_list(), + force_use_tool=self.force_use_tool, + files=self.latest_query_files, + tool_call_info=[], + using_tool_calling_llm=self.using_tool_calling_llm, ) - - def _process_stream( - stream: Iterator[ToolCallKickoff | ToolResponse | str | StreamStopInfo], - ) -> AnswerStream: - message = None - - # special things we need to keep track of for the SearchTool - # raw results that will be displayed to the user - search_results: list[LlmDoc] | None = None - # processed docs to feed into the LLM - final_context_docs: list[LlmDoc] | None = None - - for message in stream: - if isinstance(message, ToolCallKickoff) or isinstance( - message, ToolCallFinalResult - ): - yield message - elif isinstance(message, ToolResponse): - if message.id == SEARCH_RESPONSE_SUMMARY_ID: - # We don't need to run section merging in this flow, this variable is only used - # below to specify the ordering of the documents for the purpose of matching - # citations to the right search documents. The deduplication logic is more lightweight - # there and we don't need to do it twice - search_results = [ - llm_doc_from_inference_section(section) - for section in cast( - SearchResponseSummary, message.response - ).top_sections - ] - elif message.id == FINAL_CONTEXT_DOCUMENTS_ID: - final_context_docs = cast(list[LlmDoc], message.response) - yield message - - elif ( - message.id == SEARCH_DOC_CONTENT_ID - and not self._return_contexts - ): - continue - - yield message - else: - # assumes all tool responses will come first, then the final answer - break - - if not self.skip_gen_ai_answer_generation: - process_answer_stream_fn = _get_answer_stream_processor( - context_docs=final_context_docs or [], - # if doc selection is enabled, then search_results will be None, - # so we need to use the final_context_docs - doc_id_to_rank_map=map_document_id_order( - search_results or final_context_docs or [] - ), - answer_style_configs=self.answer_style_config, - ) - - stream_stop_info = None - - def _stream() -> Iterator[str]: - nonlocal stream_stop_info - for item in itertools.chain([message], stream): - if isinstance(item, StreamStopInfo): - stream_stop_info = item - return - - # this should never happen, but we're seeing weird behavior here so handling for now - if not isinstance(item, str): - logger.error( - f"Received non-string item in answer stream: {item}. Skipping." 
- ) - continue - - yield item - - yield from process_answer_stream_fn(_stream()) - - if stream_stop_info: - yield stream_stop_info processed_stream = [] - for processed_packet in _process_stream(output_generator): + for processed_packet in self._get_response([llm_call]): processed_stream.append(processed_packet) yield processed_packet @@ -601,7 +301,6 @@ def citations(self) -> list[CitationInfo]: return citations - @property def is_cancelled(self) -> bool: if self._is_cancelled: return True diff --git a/backend/danswer/llm/answering/llm_response_handler.py b/backend/danswer/llm/answering/llm_response_handler.py new file mode 100644 index 00000000000..f8426844244 --- /dev/null +++ b/backend/danswer/llm/answering/llm_response_handler.py @@ -0,0 +1,84 @@ +from collections.abc import Callable +from collections.abc import Generator +from collections.abc import Iterator +from typing import TYPE_CHECKING + +from langchain_core.messages import BaseMessage +from pydantic.v1 import BaseModel as BaseModel__v1 + +from danswer.chat.models import CitationInfo +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import DanswerQuotes +from danswer.chat.models import StreamStopInfo +from danswer.chat.models import StreamStopReason +from danswer.file_store.models import InMemoryChatFile +from danswer.llm.answering.prompts.build import AnswerPromptBuilder +from danswer.tools.force import ForceUseTool +from danswer.tools.models import ToolCallFinalResult +from danswer.tools.models import ToolCallKickoff +from danswer.tools.models import ToolResponse +from danswer.tools.tool import Tool + + +if TYPE_CHECKING: + from danswer.llm.answering.stream_processing.answer_response_handler import ( + AnswerResponseHandler, + ) + from danswer.llm.answering.tool.tool_response_handler import ToolResponseHandler + + +ResponsePart = ( + DanswerAnswerPiece + | CitationInfo + | DanswerQuotes + | ToolCallKickoff + | ToolResponse + | ToolCallFinalResult + | StreamStopInfo +) + + +class LLMCall(BaseModel__v1): + prompt_builder: AnswerPromptBuilder + tools: list[Tool] + force_use_tool: ForceUseTool + files: list[InMemoryChatFile] + tool_call_info: list[ToolCallKickoff | ToolResponse | ToolCallFinalResult] + using_tool_calling_llm: bool + + class Config: + arbitrary_types_allowed = True + + +class LLMResponseHandlerManager: + def __init__( + self, + tool_handler: "ToolResponseHandler", + answer_handler: "AnswerResponseHandler", + is_cancelled: Callable[[], bool], + ): + self.tool_handler = tool_handler + self.answer_handler = answer_handler + self.is_cancelled = is_cancelled + + def handle_llm_response( + self, + stream: Iterator[BaseMessage], + ) -> Generator[ResponsePart, None, None]: + all_messages: list[BaseMessage] = [] + for message in stream: + if self.is_cancelled(): + yield StreamStopInfo(stop_reason=StreamStopReason.CANCELLED) + return + # tool handler doesn't do anything until the full message is received + # NOTE: still need to run list() to get this to run + list(self.tool_handler.handle_response_part(message, all_messages)) + yield from self.answer_handler.handle_response_part(message, all_messages) + all_messages.append(message) + + # potentially give back all info on the selected tool call + its result + yield from self.tool_handler.handle_response_part(None, all_messages) + yield from self.answer_handler.handle_response_part(None, all_messages) + + def next_llm_call(self, llm_call: LLMCall) -> LLMCall | None: + return self.tool_handler.next_llm_call(llm_call) diff --git 
a/backend/danswer/llm/answering/models.py b/backend/danswer/llm/answering/models.py index fb5fa9c313e..03f72a0968c 100644 --- a/backend/danswer/llm/answering/models.py +++ b/backend/danswer/llm/answering/models.py @@ -33,7 +33,7 @@ class PreviousMessage(BaseModel): token_count: int message_type: MessageType files: list[InMemoryChatFile] - tool_calls: list[ToolCallFinalResult] + tool_call: ToolCallFinalResult | None @classmethod def from_chat_message( @@ -51,14 +51,13 @@ def from_chat_message( for file in available_files if str(file.file_id) in message_file_ids ], - tool_calls=[ - ToolCallFinalResult( - tool_name=tool_call.tool_name, - tool_args=tool_call.tool_arguments, - tool_result=tool_call.tool_result, - ) - for tool_call in chat_message.tool_calls - ], + tool_call=ToolCallFinalResult( + tool_name=chat_message.tool_call.tool_name, + tool_args=chat_message.tool_call.tool_arguments, + tool_result=chat_message.tool_call.tool_result, + ) + if chat_message.tool_call + else None, ) def to_langchain_msg(self) -> BaseMessage: @@ -116,6 +115,10 @@ class AnswerStyleConfig(BaseModel): document_pruning_config: DocumentPruningConfig = Field( default_factory=DocumentPruningConfig ) + # forces the LLM to return a structured response, see + # https://platform.openai.com/docs/guides/structured-outputs/introduction + # right now, only used by the simple chat API + structured_response_format: dict | None = None @model_validator(mode="after") def check_quotes_and_citation(self) -> "AnswerStyleConfig": diff --git a/backend/danswer/llm/answering/prompts/build.py b/backend/danswer/llm/answering/prompts/build.py index f53d4481f6e..b5b774f522d 100644 --- a/backend/danswer/llm/answering/prompts/build.py +++ b/backend/danswer/llm/answering/prompts/build.py @@ -12,12 +12,12 @@ from danswer.llm.interfaces import LLMConfig from danswer.llm.utils import build_content_with_imgs from danswer.llm.utils import check_message_tokens +from danswer.llm.utils import message_to_prompt_and_imgs from danswer.llm.utils import translate_history_to_basemessages from danswer.natural_language_processing.utils import get_tokenizer from danswer.prompts.chat_prompts import CHAT_USER_CONTEXT_FREE_PROMPT from danswer.prompts.prompt_utils import add_date_time_to_prompt from danswer.prompts.prompt_utils import drop_messages_history_overflow -from danswer.tools.message import ToolCallSummary def default_build_system_message( @@ -54,18 +54,14 @@ def default_build_user_message( class AnswerPromptBuilder: def __init__( - self, message_history: list[PreviousMessage], llm_config: LLMConfig + self, + user_message: HumanMessage, + message_history: list[PreviousMessage], + llm_config: LLMConfig, + single_message_history: str | None = None, ) -> None: self.max_tokens = compute_max_llm_input_tokens(llm_config) - ( - self.message_history, - self.history_token_cnts, - ) = translate_history_to_basemessages(message_history) - - self.system_message_and_token_cnt: tuple[SystemMessage, int] | None = None - self.user_message_and_token_cnt: tuple[HumanMessage, int] | None = None - llm_tokenizer = get_tokenizer( provider_type=llm_config.model_provider, model_name=llm_config.model_name, @@ -74,6 +70,24 @@ def __init__( Callable[[str], list[int]], llm_tokenizer.encode ) + self.raw_message_history = message_history + ( + self.message_history, + self.history_token_cnts, + ) = translate_history_to_basemessages(message_history) + + # for cases where like the QA flow where we want to condense the chat history + # into a single message rather than a sequence of User / 
Assistant messages + self.single_message_history = single_message_history + + self.system_message_and_token_cnt: tuple[SystemMessage, int] | None = None + self.user_message_and_token_cnt = ( + user_message, + check_message_tokens(user_message, self.llm_tokenizer_encode_func), + ) + + self.new_messages_and_token_cnts: list[tuple[BaseMessage, int]] = [] + def update_system_prompt(self, system_message: SystemMessage | None) -> None: if not system_message: self.system_message_and_token_cnt = None @@ -85,18 +99,21 @@ def update_system_prompt(self, system_message: SystemMessage | None) -> None: ) def update_user_prompt(self, user_message: HumanMessage) -> None: - if not user_message: - self.user_message_and_token_cnt = None - return - self.user_message_and_token_cnt = ( user_message, check_message_tokens(user_message, self.llm_tokenizer_encode_func), ) - def build( - self, tool_call_summary: ToolCallSummary | None = None - ) -> list[BaseMessage]: + def append_message(self, message: BaseMessage) -> None: + """Append a new message to the message history.""" + token_count = check_message_tokens(message, self.llm_tokenizer_encode_func) + self.new_messages_and_token_cnts.append((message, token_count)) + + def get_user_message_content(self) -> str: + query, _ = message_to_prompt_and_imgs(self.user_message_and_token_cnt[0]) + return query + + def build(self) -> list[BaseMessage]: if not self.user_message_and_token_cnt: raise ValueError("User message must be set before building prompt") @@ -113,25 +130,8 @@ def build( final_messages_with_tokens.append(self.user_message_and_token_cnt) - if tool_call_summary: - final_messages_with_tokens.append( - ( - tool_call_summary.tool_call_request, - check_message_tokens( - tool_call_summary.tool_call_request, - self.llm_tokenizer_encode_func, - ), - ) - ) - final_messages_with_tokens.append( - ( - tool_call_summary.tool_call_result, - check_message_tokens( - tool_call_summary.tool_call_result, - self.llm_tokenizer_encode_func, - ), - ) - ) + if self.new_messages_and_token_cnts: + final_messages_with_tokens.extend(self.new_messages_and_token_cnts) return drop_messages_history_overflow( final_messages_with_tokens, self.max_tokens diff --git a/backend/danswer/llm/answering/prompts/citations_prompt.py b/backend/danswer/llm/answering/prompts/citations_prompt.py index 52345f3e587..b7ca7797e88 100644 --- a/backend/danswer/llm/answering/prompts/citations_prompt.py +++ b/backend/danswer/llm/answering/prompts/citations_prompt.py @@ -6,7 +6,6 @@ from danswer.db.models import Persona from danswer.db.persona import get_default_prompt__read_only from danswer.db.search_settings import get_multilingual_expansion -from danswer.file_store.utils import InMemoryChatFile from danswer.llm.answering.models import PromptConfig from danswer.llm.factory import get_llms_for_persona from danswer.llm.factory import get_main_llm_from_tuple @@ -14,10 +13,12 @@ from danswer.llm.utils import build_content_with_imgs from danswer.llm.utils import check_number_of_tokens from danswer.llm.utils import get_max_input_tokens +from danswer.llm.utils import message_to_prompt_and_imgs from danswer.prompts.chat_prompts import REQUIRE_CITATION_STATEMENT from danswer.prompts.constants import DEFAULT_IGNORE_STATEMENT from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT from danswer.prompts.direct_qa_prompts import CITATIONS_PROMPT_FOR_TOOL_CALLING +from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK from danswer.prompts.prompt_utils import add_date_time_to_prompt from 
danswer.prompts.prompt_utils import build_complete_context_str from danswer.prompts.prompt_utils import build_task_prompt_reminders @@ -131,10 +132,9 @@ def build_citations_system_message( def build_citations_user_message( - question: str, + message: HumanMessage, prompt_config: PromptConfig, context_docs: list[LlmDoc] | list[InferenceChunk], - files: list[InMemoryChatFile], all_doc_useful: bool, history_message: str = "", ) -> HumanMessage: @@ -143,6 +143,13 @@ def build_citations_user_message( prompt=prompt_config, use_language_hint=bool(multilingual_expansion) ) + history_block = ( + HISTORY_BLOCK.format(history_str=history_message) + "\n" + if history_message + else "" + ) + query, img_urls = message_to_prompt_and_imgs(message) + if context_docs: context_docs_str = build_complete_context_str(context_docs) optional_ignore = "" if all_doc_useful else DEFAULT_IGNORE_STATEMENT @@ -151,20 +158,22 @@ def build_citations_user_message( optional_ignore_statement=optional_ignore, context_docs_str=context_docs_str, task_prompt=task_prompt_with_reminder, - user_query=question, - history_block=history_message, + user_query=query, + history_block=history_block, ) else: # if no context docs provided, assume we're in the tool calling flow user_prompt = CITATIONS_PROMPT_FOR_TOOL_CALLING.format( task_prompt=task_prompt_with_reminder, - user_query=question, - history_block=history_message, + user_query=query, + history_block=history_block, ) user_prompt = user_prompt.strip() user_msg = HumanMessage( - content=build_content_with_imgs(user_prompt, files) if files else user_prompt + content=build_content_with_imgs(user_prompt, img_urls=img_urls) + if img_urls + else user_prompt ) return user_msg diff --git a/backend/danswer/llm/answering/prompts/quotes_prompt.py b/backend/danswer/llm/answering/prompts/quotes_prompt.py index 07abc4356b6..42f736b627d 100644 --- a/backend/danswer/llm/answering/prompts/quotes_prompt.py +++ b/backend/danswer/llm/answering/prompts/quotes_prompt.py @@ -5,6 +5,7 @@ from danswer.configs.chat_configs import QA_PROMPT_OVERRIDE from danswer.db.search_settings import get_multilingual_expansion from danswer.llm.answering.models import PromptConfig +from danswer.llm.utils import message_to_prompt_and_imgs from danswer.prompts.direct_qa_prompts import CONTEXT_BLOCK from danswer.prompts.direct_qa_prompts import HISTORY_BLOCK from danswer.prompts.direct_qa_prompts import JSON_PROMPT @@ -75,7 +76,7 @@ def _build_strong_llm_quotes_prompt( def build_quotes_user_message( - question: str, + message: HumanMessage, context_docs: list[LlmDoc] | list[InferenceChunk], history_str: str, prompt: PromptConfig, @@ -86,28 +87,10 @@ def build_quotes_user_message( else _build_strong_llm_quotes_prompt ) - return prompt_builder( - question=question, - context_docs=context_docs, - history_str=history_str, - prompt=prompt, - ) - - -def build_quotes_prompt( - question: str, - context_docs: list[LlmDoc] | list[InferenceChunk], - history_str: str, - prompt: PromptConfig, -) -> HumanMessage: - prompt_builder = ( - _build_weak_llm_quotes_prompt - if QA_PROMPT_OVERRIDE == "weak" - else _build_strong_llm_quotes_prompt - ) + query, _ = message_to_prompt_and_imgs(message) return prompt_builder( - question=question, + question=query, context_docs=context_docs, history_str=history_str, prompt=prompt, diff --git a/backend/danswer/llm/answering/prune_and_merge.py b/backend/danswer/llm/answering/prune_and_merge.py index 0193de1f2aa..690a5d2280d 100644 --- a/backend/danswer/llm/answering/prune_and_merge.py +++ 
b/backend/danswer/llm/answering/prune_and_merge.py @@ -19,7 +19,7 @@ from danswer.prompts.prompt_utils import build_doc_context_str from danswer.search.models import InferenceChunk from danswer.search.models import InferenceSection -from danswer.tools.search.search_utils import section_to_dict +from danswer.tools.tool_implementations.search.search_utils import section_to_dict from danswer.utils.logger import setup_logger diff --git a/backend/danswer/llm/answering/stream_processing/answer_response_handler.py b/backend/danswer/llm/answering/stream_processing/answer_response_handler.py new file mode 100644 index 00000000000..80a1446163b --- /dev/null +++ b/backend/danswer/llm/answering/stream_processing/answer_response_handler.py @@ -0,0 +1,91 @@ +import abc +from collections.abc import Generator + +from langchain_core.messages import BaseMessage + +from danswer.chat.models import CitationInfo +from danswer.chat.models import LlmDoc +from danswer.llm.answering.llm_response_handler import ResponsePart +from danswer.llm.answering.stream_processing.citation_processing import ( + CitationProcessor, +) +from danswer.llm.answering.stream_processing.quotes_processing import ( + QuotesProcessor, +) +from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping + + +class AnswerResponseHandler(abc.ABC): + @abc.abstractmethod + def handle_response_part( + self, + response_item: BaseMessage | None, + previous_response_items: list[BaseMessage], + ) -> Generator[ResponsePart, None, None]: + raise NotImplementedError + + +class DummyAnswerResponseHandler(AnswerResponseHandler): + def handle_response_part( + self, + response_item: BaseMessage | None, + previous_response_items: list[BaseMessage], + ) -> Generator[ResponsePart, None, None]: + # This is a dummy handler that returns nothing + yield from [] + + +class CitationResponseHandler(AnswerResponseHandler): + def __init__( + self, context_docs: list[LlmDoc], doc_id_to_rank_map: DocumentIdOrderMapping + ): + self.context_docs = context_docs + self.doc_id_to_rank_map = doc_id_to_rank_map + self.citation_processor = CitationProcessor( + context_docs=self.context_docs, + doc_id_to_rank_map=self.doc_id_to_rank_map, + ) + self.processed_text = "" + self.citations: list[CitationInfo] = [] + + def handle_response_part( + self, + response_item: BaseMessage | None, + previous_response_items: list[BaseMessage], + ) -> Generator[ResponsePart, None, None]: + if response_item is None: + return + + content = ( + response_item.content if isinstance(response_item.content, str) else "" + ) + + # Process the new content through the citation processor + yield from self.citation_processor.process_token(content) + + +class QuotesResponseHandler(AnswerResponseHandler): + def __init__( + self, + context_docs: list[LlmDoc], + is_json_prompt: bool = True, + ): + self.quotes_processor = QuotesProcessor( + context_docs=context_docs, + is_json_prompt=is_json_prompt, + ) + + def handle_response_part( + self, + response_item: BaseMessage | None, + previous_response_items: list[BaseMessage], + ) -> Generator[ResponsePart, None, None]: + if response_item is None: + yield from self.quotes_processor.process_token(None) + return + + content = ( + response_item.content if isinstance(response_item.content, str) else "" + ) + + yield from self.quotes_processor.process_token(content) diff --git a/backend/danswer/llm/answering/stream_processing/citation_processing.py b/backend/danswer/llm/answering/stream_processing/citation_processing.py index f1e5489550d..950ad207878 100644 
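The citation_processing.py hunk below turns the extract_citations_from_stream generator into a stateful CitationProcessor that is fed one token at a time and treats None as end-of-stream. The toy class here only illustrates that incremental interface and the "hold the buffer while a citation might still be open" idea; the real citation renumbering and link rewriting are omitted.

import re
from collections.abc import Iterator


class TinyCitationProcessor:
    """Toy stand-in for CitationProcessor: buffers text while a '[n' citation may still be open."""

    def __init__(self) -> None:
        self.buffer = ""

    def process_token(self, token: str | None) -> Iterator[str]:
        if token is None:
            # end of stream: flush whatever is still buffered
            if self.buffer:
                yield self.buffer
                self.buffer = ""
            return
        self.buffer += token
        if re.search(r"\[\d*$", self.buffer):
            return  # possible partial citation at the end, keep holding
        yield self.buffer
        self.buffer = ""


# p = TinyCitationProcessor()
# [list(p.process_token(t)) for t in ["Hello [", "1", "] world", None]]
# -> [[], [], ['Hello [1] world'], []]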
--- a/backend/danswer/llm/answering/stream_processing/citation_processing.py +++ b/backend/danswer/llm/answering/stream_processing/citation_processing.py @@ -1,12 +1,10 @@ import re -from collections.abc import Iterator +from collections.abc import Generator -from danswer.chat.models import AnswerQuestionStreamReturn from danswer.chat.models import CitationInfo from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import LlmDoc from danswer.configs.chat_configs import STOP_STREAM_PAT -from danswer.llm.answering.models import StreamProcessor from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping from danswer.prompts.constants import TRIPLE_BACKTICK from danswer.utils.logger import setup_logger @@ -19,128 +17,104 @@ def in_code_block(llm_text: str) -> bool: return count % 2 != 0 -def extract_citations_from_stream( - tokens: Iterator[str], - context_docs: list[LlmDoc], - doc_id_to_rank_map: DocumentIdOrderMapping, - stop_stream: str | None = STOP_STREAM_PAT, -) -> Iterator[DanswerAnswerPiece | CitationInfo]: - """ - Key aspects: - - 1. Stream Processing: - - Processes tokens one by one, allowing for real-time handling of large texts. - - 2. Citation Detection: - - Uses regex to find citations in the format [number]. - - Example: [1], [2], etc. - - 3. Citation Mapping: - - Maps detected citation numbers to actual document ranks using doc_id_to_rank_map. - - Example: [1] might become [3] if doc_id_to_rank_map maps it to 3. - - 4. Citation Formatting: - - Replaces citations with properly formatted versions. - - Adds links if available: [[1]](https://example.com) - - Handles cases where links are not available: [[1]]() - - 5. Duplicate Handling: - - Skips consecutive citations of the same document to avoid redundancy. - - 6. Output Generation: - - Yields DanswerAnswerPiece objects for regular text. - - Yields CitationInfo objects for each unique citation encountered. - - 7. Context Awareness: - - Uses context_docs to access document information for citations. - - This function effectively processes a stream of text, identifies and reformats citations, - and provides both the processed text and citation information as output. 
- """ - order_mapping = doc_id_to_rank_map.order_mapping - llm_out = "" - max_citation_num = len(context_docs) - citation_order = [] - curr_segment = "" - cited_inds = set() - hold = "" - - raw_out = "" - current_citations: list[int] = [] - past_cite_count = 0 - for raw_token in tokens: - raw_out += raw_token - if stop_stream: - next_hold = hold + raw_token - if stop_stream in next_hold: - break - if next_hold == stop_stream[: len(next_hold)]: - hold = next_hold - continue +class CitationProcessor: + def __init__( + self, + context_docs: list[LlmDoc], + doc_id_to_rank_map: DocumentIdOrderMapping, + stop_stream: str | None = STOP_STREAM_PAT, + ): + self.context_docs = context_docs + self.doc_id_to_rank_map = doc_id_to_rank_map + self.stop_stream = stop_stream + self.order_mapping = doc_id_to_rank_map.order_mapping + self.llm_out = "" + self.max_citation_num = len(context_docs) + self.citation_order: list[int] = [] + self.curr_segment = "" + self.cited_inds: set[int] = set() + self.hold = "" + self.current_citations: list[int] = [] + self.past_cite_count = 0 + + def process_token( + self, token: str | None + ) -> Generator[DanswerAnswerPiece | CitationInfo, None, None]: + # None -> end of stream + if token is None: + yield DanswerAnswerPiece(answer_piece=self.curr_segment) + return + + if self.stop_stream: + next_hold = self.hold + token + if self.stop_stream in next_hold: + return + if next_hold == self.stop_stream[: len(next_hold)]: + self.hold = next_hold + return token = next_hold - hold = "" - else: - token = raw_token + self.hold = "" - curr_segment += token - llm_out += token + self.curr_segment += token + self.llm_out += token # Handle code blocks without language tags - if "`" in curr_segment: - if curr_segment.endswith("`"): - continue - elif "```" in curr_segment: - piece_that_comes_after = curr_segment.split("```")[1][0] - if piece_that_comes_after == "\n" and in_code_block(llm_out): - curr_segment = curr_segment.replace("```", "```plaintext") + if "`" in self.curr_segment: + if self.curr_segment.endswith("`"): + return + elif "```" in self.curr_segment: + piece_that_comes_after = self.curr_segment.split("```")[1][0] + if piece_that_comes_after == "\n" and in_code_block(self.llm_out): + self.curr_segment = self.curr_segment.replace("```", "```plaintext") citation_pattern = r"\[(\d+)\]" - - citations_found = list(re.finditer(citation_pattern, curr_segment)) + citations_found = list(re.finditer(citation_pattern, self.curr_segment)) possible_citation_pattern = r"(\[\d*$)" # [1, [, etc - possible_citation_found = re.search(possible_citation_pattern, curr_segment) + possible_citation_found = re.search( + possible_citation_pattern, self.curr_segment + ) - # `past_cite_count`: number of characters since past citation - # 5 to ensure a citation hasn't occured - if len(citations_found) == 0 and len(llm_out) - past_cite_count > 5: - current_citations = [] + if len(citations_found) == 0 and len(self.llm_out) - self.past_cite_count > 5: + self.current_citations = [] - if citations_found and not in_code_block(llm_out): + result = "" # Initialize result here + if citations_found and not in_code_block(self.llm_out): last_citation_end = 0 length_to_add = 0 while len(citations_found) > 0: citation = citations_found.pop(0) numerical_value = int(citation.group(1)) - if 1 <= numerical_value <= max_citation_num: - context_llm_doc = context_docs[numerical_value - 1] - real_citation_num = order_mapping[context_llm_doc.document_id] + if 1 <= numerical_value <= self.max_citation_num: + context_llm_doc = 
self.context_docs[numerical_value - 1] + real_citation_num = self.order_mapping[context_llm_doc.document_id] - if real_citation_num not in citation_order: - citation_order.append(real_citation_num) + if real_citation_num not in self.citation_order: + self.citation_order.append(real_citation_num) - target_citation_num = citation_order.index(real_citation_num) + 1 + target_citation_num = ( + self.citation_order.index(real_citation_num) + 1 + ) # Skip consecutive citations of the same work - if target_citation_num in current_citations: + if target_citation_num in self.current_citations: start, end = citation.span() real_start = length_to_add + start diff = end - start - curr_segment = ( - curr_segment[: length_to_add + start] - + curr_segment[real_start + diff :] + self.curr_segment = ( + self.curr_segment[: length_to_add + start] + + self.curr_segment[real_start + diff :] ) length_to_add -= diff continue # Handle edge case where LLM outputs citation itself - # by allowing it to generate citations on its own. - if curr_segment.startswith("[["): - match = re.match(r"\[\[(\d+)\]\]", curr_segment) + if self.curr_segment.startswith("[["): + match = re.match(r"\[\[(\d+)\]\]", self.curr_segment) if match: try: doc_id = int(match.group(1)) - context_llm_doc = context_docs[doc_id - 1] + context_llm_doc = self.context_docs[doc_id - 1] yield CitationInfo( citation_num=target_citation_num, document_id=context_llm_doc.document_id, @@ -150,75 +124,57 @@ def extract_citations_from_stream( f"Manual LLM citation didn't properly cite documents {e}" ) else: - # Will continue attempt on next loops logger.warning( "Manual LLM citation wasn't able to close brackets" ) - continue link = context_llm_doc.link # Replace the citation in the current segment start, end = citation.span() - curr_segment = ( - curr_segment[: start + length_to_add] + self.curr_segment = ( + self.curr_segment[: start + length_to_add] + f"[{target_citation_num}]" - + curr_segment[end + length_to_add :] + + self.curr_segment[end + length_to_add :] ) - past_cite_count = len(llm_out) - current_citations.append(target_citation_num) + self.past_cite_count = len(self.llm_out) + self.current_citations.append(target_citation_num) - if target_citation_num not in cited_inds: - cited_inds.add(target_citation_num) + if target_citation_num not in self.cited_inds: + self.cited_inds.add(target_citation_num) yield CitationInfo( citation_num=target_citation_num, document_id=context_llm_doc.document_id, ) if link: - prev_length = len(curr_segment) - curr_segment = ( - curr_segment[: start + length_to_add] + prev_length = len(self.curr_segment) + self.curr_segment = ( + self.curr_segment[: start + length_to_add] + f"[[{target_citation_num}]]({link})" - + curr_segment[end + length_to_add :] + + self.curr_segment[end + length_to_add :] ) - length_to_add += len(curr_segment) - prev_length - + length_to_add += len(self.curr_segment) - prev_length else: - prev_length = len(curr_segment) - curr_segment = ( - curr_segment[: start + length_to_add] + prev_length = len(self.curr_segment) + self.curr_segment = ( + self.curr_segment[: start + length_to_add] + f"[[{target_citation_num}]]()" - + curr_segment[end + length_to_add :] + + self.curr_segment[end + length_to_add :] ) - length_to_add += len(curr_segment) - prev_length + length_to_add += len(self.curr_segment) - prev_length last_citation_end = end + length_to_add if last_citation_end > 0: - yield DanswerAnswerPiece(answer_piece=curr_segment[:last_citation_end]) - curr_segment = curr_segment[last_citation_end:] - if 
possible_citation_found: - continue - yield DanswerAnswerPiece(answer_piece=curr_segment) - curr_segment = "" - - if curr_segment: - yield DanswerAnswerPiece(answer_piece=curr_segment) - - -def build_citation_processor( - context_docs: list[LlmDoc], doc_id_to_rank_map: DocumentIdOrderMapping -) -> StreamProcessor: - def stream_processor( - tokens: Iterator[str], - ) -> AnswerQuestionStreamReturn: - yield from extract_citations_from_stream( - tokens=tokens, - context_docs=context_docs, - doc_id_to_rank_map=doc_id_to_rank_map, - ) + result += self.curr_segment[:last_citation_end] + self.curr_segment = self.curr_segment[last_citation_end:] + + if not possible_citation_found: + result += self.curr_segment + self.curr_segment = "" - return stream_processor + if result: + yield DanswerAnswerPiece(answer_piece=result) diff --git a/backend/danswer/llm/answering/stream_processing/quotes_processing.py b/backend/danswer/llm/answering/stream_processing/quotes_processing.py index 501a56b5aa7..33913133164 100644 --- a/backend/danswer/llm/answering/stream_processing/quotes_processing.py +++ b/backend/danswer/llm/answering/stream_processing/quotes_processing.py @@ -1,14 +1,11 @@ import math import re -from collections.abc import Callable from collections.abc import Generator -from collections.abc import Iterator from json import JSONDecodeError from typing import Optional import regex -from danswer.chat.models import AnswerQuestionStreamReturn from danswer.chat.models import DanswerAnswer from danswer.chat.models import DanswerAnswerPiece from danswer.chat.models import DanswerQuote @@ -157,7 +154,7 @@ def separate_answer_quotes( return _extract_answer_quotes_freeform(clean_up_code_blocks(answer_raw)) -def process_answer( +def _process_answer( answer_raw: str, docs: list[LlmDoc], is_json_prompt: bool = True, @@ -195,7 +192,7 @@ def _stream_json_answer_end(answer_so_far: str, next_token: str) -> bool: def _extract_quotes_from_completed_token_stream( model_output: str, context_docs: list[LlmDoc], is_json_prompt: bool = True ) -> DanswerQuotes: - answer, quotes = process_answer(model_output, context_docs, is_json_prompt) + answer, quotes = _process_answer(model_output, context_docs, is_json_prompt) if answer: logger.notice(answer) elif model_output: @@ -204,94 +201,101 @@ def _extract_quotes_from_completed_token_stream( return quotes -def process_model_tokens( - tokens: Iterator[str], - context_docs: list[LlmDoc], - is_json_prompt: bool = True, -) -> Generator[DanswerAnswerPiece | DanswerQuotes, None, None]: - """Used in the streaming case to process the model output - into an Answer and Quotes - - Yields Answer tokens back out in a dict for streaming to frontend - When Answer section ends, yields dict with answer_finished key - Collects all the tokens at the end to form the complete model output""" - quote_pat = f"\n{QUOTE_PAT}" - # Sometimes worse model outputs new line instead of : - quote_loose = f"\n{quote_pat[:-1]}\n" - # Sometime model outputs two newlines before quote section - quote_pat_full = f"\n{quote_pat}" - model_output: str = "" - found_answer_start = False if is_json_prompt else True - found_answer_end = False - hold_quote = "" - - for token in tokens: - model_previous = model_output - model_output += token - - if not found_answer_start: - m = answer_pattern.search(model_output) +class QuotesProcessor: + def __init__( + self, + context_docs: list[LlmDoc], + is_json_prompt: bool = True, + ): + self.context_docs = context_docs + self.is_json_prompt = is_json_prompt + + self.found_answer_start = 
False if is_json_prompt else True + self.found_answer_end = False + self.hold_quote = "" + self.model_output = "" + self.hold = "" + + def process_token( + self, token: str | None + ) -> Generator[DanswerAnswerPiece | DanswerQuotes, None, None]: + # None -> end of stream + if token is None: + if self.model_output: + yield _extract_quotes_from_completed_token_stream( + model_output=self.model_output, + context_docs=self.context_docs, + is_json_prompt=self.is_json_prompt, + ) + return + + model_previous = self.model_output + self.model_output += token + + if not self.found_answer_start: + m = answer_pattern.search(self.model_output) if m: - found_answer_start = True + self.found_answer_start = True - # Prevent heavy cases of hallucinations where model is never providing a JSON - # We want to quickly update the user - not stream forever - if is_json_prompt and len(model_output) > 70: + # Prevent heavy cases of hallucinations + if self.is_json_prompt and len(self.model_output) > 70: logger.warning("LLM did not produce json as prompted") - found_answer_end = True - continue - - remaining = model_output[m.end() :] + self.found_answer_end = True + return + + remaining = self.model_output[m.end() :] + + # Look for an unescaped quote, which means the answer is entirely contained + # in this token e.g. if the token is `{"answer": "blah", "qu` + quote_indices = [i for i, char in enumerate(remaining) if char == '"'] + for quote_idx in quote_indices: + # Check if quote is escaped by counting backslashes before it + num_backslashes = 0 + pos = quote_idx - 1 + while pos >= 0 and remaining[pos] == "\\": + num_backslashes += 1 + pos -= 1 + # If even number of backslashes, quote is not escaped + if num_backslashes % 2 == 0: + yield DanswerAnswerPiece(answer_piece=remaining[:quote_idx]) + return + + # If no unescaped quote found, yield the remaining string if len(remaining) > 0: yield DanswerAnswerPiece(answer_piece=remaining) - continue + return - if found_answer_start and not found_answer_end: - if is_json_prompt and _stream_json_answer_end(model_previous, token): - found_answer_end = True + if self.found_answer_start and not self.found_answer_end: + if self.is_json_prompt and _stream_json_answer_end(model_previous, token): + self.found_answer_end = True - # return the remaining part of the answer e.g. token might be 'd.", ' and we should yield 'd.' 
if token: try: answer_token_section = token.index('"') yield DanswerAnswerPiece( - answer_piece=hold_quote + token[:answer_token_section] + answer_piece=self.hold_quote + token[:answer_token_section] ) except ValueError: logger.error("Quotation mark not found in token") - yield DanswerAnswerPiece(answer_piece=hold_quote + token) + yield DanswerAnswerPiece(answer_piece=self.hold_quote + token) yield DanswerAnswerPiece(answer_piece=None) - continue - elif not is_json_prompt: - if quote_pat in hold_quote + token or quote_loose in hold_quote + token: - found_answer_end = True + return + + elif not self.is_json_prompt: + quote_pat = f"\n{QUOTE_PAT}" + quote_loose = f"\n{quote_pat[:-1]}\n" + quote_pat_full = f"\n{quote_pat}" + + if ( + quote_pat in self.hold_quote + token + or quote_loose in self.hold_quote + token + ): + self.found_answer_end = True yield DanswerAnswerPiece(answer_piece=None) - continue - if hold_quote + token in quote_pat_full: - hold_quote += token - continue - yield DanswerAnswerPiece(answer_piece=hold_quote + token) - hold_quote = "" - - logger.debug(f"Raw Model QnA Output: {model_output}") - - yield _extract_quotes_from_completed_token_stream( - model_output=model_output, - context_docs=context_docs, - is_json_prompt=is_json_prompt, - ) - - -def build_quotes_processor( - context_docs: list[LlmDoc], is_json_prompt: bool -) -> Callable[[Iterator[str]], AnswerQuestionStreamReturn]: - def stream_processor( - tokens: Iterator[str], - ) -> AnswerQuestionStreamReturn: - yield from process_model_tokens( - tokens=tokens, - context_docs=context_docs, - is_json_prompt=is_json_prompt, - ) - - return stream_processor + return + if self.hold_quote + token in quote_pat_full: + self.hold_quote += token + return + + yield DanswerAnswerPiece(answer_piece=self.hold_quote + token) + self.hold_quote = "" diff --git a/backend/danswer/llm/answering/tool/tool_response_handler.py b/backend/danswer/llm/answering/tool/tool_response_handler.py new file mode 100644 index 00000000000..08e7284f790 --- /dev/null +++ b/backend/danswer/llm/answering/tool/tool_response_handler.py @@ -0,0 +1,207 @@ +from collections.abc import Generator + +from langchain_core.messages import AIMessageChunk +from langchain_core.messages import BaseMessage +from langchain_core.messages import ToolCall + +from danswer.llm.answering.llm_response_handler import LLMCall +from danswer.llm.answering.llm_response_handler import ResponsePart +from danswer.llm.interfaces import LLM +from danswer.tools.force import ForceUseTool +from danswer.tools.message import build_tool_message +from danswer.tools.message import ToolCallSummary +from danswer.tools.models import ToolCallFinalResult +from danswer.tools.models import ToolCallKickoff +from danswer.tools.models import ToolResponse +from danswer.tools.tool import Tool +from danswer.tools.tool_runner import ( + check_which_tools_should_run_for_non_tool_calling_llm, +) +from danswer.tools.tool_runner import ToolRunner +from danswer.tools.tool_selection import select_single_tool_for_non_tool_calling_llm +from danswer.utils.logger import setup_logger + + +logger = setup_logger() + + +class ToolResponseHandler: + def __init__(self, tools: list[Tool]): + self.tools = tools + + self.tool_call_chunk: AIMessageChunk | None = None + self.tool_call_requests: list[ToolCall] = [] + + self.tool_runner: ToolRunner | None = None + self.tool_call_summary: ToolCallSummary | None = None + + self.tool_kickoff: ToolCallKickoff | None = None + self.tool_responses: list[ToolResponse] = [] + 
self.tool_final_result: ToolCallFinalResult | None = None + + @classmethod + def get_tool_call_for_non_tool_calling_llm( + cls, llm_call: LLMCall, llm: LLM + ) -> tuple[Tool, dict] | None: + if llm_call.force_use_tool.force_use: + # if we are forcing a tool, we don't need to check which tools to run + tool = next( + ( + t + for t in llm_call.tools + if t.name == llm_call.force_use_tool.tool_name + ), + None, + ) + if not tool: + raise RuntimeError( + f"Tool '{llm_call.force_use_tool.tool_name}' not found" + ) + + tool_args = ( + llm_call.force_use_tool.args + if llm_call.force_use_tool.args is not None + else tool.get_args_for_non_tool_calling_llm( + query=llm_call.prompt_builder.get_user_message_content(), + history=llm_call.prompt_builder.raw_message_history, + llm=llm, + force_run=True, + ) + ) + + if tool_args is None: + raise RuntimeError(f"Tool '{tool.name}' did not return args") + + return (tool, tool_args) + else: + tool_options = check_which_tools_should_run_for_non_tool_calling_llm( + tools=llm_call.tools, + query=llm_call.prompt_builder.get_user_message_content(), + history=llm_call.prompt_builder.raw_message_history, + llm=llm, + ) + + available_tools_and_args = [ + (llm_call.tools[ind], args) + for ind, args in enumerate(tool_options) + if args is not None + ] + + logger.info( + f"Selecting single tool from tools: {[(tool.name, args) for tool, args in available_tools_and_args]}" + ) + + chosen_tool_and_args = ( + select_single_tool_for_non_tool_calling_llm( + tools_and_args=available_tools_and_args, + history=llm_call.prompt_builder.raw_message_history, + query=llm_call.prompt_builder.get_user_message_content(), + llm=llm, + ) + if available_tools_and_args + else None + ) + + logger.notice(f"Chosen tool: {chosen_tool_and_args}") + return chosen_tool_and_args + + def _handle_tool_call(self) -> Generator[ResponsePart, None, None]: + if not self.tool_call_chunk or not self.tool_call_chunk.tool_calls: + return + + self.tool_call_requests = self.tool_call_chunk.tool_calls + + selected_tool: Tool | None = None + selected_tool_call_request: ToolCall | None = None + for tool_call_request in self.tool_call_requests: + known_tools_by_name = [ + tool for tool in self.tools if tool.name == tool_call_request["name"] + ] + + if not known_tools_by_name: + logger.error( + "Tool call requested with unknown name field. 
\n" + f"self.tools: {self.tools}" + f"tool_call_request: {tool_call_request}" + ) + continue + else: + selected_tool = known_tools_by_name[0] + selected_tool_call_request = tool_call_request + + if selected_tool and selected_tool_call_request: + break + + if not selected_tool or not selected_tool_call_request: + return + + logger.info(f"Selected tool: {selected_tool.name}") + logger.debug(f"Selected tool call request: {selected_tool_call_request}") + self.tool_runner = ToolRunner(selected_tool, selected_tool_call_request["args"]) + self.tool_kickoff = self.tool_runner.kickoff() + yield self.tool_kickoff + + for response in self.tool_runner.tool_responses(): + self.tool_responses.append(response) + yield response + + self.tool_final_result = self.tool_runner.tool_final_result() + yield self.tool_final_result + + self.tool_call_summary = ToolCallSummary( + tool_call_request=self.tool_call_chunk, + tool_call_result=build_tool_message( + selected_tool_call_request, self.tool_runner.tool_message_content() + ), + ) + + def handle_response_part( + self, + response_item: BaseMessage | None, + previous_response_items: list[BaseMessage], + ) -> Generator[ResponsePart, None, None]: + if response_item is None: + yield from self._handle_tool_call() + + if isinstance(response_item, AIMessageChunk) and ( + response_item.tool_call_chunks or response_item.tool_calls + ): + if self.tool_call_chunk is None: + self.tool_call_chunk = response_item + else: + self.tool_call_chunk += response_item # type: ignore + + return + + def next_llm_call(self, current_llm_call: LLMCall) -> LLMCall | None: + if ( + self.tool_runner is None + or self.tool_call_summary is None + or self.tool_kickoff is None + or self.tool_final_result is None + ): + return None + + tool_runner = self.tool_runner + new_prompt_builder = tool_runner.tool.build_next_prompt( + prompt_builder=current_llm_call.prompt_builder, + tool_call_summary=self.tool_call_summary, + tool_responses=self.tool_responses, + using_tool_calling_llm=current_llm_call.using_tool_calling_llm, + ) + return LLMCall( + prompt_builder=new_prompt_builder, + tools=[], # for now, only allow one tool call per response + force_use_tool=ForceUseTool( + force_use=False, + tool_name="", + args=None, + ), + files=current_llm_call.files, + using_tool_calling_llm=current_llm_call.using_tool_calling_llm, + tool_call_info=[ + self.tool_kickoff, + *self.tool_responses, + self.tool_final_result, + ], + ) diff --git a/backend/danswer/llm/chat_llm.py b/backend/danswer/llm/chat_llm.py index b65498a2ab1..8ee56f38531 100644 --- a/backend/danswer/llm/chat_llm.py +++ b/backend/danswer/llm/chat_llm.py @@ -83,8 +83,10 @@ def _convert_litellm_message_to_langchain_message( "args": json.loads(tool_call.function.arguments), "id": tool_call.id, } - for tool_call in (tool_calls if tool_calls else []) - ], + for tool_call in tool_calls + ] + if tool_calls + else [], ) elif role == "system": return SystemMessage(content=content) @@ -109,7 +111,7 @@ def _convert_message_to_dict(message: BaseMessage) -> dict: "arguments": json.dumps(tool_call["args"]), }, "type": "function", - "index": 0, # only support a single tool call atm + "index": tool_call.get("index", 0), } for tool_call in message.tool_calls ] @@ -158,12 +160,13 @@ def _convert_delta_to_message_chunk( if tool_calls: tool_call = tool_calls[0] tool_name = tool_call.function.name or (curr_msg and curr_msg.name) or "" + idx = tool_call.index tool_call_chunk = ToolCallChunk( name=tool_name, id=tool_call.id, args=tool_call.function.arguments, - index=0, # 
only support a single tool call atm + index=idx, ) return AIMessageChunk( @@ -204,6 +207,7 @@ def __init__( model_name: str, api_base: str | None = None, api_version: str | None = None, + deployment_name: str | None = None, max_output_tokens: int | None = None, custom_llm_provider: str | None = None, temperature: float = GEN_AI_TEMPERATURE, @@ -215,6 +219,7 @@ def __init__( self._model_version = model_name self._temperature = temperature self._api_key = api_key + self._deployment_name = deployment_name self._api_base = api_base self._api_version = api_version self._custom_llm_provider = custom_llm_provider @@ -277,12 +282,14 @@ def _completion( tools: list[dict] | None, tool_choice: ToolChoiceOptions | None, stream: bool, + structured_response_format: dict | None = None, ) -> litellm.ModelResponse | litellm.CustomStreamWrapper: if isinstance(prompt, list): prompt = [ _convert_message_to_dict(msg) if isinstance(msg, BaseMessage) else msg for msg in prompt ] + elif isinstance(prompt, str): prompt = [_convert_message_to_dict(HumanMessage(content=prompt))] @@ -311,6 +318,11 @@ def _completion( # NOTE: we can't pass this in if tools are not specified # or else OpenAI throws an error **({"parallel_tool_calls": False} if tools else {}), + **( + {"response_format": structured_response_format} + if structured_response_format + else {} + ), **self._model_kwargs, ) except Exception as e: @@ -326,6 +338,7 @@ def config(self) -> LLMConfig: api_key=self._api_key, api_base=self._api_base, api_version=self._api_version, + deployment_name=self._deployment_name, ) def _invoke_implementation( @@ -333,12 +346,16 @@ def _invoke_implementation( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> BaseMessage: if LOG_DANSWER_MODEL_INTERACTIONS: self.log_model_configs() response = cast( - litellm.ModelResponse, self._completion(prompt, tools, tool_choice, False) + litellm.ModelResponse, + self._completion( + prompt, tools, tool_choice, False, structured_response_format + ), ) choice = response.choices[0] if hasattr(choice, "message"): @@ -351,18 +368,21 @@ def _stream_implementation( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> Iterator[BaseMessage]: if LOG_DANSWER_MODEL_INTERACTIONS: self.log_model_configs() if DISABLE_LITELLM_STREAMING: - yield self.invoke(prompt) + yield self.invoke(prompt, tools, tool_choice, structured_response_format) return output = None response = cast( litellm.CustomStreamWrapper, - self._completion(prompt, tools, tool_choice, True), + self._completion( + prompt, tools, tool_choice, True, structured_response_format + ), ) try: for part in response: diff --git a/backend/danswer/llm/custom_llm.py b/backend/danswer/llm/custom_llm.py index 4a5ba7857c3..6b80406cf2f 100644 --- a/backend/danswer/llm/custom_llm.py +++ b/backend/danswer/llm/custom_llm.py @@ -80,6 +80,7 @@ def _invoke_implementation( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> BaseMessage: return self._execute(prompt) @@ -88,5 +89,6 @@ def _stream_implementation( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> Iterator[BaseMessage]: yield self._execute(prompt) diff --git 
a/backend/danswer/llm/factory.py b/backend/danswer/llm/factory.py index f57bfb524b9..eedf7ccc763 100644 --- a/backend/danswer/llm/factory.py +++ b/backend/danswer/llm/factory.py @@ -7,9 +7,9 @@ from danswer.db.models import Persona from danswer.llm.chat_llm import DefaultMultiLLM from danswer.llm.exceptions import GenAIDisabledException -from danswer.llm.headers import build_llm_extra_headers from danswer.llm.interfaces import LLM from danswer.llm.override_models import LLMOverride +from danswer.utils.headers import build_llm_extra_headers def get_main_llm_from_tuple( @@ -51,6 +51,7 @@ def _create_llm(model: str) -> LLM: return get_llm( provider=llm_provider.provider, model=model, + deployment_name=llm_provider.deployment_name, api_key=llm_provider.api_key, api_base=llm_provider.api_base, api_version=llm_provider.api_version, @@ -88,6 +89,7 @@ def _create_llm(model: str) -> LLM: return get_llm( provider=llm_provider.provider, model=model, + deployment_name=llm_provider.deployment_name, api_key=llm_provider.api_key, api_base=llm_provider.api_base, api_version=llm_provider.api_version, @@ -103,6 +105,7 @@ def _create_llm(model: str) -> LLM: def get_llm( provider: str, model: str, + deployment_name: str | None, api_key: str | None = None, api_base: str | None = None, api_version: str | None = None, @@ -114,6 +117,7 @@ def get_llm( return DefaultMultiLLM( model_provider=provider, model_name=model, + deployment_name=deployment_name, api_key=api_key, api_base=api_base, api_version=api_version, diff --git a/backend/danswer/llm/headers.py b/backend/danswer/llm/headers.py deleted file mode 100644 index b43c83e141e..00000000000 --- a/backend/danswer/llm/headers.py +++ /dev/null @@ -1,34 +0,0 @@ -from fastapi.datastructures import Headers - -from danswer.configs.model_configs import LITELLM_EXTRA_HEADERS -from danswer.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS - - -def get_litellm_additional_request_headers( - headers: dict[str, str] | Headers -) -> dict[str, str]: - if not LITELLM_PASS_THROUGH_HEADERS: - return {} - - pass_through_headers: dict[str, str] = {} - for key in LITELLM_PASS_THROUGH_HEADERS: - if key in headers: - pass_through_headers[key] = headers[key] - else: - # fastapi makes all header keys lowercase, handling that here - lowercase_key = key.lower() - if lowercase_key in headers: - pass_through_headers[lowercase_key] = headers[lowercase_key] - - return pass_through_headers - - -def build_llm_extra_headers( - additional_headers: dict[str, str] | None = None -) -> dict[str, str]: - extra_headers: dict[str, str] = {} - if additional_headers: - extra_headers.update(additional_headers) - if LITELLM_EXTRA_HEADERS: - extra_headers.update(LITELLM_EXTRA_HEADERS) - return extra_headers diff --git a/backend/danswer/llm/interfaces.py b/backend/danswer/llm/interfaces.py index 5e39792c393..7deee11dfa6 100644 --- a/backend/danswer/llm/interfaces.py +++ b/backend/danswer/llm/interfaces.py @@ -24,7 +24,7 @@ class LLMConfig(BaseModel): api_key: str | None = None api_base: str | None = None api_version: str | None = None - + deployment_name: str | None = None # This disables the "model_" protected namespace for pydantic model_config = {"protected_namespaces": ()} @@ -88,11 +88,14 @@ def invoke( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> BaseMessage: self._precall(prompt) # TODO add a postcall to log model outputs independent of concrete class # implementation - return 
self._invoke_implementation(prompt, tools, tool_choice) + return self._invoke_implementation( + prompt, tools, tool_choice, structured_response_format + ) @abc.abstractmethod def _invoke_implementation( @@ -100,6 +103,7 @@ def _invoke_implementation( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> BaseMessage: raise NotImplementedError @@ -108,11 +112,14 @@ def stream( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> Iterator[BaseMessage]: self._precall(prompt) # TODO add a postcall to log model outputs independent of concrete class # implementation - return self._stream_implementation(prompt, tools, tool_choice) + return self._stream_implementation( + prompt, tools, tool_choice, structured_response_format + ) @abc.abstractmethod def _stream_implementation( @@ -120,5 +127,6 @@ def _stream_implementation( prompt: LanguageModelInput, tools: list[dict] | None = None, tool_choice: ToolChoiceOptions | None = None, + structured_response_format: dict | None = None, ) -> Iterator[BaseMessage]: raise NotImplementedError diff --git a/backend/danswer/llm/llm_provider_options.py b/backend/danswer/llm/llm_provider_options.py index 8fc1de73955..cf562ee5a27 100644 --- a/backend/danswer/llm/llm_provider_options.py +++ b/backend/danswer/llm/llm_provider_options.py @@ -16,10 +16,13 @@ class WellKnownLLMProviderDescriptor(BaseModel): api_base_required: bool api_version_required: bool custom_config_keys: list[CustomConfigKey] | None = None - llm_names: list[str] default_model: str | None = None default_fast_model: str | None = None + # set for providers like Azure, which require a deployment name. + deployment_name_required: bool = False + # set for providers like Azure, which support a single model per deployment. 
+ single_model_supported: bool = False OPENAI_PROVIDER_NAME = "openai" @@ -58,6 +61,7 @@ class WellKnownLLMProviderDescriptor(BaseModel): IGNORABLE_ANTHROPIC_MODELS = [ "claude-2", "claude-instant-1", + "anthropic/claude-3-5-sonnet-20241022", ] ANTHROPIC_PROVIDER_NAME = "anthropic" ANTHROPIC_MODEL_NAMES = [ @@ -97,8 +101,8 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]: api_version_required=False, custom_config_keys=[], llm_names=fetch_models_for_provider(ANTHROPIC_PROVIDER_NAME), - default_model="claude-3-5-sonnet-20240620", - default_fast_model="claude-3-5-sonnet-20240620", + default_model="claude-3-5-sonnet-20241022", + default_fast_model="claude-3-5-sonnet-20241022", ), WellKnownLLMProviderDescriptor( name=AZURE_PROVIDER_NAME, @@ -108,6 +112,8 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]: api_version_required=True, custom_config_keys=[], llm_names=fetch_models_for_provider(AZURE_PROVIDER_NAME), + deployment_name_required=True, + single_model_supported=True, ), WellKnownLLMProviderDescriptor( name=BEDROCK_PROVIDER_NAME, @@ -130,8 +136,8 @@ def fetch_available_well_known_llms() -> list[WellKnownLLMProviderDescriptor]: ), ], llm_names=fetch_models_for_provider(BEDROCK_PROVIDER_NAME), - default_model="anthropic.claude-3-5-sonnet-20240620-v1:0", - default_fast_model="anthropic.claude-3-5-sonnet-20240620-v1:0", + default_model="anthropic.claude-3-5-sonnet-20241022-v2:0", + default_fast_model="anthropic.claude-3-5-sonnet-20241022-v2:0", ), ] diff --git a/backend/danswer/llm/utils.py b/backend/danswer/llm/utils.py index 3a5e40875f1..24017146912 100644 --- a/backend/danswer/llm/utils.py +++ b/backend/danswer/llm/utils.py @@ -1,3 +1,4 @@ +import io import json from collections.abc import Callable from collections.abc import Iterator @@ -7,6 +8,7 @@ from typing import Union import litellm # type: ignore +import pandas as pd import tiktoken from langchain.prompts.base import StringPromptValue from langchain.prompts.chat import ChatPromptValue @@ -107,11 +109,10 @@ def translate_danswer_msg_to_langchain( files: list[InMemoryChatFile] = [] # If the message is a `ChatMessage`, it doesn't have the downloaded files - # attached. Just ignore them for now. Also, OpenAI doesn't allow files to - # be attached to AI messages, so we must remove them - if not isinstance(msg, ChatMessage) and msg.message_type != MessageType.ASSISTANT: + # attached. Just ignore them for now. 
+ if not isinstance(msg, ChatMessage): files = msg.files - content = build_content_with_imgs(msg.message, files) + content = build_content_with_imgs(msg.message, files, message_type=msg.message_type) if msg.message_type == MessageType.SYSTEM: raise ValueError("System messages are not currently part of history") @@ -135,6 +136,18 @@ def translate_history_to_basemessages( return history_basemessages, history_token_counts +def _process_csv_file(file: InMemoryChatFile) -> str: + df = pd.read_csv(io.StringIO(file.content.decode("utf-8"))) + csv_preview = df.head().to_string() + + file_name_section = ( + f"CSV FILE NAME: {file.filename}\n" + if file.filename + else "CSV FILE (NO NAME PROVIDED):\n" + ) + return f"{file_name_section}{CODE_BLOCK_PAT.format(csv_preview)}\n\n\n" + + def _build_content( message: str, files: list[InMemoryChatFile] | None = None, @@ -145,16 +158,26 @@ def _build_content( if files else None ) - if not text_files: + + csv_files = ( + [file for file in files if file.file_type == ChatFileType.CSV] + if files + else None + ) + + if not text_files and not csv_files: return message final_message_with_files = "FILES:\n\n" - for file in text_files: + for file in text_files or []: file_content = file.content.decode("utf-8") file_name_section = f"DOCUMENT: {file.filename}\n" if file.filename else "" final_message_with_files += ( f"{file_name_section}{CODE_BLOCK_PAT.format(file_content.strip())}\n\n\n" ) + for file in csv_files or []: + final_message_with_files += _process_csv_file(file) + final_message_with_files += message return final_message_with_files @@ -164,10 +187,19 @@ def build_content_with_imgs( message: str, files: list[InMemoryChatFile] | None = None, img_urls: list[str] | None = None, + message_type: MessageType = MessageType.USER, ) -> str | list[str | dict[str, Any]]: # matching Langchain's BaseMessage content type files = files or [] - img_files = [file for file in files if file.file_type == ChatFileType.IMAGE] + + # Only include image files for user messages + img_files = ( + [file for file in files if file.file_type == ChatFileType.IMAGE] + if message_type == MessageType.USER + else [] + ) + img_urls = img_urls or [] + message_main_content = _build_content(message, files) if not img_files and not img_urls: @@ -203,6 +235,28 @@ def build_content_with_imgs( ) +def message_to_prompt_and_imgs(message: BaseMessage) -> tuple[str, list[str]]: + if isinstance(message.content, str): + return message.content, [] + + imgs = [] + texts = [] + for part in message.content: + if isinstance(part, dict): + if part.get("type") == "image_url": + img_url = part.get("image_url", {}).get("url") + if img_url: + imgs.append(img_url) + elif part.get("type") == "text": + text = part.get("text") + if text: + texts.append(text) + else: + texts.append(part) + + return "".join(texts), imgs + + def dict_based_prompt_to_langchain_prompt( messages: list[dict[str, str]] ) -> list[BaseMessage]: @@ -342,12 +396,26 @@ def get_llm_max_tokens( try: model_obj = model_map.get(f"{model_provider}/{model_name}") - if not model_obj: - model_obj = model_map[model_name] - logger.debug(f"Using model object for {model_name}") - else: + if model_obj: logger.debug(f"Using model object for {model_provider}/{model_name}") + if not model_obj: + model_obj = model_map.get(model_name) + if model_obj: + logger.debug(f"Using model object for {model_name}") + + if not model_obj: + model_name_split = model_name.split("/") + if len(model_name_split) > 1: + model_obj = model_map.get(model_name_split[1]) + if model_obj: + 
logger.debug(f"Using model object for {model_name_split[1]}") + + if not model_obj: + raise RuntimeError( + f"No litellm entry found for {model_provider}/{model_name}" + ) + if "max_input_tokens" in model_obj: max_tokens = model_obj["max_input_tokens"] logger.info( diff --git a/backend/danswer/main.py b/backend/danswer/main.py index a5abb8f28c2..ae18ab3ccf2 100644 --- a/backend/danswer/main.py +++ b/backend/danswer/main.py @@ -1,10 +1,11 @@ -import time +import sys import traceback from collections.abc import AsyncGenerator from contextlib import asynccontextmanager from typing import Any from typing import cast +import sentry_sdk import uvicorn from fastapi import APIRouter from fastapi import FastAPI @@ -15,6 +16,8 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse from httpx_oauth.clients.google import GoogleOAuth2 +from sentry_sdk.integrations.fastapi import FastApiIntegration +from sentry_sdk.integrations.starlette import StarletteIntegration from sqlalchemy.orm import Session from danswer import __version__ @@ -23,56 +26,23 @@ from danswer.auth.schemas import UserUpdate from danswer.auth.users import auth_backend from danswer.auth.users import fastapi_users -from danswer.chat.load_yamls import load_chat_yamls from danswer.configs.app_configs import APP_API_PREFIX from danswer.configs.app_configs import APP_HOST from danswer.configs.app_configs import APP_PORT from danswer.configs.app_configs import AUTH_TYPE from danswer.configs.app_configs import DISABLE_GENERATIVE_AI -from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP from danswer.configs.app_configs import LOG_ENDPOINT_LATENCY from danswer.configs.app_configs import OAUTH_CLIENT_ID from danswer.configs.app_configs import OAUTH_CLIENT_SECRET +from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_OVERFLOW +from danswer.configs.app_configs import POSTGRES_API_SERVER_POOL_SIZE +from danswer.configs.app_configs import SYSTEM_RECURSION_LIMIT from danswer.configs.app_configs import USER_AUTH_SECRET from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import AuthType -from danswer.configs.constants import KV_REINDEX_KEY -from danswer.configs.constants import KV_SEARCH_SETTINGS from danswer.configs.constants import POSTGRES_WEB_APP_NAME -from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION -from danswer.configs.model_configs import GEN_AI_API_KEY -from danswer.configs.model_configs import GEN_AI_MODEL_VERSION -from danswer.db.connector import check_connectors_exist -from danswer.db.connector import create_initial_default_connector -from danswer.db.connector_credential_pair import associate_default_cc_pair -from danswer.db.connector_credential_pair import get_connector_credential_pairs -from danswer.db.connector_credential_pair import resync_cc_pair -from danswer.db.credentials import create_initial_public_credential -from danswer.db.document import check_docs_exist -from danswer.db.engine import get_sqlalchemy_engine -from danswer.db.engine import init_sqlalchemy_engine +from danswer.db.engine import SqlEngine from danswer.db.engine import warm_up_connections -from danswer.db.index_attempt import cancel_indexing_attempts_past_model -from danswer.db.index_attempt import expire_index_attempts -from danswer.db.llm import fetch_default_provider -from danswer.db.llm import update_default_provider -from danswer.db.llm import upsert_llm_provider -from danswer.db.persona import delete_old_default_personas -from 
danswer.db.search_settings import get_current_search_settings -from danswer.db.search_settings import get_secondary_search_settings -from danswer.db.search_settings import update_current_search_settings -from danswer.db.search_settings import update_secondary_search_settings -from danswer.db.swap_index import check_index_swap -from danswer.document_index.factory import get_default_document_index -from danswer.document_index.interfaces import DocumentIndex -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError -from danswer.indexing.models import IndexingSetting -from danswer.natural_language_processing.search_nlp_models import EmbeddingModel -from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder -from danswer.natural_language_processing.search_nlp_models import warm_up_cross_encoder -from danswer.search.models import SavedSearchSettings -from danswer.search.retrieval.search_runner import download_nltk_data from danswer.server.auth_check import check_router_auth from danswer.server.danswer_api.ingestion import router as danswer_api_router from danswer.server.documents.cc_pair import router as cc_pair_router @@ -86,6 +56,7 @@ admin_router as admin_input_prompt_router, ) from danswer.server.features.input_prompt.api import basic_router as input_prompt_router +from danswer.server.features.notifications.api import router as notification_router from danswer.server.features.persona.api import admin_router as admin_persona_router from danswer.server.features.persona.api import basic_router as persona_router from danswer.server.features.prompt.api import basic_router as prompt_router @@ -98,7 +69,6 @@ from danswer.server.manage.get_state import router as state_router from danswer.server.manage.llm.api import admin_router as llm_admin_router from danswer.server.manage.llm.api import basic_router as llm_router -from danswer.server.manage.llm.models import LLMProviderUpsertRequest from danswer.server.manage.search_settings import router as search_settings_router from danswer.server.manage.slack_bot import router as slack_bot_management_router from danswer.server.manage.users import router as user_router @@ -110,15 +80,11 @@ from danswer.server.query_and_chat.query_backend import basic_router as query_router from danswer.server.settings.api import admin_router as settings_admin_router from danswer.server.settings.api import basic_router as settings_router -from danswer.server.settings.store import load_settings -from danswer.server.settings.store import store_settings from danswer.server.token_rate_limits.api import ( router as token_rate_limit_settings_router, ) -from danswer.tools.built_in_tools import auto_add_search_tool_to_personas -from danswer.tools.built_in_tools import load_builtin_tools -from danswer.tools.built_in_tools import refresh_built_in_tools_cache -from danswer.utils.gpu_utils import gpu_status_request +from danswer.setup import setup_danswer +from danswer.setup import setup_multitenant_danswer from danswer.utils.logger import setup_logger from danswer.utils.telemetry import get_or_generate_uuid from danswer.utils.telemetry import optional_telemetry @@ -127,8 +93,9 @@ from danswer.utils.variable_functionality import global_version from danswer.utils.variable_functionality import set_is_ee_based_on_env_variable from shared_configs.configs import CORS_ALLOWED_ORIGIN -from shared_configs.configs import MODEL_SERVER_HOST -from shared_configs.configs import MODEL_SERVER_PORT 
+from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import SENTRY_DSN + logger = setup_logger() @@ -181,184 +148,24 @@ def include_router_with_global_prefix_prepended( application.include_router(router, **final_kwargs) -def setup_postgres(db_session: Session) -> None: - logger.notice("Verifying default connector/credential exist.") - create_initial_public_credential(db_session) - create_initial_default_connector(db_session) - associate_default_cc_pair(db_session) - - logger.notice("Loading default Prompts and Personas") - delete_old_default_personas(db_session) - load_chat_yamls() - - logger.notice("Loading built-in tools") - load_builtin_tools(db_session) - refresh_built_in_tools_cache(db_session) - auto_add_search_tool_to_personas(db_session) - - if GEN_AI_API_KEY and fetch_default_provider(db_session) is None: - # Only for dev flows - logger.notice("Setting up default OpenAI LLM for dev.") - llm_model = GEN_AI_MODEL_VERSION or "gpt-4o-mini" - fast_model = FAST_GEN_AI_MODEL_VERSION or "gpt-4o-mini" - model_req = LLMProviderUpsertRequest( - name="DevEnvPresetOpenAI", - provider="openai", - api_key=GEN_AI_API_KEY, - api_base=None, - api_version=None, - custom_config=None, - default_model_name=llm_model, - fast_default_model_name=fast_model, - is_public=True, - groups=[], - display_model_names=[llm_model, fast_model], - model_names=[llm_model, fast_model], - ) - new_llm_provider = upsert_llm_provider( - llm_provider=model_req, db_session=db_session - ) - update_default_provider(provider_id=new_llm_provider.id, db_session=db_session) - - -def update_default_multipass_indexing(db_session: Session) -> None: - docs_exist = check_docs_exist(db_session) - connectors_exist = check_connectors_exist(db_session) - logger.debug(f"Docs exist: {docs_exist}, Connectors exist: {connectors_exist}") - - if not docs_exist and not connectors_exist: - logger.info( - "No existing docs or connectors found. Checking GPU availability for multipass indexing." - ) - gpu_available = gpu_status_request() - logger.info(f"GPU available: {gpu_available}") - - current_settings = get_current_search_settings(db_session) - - logger.notice(f"Updating multipass indexing setting to: {gpu_available}") - updated_settings = SavedSearchSettings.from_db_model(current_settings) - # Enable multipass indexing if GPU is available or if using a cloud provider - updated_settings.multipass_indexing = ( - gpu_available or current_settings.cloud_provider is not None - ) - update_current_search_settings(db_session, updated_settings) - - # Update settings with GPU availability - settings = load_settings() - settings.gpu_enabled = gpu_available - store_settings(settings) - logger.notice(f"Updated settings with GPU availability: {gpu_available}") - - else: - logger.debug( - "Existing docs or connectors found. Skipping multipass indexing update." 
- ) - - -def translate_saved_search_settings(db_session: Session) -> None: - kv_store = get_dynamic_config_store() - - try: - search_settings_dict = kv_store.load(KV_SEARCH_SETTINGS) - if isinstance(search_settings_dict, dict): - # Update current search settings - current_settings = get_current_search_settings(db_session) - - # Update non-preserved fields - if current_settings: - current_settings_dict = SavedSearchSettings.from_db_model( - current_settings - ).dict() - - new_current_settings = SavedSearchSettings( - **{**current_settings_dict, **search_settings_dict} - ) - update_current_search_settings(db_session, new_current_settings) - - # Update secondary search settings - secondary_settings = get_secondary_search_settings(db_session) - if secondary_settings: - secondary_settings_dict = SavedSearchSettings.from_db_model( - secondary_settings - ).dict() - - new_secondary_settings = SavedSearchSettings( - **{**secondary_settings_dict, **search_settings_dict} - ) - update_secondary_search_settings( - db_session, - new_secondary_settings, - ) - # Delete the KV store entry after successful update - kv_store.delete(KV_SEARCH_SETTINGS) - logger.notice("Search settings updated and KV store entry deleted.") - else: - logger.notice("KV store search settings is empty.") - except ConfigNotFoundError: - logger.notice("No search config found in KV store.") - - -def mark_reindex_flag(db_session: Session) -> None: - kv_store = get_dynamic_config_store() - try: - value = kv_store.load(KV_REINDEX_KEY) - logger.debug(f"Re-indexing flag has value {value}") - return - except ConfigNotFoundError: - # Only need to update the flag if it hasn't been set - pass - - # If their first deployment is after the changes, it will - # enable this when the other changes go in, need to avoid - # this being set to False, then the user indexes things on the old version - docs_exist = check_docs_exist(db_session) - connectors_exist = check_connectors_exist(db_session) - if docs_exist or connectors_exist: - kv_store.store(KV_REINDEX_KEY, True) - else: - kv_store.store(KV_REINDEX_KEY, False) - - -def setup_vespa( - document_index: DocumentIndex, - index_setting: IndexingSetting, - secondary_index_setting: IndexingSetting | None, -) -> bool: - # Vespa startup is a bit slow, so give it a few seconds - WAIT_SECONDS = 5 - VESPA_ATTEMPTS = 5 - for x in range(VESPA_ATTEMPTS): - try: - logger.notice(f"Setting up Vespa (attempt {x+1}/{VESPA_ATTEMPTS})...") - document_index.ensure_indices_exist( - index_embedding_dim=index_setting.model_dim, - secondary_index_embedding_dim=secondary_index_setting.model_dim - if secondary_index_setting - else None, - ) - - logger.notice("Vespa setup complete.") - return True - except Exception: - logger.notice( - f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds." - ) - time.sleep(WAIT_SECONDS) - - logger.error( - f"Vespa setup did not succeed. Attempt limit reached. 
({VESPA_ATTEMPTS})" - ) - return False - - @asynccontextmanager async def lifespan(app: FastAPI) -> AsyncGenerator: - init_sqlalchemy_engine(POSTGRES_WEB_APP_NAME) - engine = get_sqlalchemy_engine() + # Set recursion limit + if SYSTEM_RECURSION_LIMIT is not None: + sys.setrecursionlimit(SYSTEM_RECURSION_LIMIT) + logger.notice(f"System recursion limit set to {SYSTEM_RECURSION_LIMIT}") + + SqlEngine.set_app_name(POSTGRES_WEB_APP_NAME) + SqlEngine.init_engine( + pool_size=POSTGRES_API_SERVER_POOL_SIZE, + max_overflow=POSTGRES_API_SERVER_POOL_OVERFLOW, + ) + engine = SqlEngine.get_engine() verify_auth = fetch_versioned_implementation( "danswer.auth.users", "verify_auth_setting" ) + # Will throw exception if an issue is found verify_auth() @@ -371,93 +178,15 @@ async def lifespan(app: FastAPI) -> AsyncGenerator: # fill up Postgres connection pools await warm_up_connections() - # We cache this at the beginning so there is no delay in the first telemetry - get_or_generate_uuid() - - with Session(engine) as db_session: - check_index_swap(db_session=db_session) - search_settings = get_current_search_settings(db_session) - secondary_search_settings = get_secondary_search_settings(db_session) - - # Break bad state for thrashing indexes - if secondary_search_settings and DISABLE_INDEX_UPDATE_ON_SWAP: - expire_index_attempts( - search_settings_id=search_settings.id, db_session=db_session - ) - - for cc_pair in get_connector_credential_pairs(db_session): - resync_cc_pair(cc_pair, db_session=db_session) - - # Expire all old embedding models indexing attempts, technically redundant - cancel_indexing_attempts_past_model(db_session) - - logger.notice(f'Using Embedding model: "{search_settings.model_name}"') - if search_settings.query_prefix or search_settings.passage_prefix: - logger.notice(f'Query embedding prefix: "{search_settings.query_prefix}"') - logger.notice( - f'Passage embedding prefix: "{search_settings.passage_prefix}"' - ) - - if search_settings: - if not search_settings.disable_rerank_for_streaming: - logger.notice("Reranking is enabled.") - - if search_settings.multilingual_expansion: - logger.notice( - f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}." - ) - if ( - search_settings.rerank_model_name - and not search_settings.provider_type - and not search_settings.rerank_provider_type - ): - warm_up_cross_encoder(search_settings.rerank_model_name) - - logger.notice("Verifying query preprocessing (NLTK) data is downloaded") - download_nltk_data() - - # setup Postgres with default credential, llm providers, etc. 
- setup_postgres(db_session) - - translate_saved_search_settings(db_session) - - # Does the user need to trigger a reindexing to bring the document index - # into a good state, marked in the kv store - mark_reindex_flag(db_session) - - # ensure Vespa is setup correctly - logger.notice("Verifying Document Index(s) is/are available.") - document_index = get_default_document_index( - primary_index_name=search_settings.index_name, - secondary_index_name=secondary_search_settings.index_name - if secondary_search_settings - else None, - ) + if not MULTI_TENANT: + # We cache this at the beginning so there is no delay in the first telemetry + get_or_generate_uuid() - success = setup_vespa( - document_index, - IndexingSetting.from_db_model(search_settings), - IndexingSetting.from_db_model(secondary_search_settings) - if secondary_search_settings - else None, - ) - if not success: - raise RuntimeError( - "Could not connect to Vespa within the specified timeout." - ) - - logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") - if search_settings.provider_type is None: - warm_up_bi_encoder( - embedding_model=EmbeddingModel.from_db_model( - search_settings=search_settings, - server_host=MODEL_SERVER_HOST, - server_port=MODEL_SERVER_PORT, - ), - ) - - # update multipass indexing setting based on GPU availability - update_default_multipass_indexing(db_session) + # If we are multi-tenant, we need to only set up initial public tables + with Session(engine) as db_session: + setup_danswer(db_session, None) + else: + setup_multitenant_danswer() optional_telemetry(record_type=RecordType.VERSION, data={"version": __version__}) yield @@ -481,6 +210,15 @@ def get_application() -> FastAPI: application = FastAPI( title="Danswer Backend", version=__version__, lifespan=lifespan ) + if SENTRY_DSN: + sentry_sdk.init( + dsn=SENTRY_DSN, + integrations=[StarletteIntegration(), FastApiIntegration()], + traces_sample_rate=0.1, + ) + logger.info("Sentry initialized") + else: + logger.debug("Sentry DSN not provided, skipping Sentry initialization") # Add the custom exception handler application.add_exception_handler(status.HTTP_400_BAD_REQUEST, log_http_error) @@ -510,6 +248,7 @@ def get_application() -> FastAPI: include_router_with_global_prefix_prepended(application, admin_persona_router) include_router_with_global_prefix_prepended(application, input_prompt_router) include_router_with_global_prefix_prepended(application, admin_input_prompt_router) + include_router_with_global_prefix_prepended(application, notification_router) include_router_with_global_prefix_prepended(application, prompt_router) include_router_with_global_prefix_prepended(application, tool_router) include_router_with_global_prefix_prepended(application, admin_tool_router) @@ -531,7 +270,7 @@ def get_application() -> FastAPI: # Server logs this during auth setup verification step pass - elif AUTH_TYPE == AuthType.BASIC: + if AUTH_TYPE == AuthType.BASIC or AUTH_TYPE == AuthType.CLOUD: include_router_with_global_prefix_prepended( application, fastapi_users.get_auth_router(auth_backend), @@ -563,7 +302,7 @@ def get_application() -> FastAPI: tags=["users"], ) - elif AUTH_TYPE == AuthType.GOOGLE_OAUTH: + if AUTH_TYPE == AuthType.GOOGLE_OAUTH or AUTH_TYPE == AuthType.CLOUD: oauth_client = GoogleOAuth2(OAUTH_CLIENT_ID, OAUTH_CLIENT_SECRET) include_router_with_global_prefix_prepended( application, @@ -579,6 +318,7 @@ def get_application() -> FastAPI: prefix="/auth/oauth", tags=["auth"], ) + # Need basic auth router for `logout` endpoint 
include_router_with_global_prefix_prepended( application, @@ -620,7 +360,7 @@ def get_application() -> FastAPI: f"Starting Danswer Backend version {__version__} on http://{APP_HOST}:{str(APP_PORT)}/" ) - if global_version.get_is_ee_version(): + if global_version.is_ee_version(): logger.notice("Running Enterprise Edition") uvicorn.run(app, host=APP_HOST, port=APP_PORT) diff --git a/backend/danswer/natural_language_processing/search_nlp_models.py b/backend/danswer/natural_language_processing/search_nlp_models.py index 2fbf94a5be2..d75fce304d6 100644 --- a/backend/danswer/natural_language_processing/search_nlp_models.py +++ b/backend/danswer/natural_language_processing/search_nlp_models.py @@ -50,23 +50,26 @@ def clean_model_name(model_str: str) -> str: return model_str.replace("/", "_").replace("-", "_").replace(".", "_") -_WHITELIST = set( - " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\n\t" -) _INITIAL_FILTER = re.compile( "[" - "\U00000080-\U0000FFFF" # All Unicode characters beyond ASCII - "\U00010000-\U0010FFFF" # All Unicode characters in supplementary planes + "\U0000FFF0-\U0000FFFF" # Specials + "\U0001F000-\U0001F9FF" # Emoticons + "\U00002000-\U0000206F" # General Punctuation + "\U00002190-\U000021FF" # Arrows + "\U00002700-\U000027BF" # Dingbats "]+", flags=re.UNICODE, ) def clean_openai_text(text: str) -> str: - # First, remove all weird characters + # Remove specific Unicode ranges that might cause issues cleaned = _INITIAL_FILTER.sub("", text) - # Then, keep only whitelisted characters - return "".join(char for char in cleaned if char in _WHITELIST) + + # Remove any control characters except for newline and tab + cleaned = "".join(ch for ch in cleaned if ch >= " " or ch in "\n\t") + + return cleaned def build_model_server_url( @@ -97,6 +100,8 @@ def __init__( provider_type: EmbeddingProvider | None, retrim_content: bool = False, heartbeat: Heartbeat | None = None, + api_version: str | None = None, + deployment_name: str | None = None, ) -> None: self.api_key = api_key self.provider_type = provider_type @@ -106,6 +111,8 @@ def __init__( self.model_name = model_name self.retrim_content = retrim_content self.api_url = api_url + self.api_version = api_version + self.deployment_name = deployment_name self.tokenizer = get_tokenizer( model_name=model_name, provider_type=provider_type ) @@ -157,6 +164,8 @@ def _batch_encode_texts( embed_request = EmbedRequest( model_name=self.model_name, texts=text_batch, + api_version=self.api_version, + deployment_name=self.deployment_name, max_context_length=max_seq_length, normalize_embeddings=self.normalize, api_key=self.api_key, @@ -239,6 +248,8 @@ def from_db_model( provider_type=search_settings.provider_type, api_url=search_settings.api_url, retrim_content=retrim_content, + api_version=search_settings.api_version, + deployment_name=search_settings.deployment_name, ) diff --git a/backend/danswer/one_shot_answer/answer_question.py b/backend/danswer/one_shot_answer/answer_question.py index f051da82f14..f3cbe2b60af 100644 --- a/backend/danswer/one_shot_answer/answer_question.py +++ b/backend/danswer/one_shot_answer/answer_question.py @@ -52,12 +52,16 @@ from danswer.server.query_and_chat.models import ChatMessageDetail from danswer.server.utils import get_json_line from danswer.tools.force import ForceUseTool -from danswer.tools.search.search_tool import SEARCH_DOC_CONTENT_ID -from danswer.tools.search.search_tool import SEARCH_RESPONSE_SUMMARY_ID -from danswer.tools.search.search_tool import 
SearchResponseSummary -from danswer.tools.search.search_tool import SearchTool -from danswer.tools.search.search_tool import SECTION_RELEVANCE_LIST_ID -from danswer.tools.tool import ToolResponse +from danswer.tools.models import ToolResponse +from danswer.tools.tool_implementations.search.search_tool import SEARCH_DOC_CONTENT_ID +from danswer.tools.tool_implementations.search.search_tool import ( + SEARCH_RESPONSE_SUMMARY_ID, +) +from danswer.tools.tool_implementations.search.search_tool import SearchResponseSummary +from danswer.tools.tool_implementations.search.search_tool import SearchTool +from danswer.tools.tool_implementations.search.search_tool import ( + SECTION_RELEVANCE_LIST_ID, +) from danswer.tools.tool_runner import ToolCallKickoff from danswer.utils.logger import setup_logger from danswer.utils.timing import log_generator_function_time @@ -129,7 +133,19 @@ def stream_answer_objects( persona = temporary_persona if temporary_persona else chat_session.persona - llm, fast_llm = get_llms_for_persona(persona=persona) + try: + llm, fast_llm = get_llms_for_persona(persona=persona) + except ValueError as e: + logger.error( + f"Failed to initialize LLMs for persona '{persona.name}': {str(e)}" + ) + if "No LLM provider" in str(e): + raise ValueError( + "Please configure a Generative AI model to use this feature." + ) from e + raise ValueError( + "Failed to initialize the AI model. Please check your configuration and try again." + ) from e llm_tokenizer = get_tokenizer( model_name=llm.config.model_name, @@ -190,30 +206,33 @@ def stream_answer_objects( max_tokens=max_document_tokens, ) + answer_config = AnswerStyleConfig( + citation_config=CitationConfig() if use_citations else None, + quotes_config=QuotesConfig() if not use_citations else None, + document_pruning_config=document_pruning_config, + ) + search_tool = SearchTool( db_session=db_session, user=user, - evaluation_type=LLMEvaluationType.SKIP - if DISABLE_LLM_DOC_RELEVANCE - else query_req.evaluation_type, + evaluation_type=( + LLMEvaluationType.SKIP + if DISABLE_LLM_DOC_RELEVANCE + else query_req.evaluation_type + ), persona=persona, retrieval_options=query_req.retrieval_options, prompt_config=prompt_config, llm=llm, fast_llm=fast_llm, pruning_config=document_pruning_config, + answer_style_config=answer_config, bypass_acl=bypass_acl, chunks_above=query_req.chunks_above, chunks_below=query_req.chunks_below, full_doc=query_req.full_doc, ) - answer_config = AnswerStyleConfig( - citation_config=CitationConfig() if use_citations else None, - quotes_config=QuotesConfig() if not use_citations else None, - document_pruning_config=document_pruning_config, - ) - answer = Answer( question=query_msg.message, answer_style_config=answer_config, @@ -234,7 +253,7 @@ def stream_answer_objects( return_contexts=query_req.return_contexts, skip_gen_ai_answer_generation=query_req.skip_gen_ai_answer_generation, ) - # won't be any ImageGenerationDisplay responses since that tool is never passed in + # won't be any FileChatDisplay responses since that tool is never passed in for packet in cast(AnswerObjectIterator, answer.processed_streamed_output): # for one-shot flow, don't currently do anything with these if isinstance(packet, ToolResponse): diff --git a/backend/danswer/prompts/chat_prompts.py b/backend/danswer/prompts/chat_prompts.py index a5fa973f37c..a9653254f9a 100644 --- a/backend/danswer/prompts/chat_prompts.py +++ b/backend/danswer/prompts/chat_prompts.py @@ -110,8 +110,8 @@ and additional information or details would provide little or no 
value. - The query is some task that does not require additional information to handle. -{GENERAL_SEP_PAT} Conversation History: +{GENERAL_SEP_PAT} {{chat_history}} {GENERAL_SEP_PAT} @@ -135,8 +135,8 @@ Strip out any information that is not relevant for the retrieval task. If the follow up message is an error or code snippet, repeat the same input back EXACTLY. -{GENERAL_SEP_PAT} Chat History: +{GENERAL_SEP_PAT} {{chat_history}} {GENERAL_SEP_PAT} @@ -152,8 +152,8 @@ If there is a clear change in topic, ensure the query reflects the new topic accurately. Strip out any information that is not relevant for the internet search. -{GENERAL_SEP_PAT} Chat History: +{GENERAL_SEP_PAT} {{chat_history}} {GENERAL_SEP_PAT} @@ -210,6 +210,7 @@ Focus the name on the important keywords to convey the topic of the conversation. Chat History: +{GENERAL_SEP_PAT} {{chat_history}} {GENERAL_SEP_PAT} diff --git a/backend/danswer/prompts/direct_qa_prompts.py b/backend/danswer/prompts/direct_qa_prompts.py index 0139da13e88..b1229b896a7 100644 --- a/backend/danswer/prompts/direct_qa_prompts.py +++ b/backend/danswer/prompts/direct_qa_prompts.py @@ -72,7 +72,8 @@ JSON_PROMPT = f""" {{system_prompt}} {REQUIRE_JSON} -{{context_block}}{{history_block}}{{task_prompt}} +{{context_block}}{{history_block}} +{{task_prompt}} SAMPLE RESPONSE: ``` @@ -91,6 +92,7 @@ # "conversation history" block CITATIONS_PROMPT = f""" Refer to the following context documents when responding to me.{DEFAULT_IGNORE_STATEMENT} + CONTEXT: {GENERAL_SEP_PAT} {{context_docs_str}} @@ -109,10 +111,7 @@ Refer to the provided context documents when responding to me.{DEFAULT_IGNORE_STATEMENT} \ You should always get right to the point, and never use extraneous language. -CHAT HISTORY: -{{history_block}} - -{{task_prompt}} +{{history_block}}{{task_prompt}} {QUESTION_PAT.upper()} {{user_query}} diff --git a/backend/danswer/prompts/prompt_utils.py b/backend/danswer/prompts/prompt_utils.py index cd59e97061f..5e1a3e298fb 100644 --- a/backend/danswer/prompts/prompt_utils.py +++ b/backend/danswer/prompts/prompt_utils.py @@ -13,6 +13,10 @@ from danswer.prompts.chat_prompts import CITATION_REMINDER from danswer.prompts.constants import CODE_BLOCK_PAT from danswer.search.models import InferenceChunk +from danswer.utils.logger import setup_logger + + +logger = setup_logger() MOST_BASIC_PROMPT = "You are a helpful AI assistant." @@ -136,14 +140,23 @@ def find_last_index(lst: list[int], max_prompt_tokens: int) -> int: before the list exceeds the maximum""" running_sum = 0 + if not lst: + logger.warning("Empty message history passed to find_last_index") + return 0 + last_ind = 0 for i in range(len(lst) - 1, -1, -1): running_sum += lst[i] + _PER_MESSAGE_TOKEN_BUFFER if running_sum > max_prompt_tokens: last_ind = i + 1 break + if last_ind >= len(lst): + logger.error( + f"Last message alone is too large! 
max_prompt_tokens: {max_prompt_tokens}, message_token_counts: {lst}" + ) raise ValueError("Last message alone is too large!") + return last_ind diff --git a/backend/danswer/redis/redis_connector.py b/backend/danswer/redis/redis_connector.py new file mode 100644 index 00000000000..df61f986ede --- /dev/null +++ b/backend/danswer/redis/redis_connector.py @@ -0,0 +1,72 @@ +import redis + +from danswer.redis.redis_connector_delete import RedisConnectorDelete +from danswer.redis.redis_connector_index import RedisConnectorIndex +from danswer.redis.redis_connector_prune import RedisConnectorPrune +from danswer.redis.redis_connector_stop import RedisConnectorStop +from danswer.redis.redis_pool import get_redis_client + + +class RedisConnector: + """Composes several classes to simplify interacting with a connector and its + associated background tasks / associated redis interactions.""" + + def __init__(self, tenant_id: str | None, id: int) -> None: + self.tenant_id: str | None = tenant_id + self.id: int = id + self.redis: redis.Redis = get_redis_client(tenant_id=tenant_id) + + self.stop = RedisConnectorStop(tenant_id, id, self.redis) + self.prune = RedisConnectorPrune(tenant_id, id, self.redis) + self.delete = RedisConnectorDelete(tenant_id, id, self.redis) + + def new_index(self, search_settings_id: int) -> RedisConnectorIndex: + return RedisConnectorIndex( + self.tenant_id, self.id, search_settings_id, self.redis + ) + + @staticmethod + def get_id_from_fence_key(key: str) -> str | None: + """ + Extracts the object ID from a fence key in the format `PREFIX_fence_X`. + + Args: + key (str): The fence key string. + + Returns: + Optional[int]: The extracted ID if the key is in the correct format, otherwise None. + """ + parts = key.split("_") + if len(parts) != 3: + return None + + object_id = parts[2] + return object_id + + @staticmethod + def get_id_from_task_id(task_id: str) -> str | None: + """ + Extracts the object ID from a task ID string. + + This method assumes the task ID is formatted as `prefix_objectid_suffix`, where: + - `prefix` is an arbitrary string (e.g., the name of the task or entity), + - `objectid` is the ID you want to extract, + - `suffix` is another arbitrary string (e.g., a UUID). + + Example: + If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`, + this method will return the string `"1"`. + + Args: + task_id (str): The task ID string from which to extract the object ID. + + Returns: + str | None: The extracted object ID if the task ID is in the correct format, otherwise None. 
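Both extraction helpers rely purely on the `_`-separated key conventions documented above, so they can be exercised without a live redis. A small sketch of the expected round trips (it assumes only that the module is importable under the path shown in this diff):

```python
from danswer.redis.redis_connector import RedisConnector

# fence keys follow "<prefix>_fence_<id>"
assert RedisConnector.get_id_from_fence_key("connectordeletion_fence_42") == "42"

# task ids follow "<prefix>_<objectid>_<uuid>", as in the docstring example
task_id = "documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc"
assert RedisConnector.get_id_from_task_id(task_id) == "1"

# anything that does not split into exactly three "_"-separated parts yields None
assert RedisConnector.get_id_from_fence_key("malformed-key") is None
```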
+ """ + # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc + parts = task_id.split("_") + if len(parts) != 3: + return None + + object_id = parts[1] + return object_id diff --git a/backend/danswer/redis/redis_connector_credential_pair.py b/backend/danswer/redis/redis_connector_credential_pair.py new file mode 100644 index 00000000000..bbad3700111 --- /dev/null +++ b/backend/danswer/redis/redis_connector_credential_pair.py @@ -0,0 +1,97 @@ +import time +from uuid import uuid4 + +import redis +from celery import Celery +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id +from danswer.db.document import ( + construct_document_select_for_connector_credential_pair_by_needs_sync, +) +from danswer.redis.redis_object_helper import RedisObjectHelper + + +class RedisConnectorCredentialPair(RedisObjectHelper): + """This class is used to scan documents by cc_pair in the db and collect them into + a unified set for syncing. + + It differs from the other redis helpers in that the taskset used spans + all connectors and is not per connector.""" + + PREFIX = "connectorsync" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def __init__(self, tenant_id: str | None, id: int) -> None: + super().__init__(tenant_id, str(id)) + + @classmethod + def get_fence_key(cls) -> str: + return RedisConnectorCredentialPair.FENCE_PREFIX + + @classmethod + def get_taskset_key(cls) -> str: + return RedisConnectorCredentialPair.TASKSET_PREFIX + + @property + def taskset_key(self) -> str: + """Notice that this is intentionally reusing the same taskset for all + connector syncs""" + # example: connector_taskset + return f"{self.TASKSET_PREFIX}" + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + tenant_id: str | None, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + cc_pair = get_connector_credential_pair_from_id(int(self._id), db_session) + if not cc_pair: + return None + + stmt = construct_document_select_for_connector_credential_pair_by_needs_sync( + cc_pair.connector_id, cc_pair.credential_id + ) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.task_id_prefix}_{uuid4()}" + + # add to the tracking taskset in redis BEFORE creating the celery task. 
+ # note that for the moment we are using a single taskset key, not differentiated by cc_pair id + redis_client.sadd( + RedisConnectorCredentialPair.get_taskset_key(), custom_task_id + ) + + # Priority on sync's triggered by new indexing should be medium + result = celery_app.send_task( + "vespa_metadata_sync_task", + kwargs=dict(document_id=doc.id, tenant_id=tenant_id), + queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, + task_id=custom_task_id, + priority=DanswerCeleryPriority.MEDIUM, + ) + + async_results.append(result) + + return len(async_results) diff --git a/backend/danswer/redis/redis_connector_delete.py b/backend/danswer/redis/redis_connector_delete.py new file mode 100644 index 00000000000..ba250c5b0f7 --- /dev/null +++ b/backend/danswer/redis/redis_connector_delete.py @@ -0,0 +1,145 @@ +import time +from datetime import datetime +from typing import cast +from uuid import uuid4 + +import redis +from celery import Celery +from pydantic import BaseModel +from sqlalchemy.orm import Session + +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id +from danswer.db.document import construct_document_select_for_connector_credential_pair + + +class RedisConnectorDeletionFenceData(BaseModel): + num_tasks: int | None + submitted: datetime + + +class RedisConnectorDelete: + """Manages interactions with redis for deletion tasks. Should only be accessed + through RedisConnector.""" + + PREFIX = "connectordeletion" + FENCE_PREFIX = f"{PREFIX}_fence" # "connectordeletion_fence" + TASKSET_PREFIX = f"{PREFIX}_taskset" # "connectordeletion_taskset" + + def __init__(self, tenant_id: str | None, id: int, redis: redis.Redis) -> None: + self.tenant_id: str | None = tenant_id + self.id = id + self.redis = redis + + self.fence_key: str = f"{self.FENCE_PREFIX}_{id}" + self.taskset_key = f"{self.TASKSET_PREFIX}_{id}" + + def taskset_clear(self) -> None: + self.redis.delete(self.taskset_key) + + def get_remaining(self) -> int: + # todo: move into fence + remaining = cast(int, self.redis.scard(self.taskset_key)) + return remaining + + @property + def fenced(self) -> bool: + if self.redis.exists(self.fence_key): + return True + + return False + + @property + def payload(self) -> RedisConnectorDeletionFenceData | None: + # read related data and evaluate/print task progress + fence_bytes = cast(bytes, self.redis.get(self.fence_key)) + if fence_bytes is None: + return None + + fence_str = fence_bytes.decode("utf-8") + payload = RedisConnectorDeletionFenceData.model_validate_json( + cast(str, fence_str) + ) + + return payload + + def set_fence(self, payload: RedisConnectorDeletionFenceData | None) -> None: + if not payload: + self.redis.delete(self.fence_key) + return + + self.redis.set(self.fence_key, payload.model_dump_json()) + + def _generate_task_id(self) -> str: + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "connectordeletion_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + + return f"{self.PREFIX}_{self.id}_{uuid4()}" + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + lock: redis.lock.Lock, + ) -> int | None: + """Returns None if the cc_pair doesn't exist. 
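Putting the pieces of this class together, a deletion run is fenced before tasks are generated and un-fenced once the taskset drains. A sketch of that lifecycle from the orchestrating side (assumes a reachable redis and an existing cc_pair; the polling logic is illustrative):

```python
from datetime import datetime, timezone

from danswer.redis.redis_connector import RedisConnector
from danswer.redis.redis_connector_delete import RedisConnectorDeletionFenceData

connector = RedisConnector(tenant_id=None, id=1)

# 1. raise the fence up front so other beat tasks see a deletion in progress
connector.delete.set_fence(
    RedisConnectorDeletionFenceData(
        num_tasks=None, submitted=datetime.now(timezone.utc)
    )
)

# 2. ... generate_tasks() fans out one cleanup task per document ...

# 3. a monitor can poll the taskset and lower the fence when it drains
if connector.delete.fenced and connector.delete.get_remaining() == 0:
    connector.delete.taskset_clear()
    connector.delete.set_fence(None)
```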
+ Otherwise, returns an int with the number of generated tasks.""" + last_lock_time = time.monotonic() + + async_results = [] + cc_pair = get_connector_credential_pair_from_id(int(self.id), db_session) + if not cc_pair: + return None + + stmt = construct_document_select_for_connector_credential_pair( + cc_pair.connector_id, cc_pair.credential_id + ) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + custom_task_id = self._generate_task_id() + + # add to the tracking taskset in redis BEFORE creating the celery task. + # note that for the moment we are using a single taskset key, not differentiated by cc_pair id + self.redis.sadd(self.taskset_key, custom_task_id) + + # Priority on sync's triggered by new indexing should be medium + result = celery_app.send_task( + "document_by_cc_pair_cleanup_task", + kwargs=dict( + document_id=doc.id, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + tenant_id=self.tenant_id, + ), + queue=DanswerCeleryQueues.CONNECTOR_DELETION, + task_id=custom_task_id, + priority=DanswerCeleryPriority.MEDIUM, + ) + + async_results.append(result) + + return len(async_results) + + @staticmethod + def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None: + taskset_key = f"{RedisConnectorDelete.TASKSET_PREFIX}_{id}" + r.srem(taskset_key, task_id) + return + + @staticmethod + def reset_all(r: redis.Redis) -> None: + """Deletes all redis values for all connectors""" + for key in r.scan_iter(RedisConnectorDelete.TASKSET_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisConnectorDelete.FENCE_PREFIX + "*"): + r.delete(key) diff --git a/backend/danswer/redis/redis_connector_index.py b/backend/danswer/redis/redis_connector_index.py new file mode 100644 index 00000000000..3883ddceaa3 --- /dev/null +++ b/backend/danswer/redis/redis_connector_index.py @@ -0,0 +1,146 @@ +from datetime import datetime +from typing import cast +from uuid import uuid4 + +import redis +from pydantic import BaseModel + + +class RedisConnectorIndexingFenceData(BaseModel): + index_attempt_id: int | None + started: datetime | None + submitted: datetime + celery_task_id: str | None + + +class RedisConnectorIndex: + """Manages interactions with redis for indexing tasks. 
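The fence payloads are plain pydantic models, so what gets written to and read back from the fence key is just their JSON dump. A quick round-trip sketch for the indexing payload defined above (the field values are arbitrary examples):

```python
from datetime import datetime, timezone

from danswer.redis.redis_connector_index import RedisConnectorIndexingFenceData

payload = RedisConnectorIndexingFenceData(
    index_attempt_id=7,
    started=None,
    submitted=datetime.now(timezone.utc),
    celery_task_id=None,
)

# set_fence() stores exactly this JSON; the payload property parses it back
raw = payload.model_dump_json()
assert RedisConnectorIndexingFenceData.model_validate_json(raw) == payload
```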
Should only be accessed + through RedisConnector.""" + + PREFIX = "connectorindexing" + FENCE_PREFIX = f"{PREFIX}_fence" # "connectorindexing_fence" + GENERATOR_TASK_PREFIX = PREFIX + "+generator" # "connectorindexing+generator_fence" + GENERATOR_PROGRESS_PREFIX = ( + PREFIX + "_generator_progress" + ) # connectorindexing_generator_progress + GENERATOR_COMPLETE_PREFIX = ( + PREFIX + "_generator_complete" + ) # connectorindexing_generator_complete + + GENERATOR_LOCK_PREFIX = "da_lock:indexing" + + def __init__( + self, + tenant_id: str | None, + id: int, + search_settings_id: int, + redis: redis.Redis, + ) -> None: + self.tenant_id: str | None = tenant_id + self.id = id + self.search_settings_id = search_settings_id + self.redis = redis + + self.fence_key: str = f"{self.FENCE_PREFIX}_{id}/{search_settings_id}" + self.generator_progress_key = ( + f"{self.GENERATOR_PROGRESS_PREFIX}_{id}/{search_settings_id}" + ) + self.generator_complete_key = ( + f"{self.GENERATOR_COMPLETE_PREFIX}_{id}/{search_settings_id}" + ) + self.generator_lock_key = ( + f"{self.GENERATOR_LOCK_PREFIX}_{id}/{search_settings_id}" + ) + + @classmethod + def fence_key_with_ids(cls, cc_pair_id: int, search_settings_id: int) -> str: + return f"{cls.FENCE_PREFIX}_{cc_pair_id}/{search_settings_id}" + + def generate_generator_task_id(self) -> str: + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "connectorindexing+generator_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + + return f"{self.GENERATOR_TASK_PREFIX}_{self.id}/{self.search_settings_id}_{uuid4()}" + + @property + def fenced(self) -> bool: + if self.redis.exists(self.fence_key): + return True + + return False + + @property + def payload(self) -> RedisConnectorIndexingFenceData | None: + # read related data and evaluate/print task progress + fence_bytes = cast(bytes, self.redis.get(self.fence_key)) + if fence_bytes is None: + return None + + fence_str = fence_bytes.decode("utf-8") + payload = RedisConnectorIndexingFenceData.model_validate_json( + cast(str, fence_str) + ) + + return payload + + def set_fence( + self, + payload: RedisConnectorIndexingFenceData | None, + ) -> None: + if not payload: + self.redis.delete(self.fence_key) + return + + self.redis.set(self.fence_key, payload.model_dump_json()) + + def set_generator_complete(self, payload: int | None) -> None: + if not payload: + self.redis.delete(self.generator_complete_key) + return + + self.redis.set(self.generator_complete_key, payload) + + def generator_clear(self) -> None: + self.redis.delete(self.generator_progress_key) + self.redis.delete(self.generator_complete_key) + + def get_progress(self) -> int | None: + """Returns None if the key doesn't exist. The""" + # TODO: move into fence? + bytes = self.redis.get(self.generator_progress_key) + if bytes is None: + return None + + progress = int(cast(int, bytes)) + return progress + + def get_completion(self) -> int | None: + # TODO: move into fence? 
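Unlike the other helpers, indexing state is keyed per (cc_pair, search settings) combination, which is visible directly in the key layout. A tiny sketch (no redis connection is needed for this):

```python
from danswer.redis.redis_connector_index import RedisConnectorIndex

# the same connector can be fenced independently under different search settings
key = RedisConnectorIndex.fence_key_with_ids(cc_pair_id=1, search_settings_id=2)
assert key == "connectorindexing_fence_1/2"
```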
+ bytes = self.redis.get(self.generator_complete_key) + if bytes is None: + return None + + status = int(cast(int, bytes)) + return status + + def reset(self) -> None: + self.redis.delete(self.generator_lock_key) + self.redis.delete(self.generator_progress_key) + self.redis.delete(self.generator_complete_key) + self.redis.delete(self.fence_key) + + @staticmethod + def reset_all(r: redis.Redis) -> None: + """Deletes all redis values for all connectors""" + for key in r.scan_iter(RedisConnectorIndex.GENERATOR_LOCK_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisConnectorIndex.GENERATOR_COMPLETE_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisConnectorIndex.GENERATOR_PROGRESS_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisConnectorIndex.FENCE_PREFIX + "*"): + r.delete(key) diff --git a/backend/danswer/redis/redis_connector_prune.py b/backend/danswer/redis/redis_connector_prune.py new file mode 100644 index 00000000000..8892c12b647 --- /dev/null +++ b/backend/danswer/redis/redis_connector_prune.py @@ -0,0 +1,171 @@ +import time +from typing import cast +from uuid import uuid4 + +import redis +from celery import Celery +from sqlalchemy.orm import Session + +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id + + +class RedisConnectorPrune: + """Manages interactions with redis for pruning tasks. Should only be accessed + through RedisConnector.""" + + PREFIX = "connectorpruning" + + FENCE_PREFIX = f"{PREFIX}_fence" + + # phase 1 - geneartor task and progress signals + GENERATORTASK_PREFIX = f"{PREFIX}+generator" # connectorpruning+generator + GENERATOR_PROGRESS_PREFIX = ( + PREFIX + "_generator_progress" + ) # connectorpruning_generator_progress + GENERATOR_COMPLETE_PREFIX = ( + PREFIX + "_generator_complete" + ) # connectorpruning_generator_complete + + TASKSET_PREFIX = f"{PREFIX}_taskset" # connectorpruning_taskset + SUBTASK_PREFIX = f"{PREFIX}+sub" # connectorpruning+sub + + def __init__(self, tenant_id: str | None, id: int, redis: redis.Redis) -> None: + self.tenant_id: str | None = tenant_id + self.id = id + self.redis = redis + + self.fence_key: str = f"{self.FENCE_PREFIX}_{id}" + self.generator_task_key = f"{self.GENERATORTASK_PREFIX}_{id}" + self.generator_progress_key = f"{self.GENERATOR_PROGRESS_PREFIX}_{id}" + self.generator_complete_key = f"{self.GENERATOR_COMPLETE_PREFIX}_{id}" + + self.taskset_key = f"{self.TASKSET_PREFIX}_{id}" + + self.subtask_prefix: str = f"{self.SUBTASK_PREFIX}_{id}" + + def taskset_clear(self) -> None: + self.redis.delete(self.taskset_key) + + def generator_clear(self) -> None: + self.redis.delete(self.generator_progress_key) + self.redis.delete(self.generator_complete_key) + + def get_remaining(self) -> int: + # todo: move into fence + remaining = cast(int, self.redis.scard(self.taskset_key)) + return remaining + + def get_active_task_count(self) -> int: + """Count of active pruning tasks""" + count = 0 + for key in self.redis.scan_iter(RedisConnectorPrune.FENCE_PREFIX + "*"): + count += 1 + return count + + @property + def fenced(self) -> bool: + if self.redis.exists(self.fence_key): + return True + + return False + + def set_fence(self, value: bool) -> None: + if not value: + self.redis.delete(self.fence_key) + return + + self.redis.set(self.fence_key, 0) + + @property + def 
generator_complete(self) -> int | None: + """the fence payload is an int representing the starting number of + pruning tasks to be processed ... just after the generator completes.""" + fence_bytes = self.redis.get(self.generator_complete_key) + if fence_bytes is None: + return None + + fence_int = cast(int, fence_bytes) + return fence_int + + @generator_complete.setter + def generator_complete(self, payload: int | None) -> None: + """Set the payload to an int to set the fence, otherwise if None it will + be deleted""" + if payload is None: + self.redis.delete(self.generator_complete_key) + return + + self.redis.set(self.generator_complete_key, payload) + + def generate_tasks( + self, + documents_to_prune: set[str], + celery_app: Celery, + db_session: Session, + lock: redis.lock.Lock | None, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + cc_pair = get_connector_credential_pair_from_id(int(self.id), db_session) + if not cc_pair: + return None + + for doc_id in documents_to_prune: + current_time = time.monotonic() + if lock and current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the actual redis key is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.subtask_prefix}_{uuid4()}" + + # add to the tracking taskset in redis BEFORE creating the celery task. + self.redis.sadd(self.taskset_key, custom_task_id) + + # Priority on sync's triggered by new indexing should be medium + result = celery_app.send_task( + "document_by_cc_pair_cleanup_task", + kwargs=dict( + document_id=doc_id, + connector_id=cc_pair.connector_id, + credential_id=cc_pair.credential_id, + tenant_id=self.tenant_id, + ), + queue=DanswerCeleryQueues.CONNECTOR_DELETION, + task_id=custom_task_id, + priority=DanswerCeleryPriority.MEDIUM, + ) + + async_results.append(result) + + return len(async_results) + + @staticmethod + def remove_from_taskset(id: int, task_id: str, r: redis.Redis) -> None: + taskset_key = f"{RedisConnectorPrune.TASKSET_PREFIX}_{id}" + r.srem(taskset_key, task_id) + return + + @staticmethod + def reset_all(r: redis.Redis) -> None: + """Deletes all redis values for all connectors""" + for key in r.scan_iter(RedisConnectorPrune.TASKSET_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisConnectorPrune.GENERATOR_COMPLETE_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisConnectorPrune.GENERATOR_PROGRESS_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisConnectorPrune.FENCE_PREFIX + "*"): + r.delete(key) diff --git a/backend/danswer/redis/redis_connector_stop.py b/backend/danswer/redis/redis_connector_stop.py new file mode 100644 index 00000000000..c65c57ff7f4 --- /dev/null +++ b/backend/danswer/redis/redis_connector_stop.py @@ -0,0 +1,34 @@ +import redis + + +class RedisConnectorStop: + """Manages interactions with redis for stop signaling. 
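The stop helper below is the simplest of the set: the fence key itself is the signal, with no payload beyond a placeholder value. A sketch of how both sides of a stop request might look (assumes a reachable redis; the worker loop is illustrative, not part of this change):

```python
from danswer.redis.redis_connector import RedisConnector

connector = RedisConnector(tenant_id=None, id=1)

# requesting side: raise the stop fence
connector.stop.set_fence(True)

# worker side: check the fence between units of work and bail out early
def process_batches(batches: list[list[str]]) -> None:
    for batch in batches:
        if connector.stop.fenced:
            break
        # ... do the per-batch connector work here ...

# once the work has wound down, the requester clears the fence again
connector.stop.set_fence(False)
```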
Should only be accessed + through RedisConnector.""" + + FENCE_PREFIX = "connectorstop_fence" + + def __init__(self, tenant_id: str | None, id: int, redis: redis.Redis) -> None: + self.tenant_id: str | None = tenant_id + self.id: int = id + self.redis = redis + + self.fence_key: str = f"{self.FENCE_PREFIX}_{id}" + + @property + def fenced(self) -> bool: + if self.redis.exists(self.fence_key): + return True + + return False + + def set_fence(self, value: bool) -> None: + if not value: + self.redis.delete(self.fence_key) + return + + self.redis.set(self.fence_key, 0) + + @staticmethod + def reset_all(r: redis.Redis) -> None: + for key in r.scan_iter(RedisConnectorStop.FENCE_PREFIX + "*"): + r.delete(key) diff --git a/backend/danswer/redis/redis_document_set.py b/backend/danswer/redis/redis_document_set.py new file mode 100644 index 00000000000..102e910feec --- /dev/null +++ b/backend/danswer/redis/redis_document_set.py @@ -0,0 +1,99 @@ +import time +from typing import cast +from uuid import uuid4 + +import redis +from celery import Celery +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.db.document_set import construct_document_select_by_docset +from danswer.redis.redis_object_helper import RedisObjectHelper + + +class RedisDocumentSet(RedisObjectHelper): + PREFIX = "documentset" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def __init__(self, tenant_id: str | None, id: int) -> None: + super().__init__(tenant_id, str(id)) + + @property + def fenced(self) -> bool: + if self.redis.exists(self.fence_key): + return True + + return False + + def set_fence(self, payload: int | None) -> None: + if payload is None: + self.redis.delete(self.fence_key) + return + + self.redis.set(self.fence_key, payload) + + @property + def payload(self) -> int | None: + bytes = self.redis.get(self.fence_key) + if bytes is None: + return None + + progress = int(cast(int, bytes)) + return progress + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + tenant_id: str | None, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + stmt = construct_document_select_by_docset(int(self._id), current_only=False) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.task_id_prefix}_{uuid4()}" + + # add to the set BEFORE creating the task. 
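From the monitoring side, the document set fence is just an integer payload plus the taskset built up below. A small sketch of reading it back (assumes a reachable redis; the id is an example):

```python
from danswer.redis.redis_document_set import RedisDocumentSet

rds = RedisDocumentSet(tenant_id=None, id=1)

if rds.fenced:
    # the int written by whoever kicked off the sync (e.g. the task count)
    queued = rds.payload
    print(f"document set 1 sync in flight, fence payload: {queued}")
else:
    print("document set 1 has no sync in flight")
```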
+ redis_client.sadd(self.taskset_key, custom_task_id) + + result = celery_app.send_task( + "vespa_metadata_sync_task", + kwargs=dict(document_id=doc.id, tenant_id=tenant_id), + queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, + task_id=custom_task_id, + priority=DanswerCeleryPriority.LOW, + ) + + async_results.append(result) + + return len(async_results) + + def reset(self) -> None: + self.redis.delete(self.taskset_key) + self.redis.delete(self.fence_key) + + @staticmethod + def reset_all(r: redis.Redis) -> None: + for key in r.scan_iter(RedisDocumentSet.TASKSET_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisDocumentSet.FENCE_PREFIX + "*"): + r.delete(key) diff --git a/backend/danswer/redis/redis_object_helper.py b/backend/danswer/redis/redis_object_helper.py new file mode 100644 index 00000000000..629f15e6058 --- /dev/null +++ b/backend/danswer/redis/redis_object_helper.py @@ -0,0 +1,91 @@ +from abc import ABC +from abc import abstractmethod + +import redis +from celery import Celery +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.redis.redis_pool import get_redis_client + + +class RedisObjectHelper(ABC): + PREFIX = "base" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def __init__(self, tenant_id: str | None, id: str): + self._tenant_id: str | None = tenant_id + self._id: str = id + self.redis = get_redis_client(tenant_id=tenant_id) + + @property + def task_id_prefix(self) -> str: + return f"{self.PREFIX}_{self._id}" + + @property + def fence_key(self) -> str: + # example: documentset_fence_1 + return f"{self.FENCE_PREFIX}_{self._id}" + + @property + def taskset_key(self) -> str: + # example: documentset_taskset_1 + return f"{self.TASKSET_PREFIX}_{self._id}" + + @staticmethod + def get_id_from_fence_key(key: str) -> str | None: + """ + Extracts the object ID from a fence key in the format `PREFIX_fence_X`. + + Args: + key (str): The fence key string. + + Returns: + Optional[int]: The extracted ID if the key is in the correct format, otherwise None. + """ + parts = key.split("_") + if len(parts) != 3: + return None + + object_id = parts[2] + return object_id + + @staticmethod + def get_id_from_task_id(task_id: str) -> str | None: + """ + Extracts the object ID from a task ID string. + + This method assumes the task ID is formatted as `prefix_objectid_suffix`, where: + - `prefix` is an arbitrary string (e.g., the name of the task or entity), + - `objectid` is the ID you want to extract, + - `suffix` is another arbitrary string (e.g., a UUID). + + Example: + If the input `task_id` is `documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc`, + this method will return the string `"1"`. + + Args: + task_id (str): The task ID string from which to extract the object ID. + + Returns: + str | None: The extracted object ID if the task ID is in the correct format, otherwise None. 
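The key names that the base class derives from PREFIX and the object id follow the examples in the comments above; for instance, for the document set helper (constructing the helper does not talk to redis yet, so this can be checked without a server):

```python
from danswer.redis.redis_document_set import RedisDocumentSet

rds = RedisDocumentSet(tenant_id=None, id=1)

assert rds.task_id_prefix == "documentset_1"
assert rds.fence_key == "documentset_fence_1"
assert rds.taskset_key == "documentset_taskset_1"
```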
+ """ + # example: task_id=documentset_1_cbfdc96a-80ca-4312-a242-0bb68da3c1dc + parts = task_id.split("_") + if len(parts) != 3: + return None + + object_id = parts[1] + return object_id + + @abstractmethod + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + tenant_id: str | None, + ) -> int | None: + pass diff --git a/backend/danswer/redis/redis_pool.py b/backend/danswer/redis/redis_pool.py index 54cb8d918e4..0fe5c0ddd42 100644 --- a/backend/danswer/redis/redis_pool.py +++ b/backend/danswer/redis/redis_pool.py @@ -1,4 +1,7 @@ +import functools import threading +from collections.abc import Callable +from typing import Any from typing import Optional import redis @@ -14,6 +17,101 @@ from danswer.configs.app_configs import REDIS_SSL_CA_CERTS from danswer.configs.app_configs import REDIS_SSL_CERT_REQS from danswer.configs.constants import REDIS_SOCKET_KEEPALIVE_OPTIONS +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class TenantRedis(redis.Redis): + def __init__(self, tenant_id: str, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) + self.tenant_id: str = tenant_id + + def _prefixed(self, key: str | bytes | memoryview) -> str | bytes | memoryview: + prefix: str = f"{self.tenant_id}:" + if isinstance(key, str): + if key.startswith(prefix): + return key + else: + return prefix + key + elif isinstance(key, bytes): + prefix_bytes = prefix.encode() + if key.startswith(prefix_bytes): + return key + else: + return prefix_bytes + key + elif isinstance(key, memoryview): + key_bytes = key.tobytes() + prefix_bytes = prefix.encode() + if key_bytes.startswith(prefix_bytes): + return key + else: + return memoryview(prefix_bytes + key_bytes) + else: + raise TypeError(f"Unsupported key type: {type(key)}") + + def _prefix_method(self, method: Callable) -> Callable: + @functools.wraps(method) + def wrapper(*args: Any, **kwargs: Any) -> Any: + if "name" in kwargs: + kwargs["name"] = self._prefixed(kwargs["name"]) + elif len(args) > 0: + args = (self._prefixed(args[0]),) + args[1:] + return method(*args, **kwargs) + + return wrapper + + def _prefix_scan_iter(self, method: Callable) -> Callable: + @functools.wraps(method) + def wrapper(*args: Any, **kwargs: Any) -> Any: + # Prefix the match pattern if provided + if "match" in kwargs: + kwargs["match"] = self._prefixed(kwargs["match"]) + elif len(args) > 0: + args = (self._prefixed(args[0]),) + args[1:] + + # Get the iterator + iterator = method(*args, **kwargs) + + # Remove prefix from returned keys + prefix = f"{self.tenant_id}:".encode() + prefix_len = len(prefix) + + for key in iterator: + if isinstance(key, bytes) and key.startswith(prefix): + yield key[prefix_len:] + else: + yield key + + return wrapper + + def __getattribute__(self, item: str) -> Any: + original_attr = super().__getattribute__(item) + methods_to_wrap = [ + "lock", + "unlock", + "get", + "set", + "delete", + "exists", + "incrby", + "hset", + "hget", + "getset", + "owned", + "reacquire", + "create_lock", + "startswith", + "sadd", + "srem", + "scard", + ] # Regular methods that need simple prefixing + + if item == "scan_iter": + return self._prefix_scan_iter(original_attr) + elif item in methods_to_wrap and callable(original_attr): + return self._prefix_method(original_attr) + return original_attr class RedisPool: @@ -32,8 +130,10 @@ def __new__(cls) -> "RedisPool": def _init_pool(self) -> None: self._pool = RedisPool.create_pool(ssl=REDIS_SSL) - def get_client(self) 
-> Redis: - return redis.Redis(connection_pool=self._pool) + def get_client(self, tenant_id: str | None) -> Redis: + if tenant_id is None: + tenant_id = "public" + return TenantRedis(tenant_id, connection_pool=self._pool) @staticmethod def create_pool( @@ -81,6 +181,13 @@ def create_pool( ) +redis_pool = RedisPool() + + +def get_redis_client(*, tenant_id: str | None) -> Redis: + return redis_pool.get_client(tenant_id) + + # # Usage example # redis_pool = RedisPool() # redis_client = redis_pool.get_client() diff --git a/backend/danswer/redis/redis_usergroup.py b/backend/danswer/redis/redis_usergroup.py new file mode 100644 index 00000000000..53d2d4fc0a9 --- /dev/null +++ b/backend/danswer/redis/redis_usergroup.py @@ -0,0 +1,112 @@ +import time +from typing import cast +from uuid import uuid4 + +import redis +from celery import Celery +from redis import Redis +from sqlalchemy.orm import Session + +from danswer.configs.constants import CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT +from danswer.configs.constants import DanswerCeleryPriority +from danswer.configs.constants import DanswerCeleryQueues +from danswer.redis.redis_object_helper import RedisObjectHelper +from danswer.utils.variable_functionality import fetch_versioned_implementation +from danswer.utils.variable_functionality import global_version + + +class RedisUserGroup(RedisObjectHelper): + PREFIX = "usergroup" + FENCE_PREFIX = PREFIX + "_fence" + TASKSET_PREFIX = PREFIX + "_taskset" + + def __init__(self, tenant_id: str | None, id: int) -> None: + super().__init__(tenant_id, str(id)) + + @property + def fenced(self) -> bool: + if self.redis.exists(self.fence_key): + return True + + return False + + def set_fence(self, payload: int | None) -> None: + if payload is None: + self.redis.delete(self.fence_key) + return + + self.redis.set(self.fence_key, payload) + + @property + def payload(self) -> int | None: + bytes = self.redis.get(self.fence_key) + if bytes is None: + return None + + progress = int(cast(int, bytes)) + return progress + + def generate_tasks( + self, + celery_app: Celery, + db_session: Session, + redis_client: Redis, + lock: redis.lock.Lock, + tenant_id: str | None, + ) -> int | None: + last_lock_time = time.monotonic() + + async_results = [] + + if not global_version.is_ee_version(): + return 0 + + try: + construct_document_select_by_usergroup = fetch_versioned_implementation( + "danswer.db.user_group", + "construct_document_select_by_usergroup", + ) + except ModuleNotFoundError: + return 0 + + stmt = construct_document_select_by_usergroup(int(self._id)) + for doc in db_session.scalars(stmt).yield_per(1): + current_time = time.monotonic() + if current_time - last_lock_time >= ( + CELERY_VESPA_SYNC_BEAT_LOCK_TIMEOUT / 4 + ): + lock.reacquire() + last_lock_time = current_time + + # celery's default task id format is "dd32ded3-00aa-4884-8b21-42f8332e7fac" + # the key for the result is "celery-task-meta-dd32ded3-00aa-4884-8b21-42f8332e7fac" + # we prefix the task id so it's easier to keep track of who created the task + # aka "documentset_1_6dd32ded3-00aa-4884-8b21-42f8332e7fac" + custom_task_id = f"{self.task_id_prefix}_{uuid4()}" + + # add to the set BEFORE creating the task. 
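The prefixing in TenantRedis above is meant to be invisible to callers: key names are namespaced per tenant on the way in and stripped again when scanning, so the helpers keep using bare key names. A sketch of the observable behaviour (assumes a reachable redis; the tenant id and key are examples):

```python
from danswer.redis.redis_pool import get_redis_client

client = get_redis_client(tenant_id="tenant_abc")

# stored server-side under "tenant_abc:documentset_fence_1" ...
client.set("documentset_fence_1", 3)

# ... but read back and scanned using the un-prefixed name
assert client.get("documentset_fence_1") == b"3"
assert b"documentset_fence_1" in set(client.scan_iter("documentset_fence_*"))
```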
+ redis_client.sadd(self.taskset_key, custom_task_id) + + result = celery_app.send_task( + "vespa_metadata_sync_task", + kwargs=dict(document_id=doc.id, tenant_id=tenant_id), + queue=DanswerCeleryQueues.VESPA_METADATA_SYNC, + task_id=custom_task_id, + priority=DanswerCeleryPriority.LOW, + ) + + async_results.append(result) + + return len(async_results) + + def reset(self) -> None: + self.redis.delete(self.taskset_key) + self.redis.delete(self.fence_key) + + @staticmethod + def reset_all(r: redis.Redis) -> None: + for key in r.scan_iter(RedisUserGroup.TASKSET_PREFIX + "*"): + r.delete(key) + + for key in r.scan_iter(RedisUserGroup.FENCE_PREFIX + "*"): + r.delete(key) diff --git a/backend/danswer/search/models.py b/backend/danswer/search/models.py index 503b07653ef..815fa9d885f 100644 --- a/backend/danswer/search/models.py +++ b/backend/danswer/search/models.py @@ -102,6 +102,7 @@ class BaseFilters(BaseModel): class IndexFilters(BaseFilters): access_control_list: list[str] | None + tenant_id: str | None = None class ChunkMetric(BaseModel): diff --git a/backend/danswer/search/preprocessing/preprocessing.py b/backend/danswer/search/preprocessing/preprocessing.py index 37fb254884a..77098dac053 100644 --- a/backend/danswer/search/preprocessing/preprocessing.py +++ b/backend/danswer/search/preprocessing/preprocessing.py @@ -9,6 +9,7 @@ from danswer.configs.chat_configs import HYBRID_ALPHA_KEYWORD from danswer.configs.chat_configs import NUM_POSTPROCESSED_RESULTS from danswer.configs.chat_configs import NUM_RETURNED_HITS +from danswer.db.engine import CURRENT_TENANT_ID_CONTEXTVAR from danswer.db.models import User from danswer.db.search_settings import get_current_search_settings from danswer.llm.interfaces import LLM @@ -29,6 +30,7 @@ from danswer.utils.threadpool_concurrency import FunctionCall from danswer.utils.threadpool_concurrency import run_functions_in_parallel from danswer.utils.timing import log_function_time +from shared_configs.configs import MULTI_TENANT logger = setup_logger() @@ -160,6 +162,7 @@ def retrieval_preprocessing( time_cutoff=time_filter or predicted_time_cutoff, tags=preset_filters.tags, # Tags are never auto-extracted access_control_list=user_acl_filters, + tenant_id=CURRENT_TENANT_ID_CONTEXTVAR.get() if MULTI_TENANT else None, ) llm_evaluation_type = LLMEvaluationType.BASIC diff --git a/backend/danswer/search/search_settings.py b/backend/danswer/search/search_settings.py index d502205dfe7..f5870de83f1 100644 --- a/backend/danswer/search/search_settings.py +++ b/backend/danswer/search/search_settings.py @@ -1,8 +1,8 @@ from typing import cast from danswer.configs.constants import KV_SEARCH_SETTINGS -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError from danswer.search.models import SavedSearchSettings from danswer.utils.logger import setup_logger @@ -17,10 +17,10 @@ def get_kv_search_settings() -> SavedSearchSettings | None: if the value is updated by another process/instance of the API server. If this reads from an in memory cache like reddis then it will be ok. 
Until then this has some performance implications (though minor) """ - kv_store = get_dynamic_config_store() + kv_store = get_kv_store() try: return SavedSearchSettings(**cast(dict, kv_store.load(KV_SEARCH_SETTINGS))) - except ConfigNotFoundError: + except KvKeyNotFoundError: return None except Exception as e: logger.error(f"Error loading search settings: {e}") diff --git a/backend/danswer/seeding/__init__.py b/backend/danswer/seeding/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/danswer/seeding/initial_docs.json b/backend/danswer/seeding/initial_docs.json new file mode 100644 index 00000000000..cded36721b3 --- /dev/null +++ b/backend/danswer/seeding/initial_docs.json @@ -0,0 +1,10824 @@ +[ + { + "url": "https://docs.danswer.dev/more/use_cases/overview", + "title": "Use Cases Overview", + "content": "How to leverage Danswer in your organization\n\nDanswer Overview\nDanswer is the AI Assistant connected to your organization's docs, apps, and people. Danswer makes Generative AI more versatile for work by enabling new types of questions like \"What is the most common feature request we've heard from customers this month\". Whereas other AI systems have no context of your team and are generally unhelpful with work related questions, Danswer makes it possible to ask these questions in natural language and get back answers in seconds.\n\nDanswer can connect to +30 different tools and the use cases are not limited to the ones in the following pages. The highlighted use cases are for inspiration and come from feedback gathered from our users and customers.\n\n\nCommon Getting Started Questions:\n\nWhy are these docs connected in my Danswer deployment?\nAnswer: This is just an example of how connectors work in Danswer. You can connect up your own team's knowledge and you will be able to ask questions unique to your organization. Danswer will keep all of the knowledge up to date and in sync with your connected applications.\n\nIs my data being sent anywhere when I connect it up to Danswer?\nAnswer: No! Danswer is built with data security as our highest priority. We open sourced it so our users can know exactly what is going on with their data. By default all of the document processing happens within Danswer. The only time it is sent outward is for the GenAI call to generate answers.\n\nWhere is the feature for auto sync-ing document level access permissions from all connected sources?\nAnswer: This falls under the Enterprise Edition set of Danswer features built on top of the MIT/community edition. If you are on Danswer Cloud, you have access to them by default. 
If you're running it yourself, reach out to the Danswer team to receive access.", + "title_embedding": [ + 0.013585364446043968, + 0.06531507521867752, + -0.0001728831703076139, + -0.003940831869840622, + 0.044078364968299866, + -0.006206007208675146, + -0.008377128280699253, + -0.0193742997944355, + -0.018904175609350204, + 0.00868070125579834, + -0.005770757794380188, + 0.018564216792583466, + 0.030414527282118797, + 0.0327068567276001, + -0.0336286760866642, + -0.0517829954624176, + 0.0029869426507502794, + -3.836356700048782e-05, + -0.006240167189389467, + 0.011168955825269222, + -0.04732134938240051, + -0.05257624015212059, + 0.017937077209353447, + -0.029843101277947426, + 0.03417196497321129, + 0.008637758903205395, + -0.016427641734480858, + 0.017053034156560898, + -0.02532368339598179, + -0.016002299264073372, + 0.04696495085954666, + 0.03518024459481239, + 0.02884317748248577, + -0.06098122522234917, + -0.024405447766184807, + -0.07693222165107727, + 0.026796545833349228, + -0.01345108263194561, + 0.030546706169843674, + 0.00011662459291983396, + 0.0362892709672451, + 0.020864704623818398, + -0.0030571012757718563, + -0.014566082507371902, + 0.056138113141059875, + -0.01727251335978508, + 0.05477291718125343, + 0.019774138927459717, + 0.01646329089999199, + -0.020768629387021065, + -0.025477997958660126, + 0.010012250393629074, + 0.0037975965533405542, + -0.076152004301548, + 0.032632406800985336, + -0.00799622479826212, + 0.029365766793489456, + 0.02749769017100334, + 0.030807621777057648, + -0.031911835074424744, + 0.029850834980607033, + 0.05788583680987358, + -0.07022606581449509, + 0.057270754128694534, + -0.012120618484914303, + -0.0351836234331131, + 0.002640453167259693, + 0.01869964227080345, + 0.010610891506075859, + -0.06439802795648575, + 0.06629050523042679, + 0.00746738538146019, + 0.01690092496573925, + -0.025001635774970055, + -0.04047262296080589, + -0.04058482125401497, + 0.01863245666027069, + -0.021404679864645004, + -0.006766777019947767, + 0.05071299150586128, + 0.02962121181190014, + -0.06122489646077156, + 0.019276190549135208, + -0.03599821403622627, + 0.07253828644752502, + -0.001938252942636609, + -0.00785142183303833, + -0.015793368220329285, + -0.06096868962049484, + -0.022668933495879173, + 0.014094856567680836, + 0.03107546642422676, + 0.030937792733311653, + 0.04295564815402031, + -0.06191089749336243, + 0.010305442847311497, + 0.006790813058614731, + -0.05027518793940544, + 0.026334501802921295, + -0.03490821272134781, + -0.03132909536361694, + 0.00332127814181149, + 0.03326006978750229, + 0.05009974539279938, + 0.05102463811635971, + 0.080863356590271, + 0.008220085874199867, + 0.015490916557610035, + -0.029478086158633232, + 0.004051747731864452, + -0.05233829841017723, + 0.032338161021471024, + 0.016430383548140526, + 0.033909399062395096, + -0.0069341897033154964, + -0.01092524453997612, + 0.08201614767313004, + -0.061916135251522064, + -0.0202189888805151, + 0.06966497749090195, + 0.01732165738940239, + 0.020277557894587517, + 0.005021766293793917, + 0.03159264847636223, + 0.027503052726387978, + -0.03912171721458435, + -0.03356969729065895, + 0.018767613917589188, + 0.02705945260822773, + -0.06412986665964127, + 0.01829575188457966, + 0.030256258323788643, + 0.0074773263186216354, + -0.059738945215940475, + 0.042067185044288635, + 0.05707620456814766, + -0.02195868454873562, + -0.018797017633914948, + 0.07043126970529556, + -0.007300470490008593, + 0.04988619685173035, + -0.01761087030172348, + 0.024358391761779785, + 0.00520830973982811, + 
0.007853846065700054, + -0.040487151592969894, + 0.013271456584334373, + 0.01356235146522522, + -0.07054886221885681, + 0.046908896416425705, + 0.0032781255431473255, + 0.022826792672276497, + 0.016794828698039055, + -0.0015572791453450918, + -0.03220144659280777, + -0.05249498412013054, + -0.023642878979444504, + -0.0013240000698715448, + 0.020749850198626518, + -0.032788924872875214, + 0.01523237768560648, + 0.03563430905342102, + -0.011741658672690392, + 0.08171892166137695, + -0.04966546595096588, + -0.02209051325917244, + 0.005958004854619503, + 0.02892436273396015, + 0.03561494126915932, + 0.02638504095375538, + 0.041660238057374954, + -0.05757247656583786, + 0.027456382289528847, + -0.011119958013296127, + 0.05332427844405174, + 0.03419065102934837, + 0.09803897142410278, + -0.0104225380346179, + 0.06645305454730988, + 0.02061033807694912, + -0.0188175980001688, + -0.03409148380160332, + -0.010915222577750683, + 0.016926083713769913, + -0.01010509766638279, + -0.031197063624858856, + 0.064297154545784, + -0.047280170023441315, + -0.022006161510944366, + 0.04089798405766487, + 0.0013760487781837583, + 0.0012917317217215896, + -0.010142299346625805, + -0.05629380792379379, + -0.058489665389060974, + -0.06434599310159683, + 0.04390622675418854, + 0.03466123342514038, + -0.002495409222319722, + -0.01867988333106041, + -0.012142776511609554, + 0.025940915569663048, + -0.05517507344484329, + 0.026919366791844368, + -0.05310383439064026, + 0.0020175466779619455, + 0.0407392643392086, + -0.0055900681763887405, + 0.028038354590535164, + 0.10158932954072952, + 0.056325044482946396, + 0.016724230721592903, + 0.005659179296344519, + 0.04764577001333237, + -0.03514963388442993, + 0.03311126306653023, + -0.05855907127261162, + -0.007677929475903511, + -0.0368916280567646, + 0.02390834502875805, + 0.021506410092115402, + -0.022855432704091072, + 0.02669590339064598, + 0.03190927952528, + 0.026299884542822838, + 0.04545223340392113, + -0.04817903786897659, + 0.08401483297348022, + -0.0017600113060325384, + -0.0026402128860354424, + -0.06519021838903427, + -0.08366627246141434, + 0.025473223999142647, + -0.03265143185853958, + -0.026078224182128906, + -0.014162144623696804, + -0.024846363812685013, + 0.042588867247104645, + -0.00620845053344965, + 0.0341552197933197, + -0.005032224114984274, + 0.039284951984882355, + 0.02678983099758625, + -0.02592509239912033, + -0.0334317646920681, + -0.017748532816767693, + -0.03175748512148857, + -0.03699929639697075, + -0.0009614137816242874, + 0.029971860349178314, + 0.03400350362062454, + 0.03034038282930851, + 0.005932188127189875, + 0.05225752666592598, + -0.032566532492637634, + -0.04808121547102928, + -0.023166747763752937, + 0.02398361638188362, + -0.03062198869884014, + -0.046609822660684586, + 0.019089654088020325, + 0.0017758660251274705, + 0.015751969069242477, + -0.029143214225769043, + 0.0024112602695822716, + -0.02520643174648285, + 0.023578567430377007, + -0.023457514122128487, + 0.013982303440570831, + -0.016386305913329124, + -0.002706830855458975, + -0.016093457117676735, + -0.037887830287218094, + 0.06287679821252823, + 0.00989477802067995, + -0.026199528947472572, + 0.0037161086220294237, + -0.027242952957749367, + -0.03319230675697327, + -0.026061702519655228, + 0.015717124566435814, + -0.055130068212747574, + -0.02499731443822384, + -0.014253406785428524, + 0.046770382672548294, + 0.008143531158566475, + 0.005510109476745129, + -0.02712511457502842, + -0.03787349909543991, + 0.013756319880485535, + 0.00579818757250905, + 
0.008403831161558628, + 0.029755474999547005, + -0.0032762265764176846, + 0.0044027529656887054, + 0.03601987659931183, + 0.0909135565161705, + -0.007387327961623669, + -0.005328672006726265, + -0.03983118385076523, + -0.045495130121707916, + 0.022088482975959778, + -0.04040846228599548, + -0.0028130451682955027, + 0.03781481459736824, + 0.03704448416829109, + 0.03319826349616051, + 0.0018484846223145723, + 0.0547247938811779, + 0.019755663350224495, + -0.07568438351154327, + 0.05122718587517738, + -0.02555399388074875, + 0.06782808154821396, + -0.0756291076540947, + 0.05646894872188568, + 0.06156547740101814, + -0.0010290262289345264, + 0.02769431658089161, + 0.003549074986949563, + 0.02812255173921585, + -0.016937678679823875, + 0.018674779683351517, + -0.037636883556842804, + -0.002664038445800543, + 0.023414231836795807, + 0.040655869990587234, + 0.027929119765758514, + 0.03510269895195961, + -0.012380925938487053, + 0.024845607578754425, + 0.027425218373537064, + -0.05437726899981499, + 0.015804018825292587, + 0.05077793449163437, + -0.0003959169262088835, + 0.016312288120388985, + -0.007089096121490002, + -0.018367605283856392, + 0.02974492497742176, + 0.08662278950214386, + -0.021586472168564796, + -0.01729869470000267, + -0.04846135899424553, + -0.03031736984848976, + 0.002749247709289193, + 0.02350122295320034, + -0.0211945790797472, + 0.03907554969191551, + -0.023193899542093277, + -0.017260679975152016, + -0.03159818798303604, + -0.03952740877866745, + 0.010126064531505108, + -0.04888703301548958, + 0.06297406554222107, + 0.03254289552569389, + 0.004272142890840769, + -0.03231256827712059, + -0.04512784630060196, + 0.0043722353875637054, + -0.02994321472942829, + 0.05848870426416397, + 0.003534652292728424, + 0.007630845997482538, + 0.017482444643974304, + 0.04071490466594696, + 0.008809284307062626, + -0.03566472604870796, + -0.029327288269996643, + -0.017241651192307472, + -0.012668757699429989, + 0.05879855155944824, + 0.05893324315547943, + 0.09900359064340591, + 0.028096094727516174, + -0.036374326795339584, + 0.06244330108165741, + -0.03114512376487255, + -0.028666621074080467, + 0.06343588978052139, + 0.025132114067673683, + -0.01625697687268257, + 0.019650116562843323, + -0.049646493047475815, + -0.03520796075463295, + 0.03757908195257187, + 0.002519423607736826, + 0.03556838259100914, + -0.017592694610357285, + 0.0010467531392350793, + -0.06738362461328506, + -0.025265797972679138, + 0.008135112002491951, + -0.01762012392282486, + -0.024728305637836456, + -0.03567385673522949, + 0.018016908317804337, + 0.06866948306560516, + 0.03130311518907547, + -0.0297296904027462, + -0.006176969967782497, + 0.04329727590084076, + 0.044129278510808945, + -0.020673662424087524, + 0.06023940071463585, + -0.004932863637804985, + -0.050380971282720566, + -0.034760732203722, + 0.00199303706176579, + 0.05686243996024132, + -0.0148441381752491, + -0.012425840832293034, + -0.011634211987257004, + 0.022722775116562843, + -0.008717222139239311, + 0.020749682560563087, + -0.0277851615101099, + 0.0007777228020131588, + 0.013342801481485367, + 0.03622204810380936, + -0.023042850196361542, + -0.026700101792812347, + -0.034892451018095016, + -0.028433026745915413, + 0.06670085340738297, + 0.013445812277495861, + 0.03833755850791931, + 0.01010140310972929, + -0.03759188950061798, + -0.05855119228363037, + 0.00781426765024662, + -0.04906706139445305, + 0.03342912718653679, + -0.05243462324142456, + 0.040698058903217316, + 0.06868159025907516, + 0.022752607241272926, + -0.005430352408438921, + 
-0.006812892388552427, + -0.04902511462569237, + -0.006551826372742653, + -0.03979682922363281, + -0.013956423848867416, + -0.06136368587613106, + 0.0740000531077385, + 0.05603933334350586, + 0.02190348319709301, + -0.043786339461803436, + -0.0392116904258728, + -0.01866808719933033, + 0.01707339473068714, + -0.026303859427571297, + -0.01817542500793934, + 0.03552285581827164, + 0.0276781152933836, + 0.05265122279524803, + -0.03358357027173042, + -0.020007848739624023, + 0.04865119233727455, + 0.02959197200834751, + -0.0032693049870431423, + 0.02495887503027916, + 0.03446371853351593, + -0.011217310093343258, + -0.09030335396528244, + 0.014422472566366196, + -0.008989378809928894, + -0.011282369494438171, + 0.049398381263017654, + -0.01687331311404705, + 0.025424139574170113, + 0.024985041469335556, + -0.009084195829927921, + 0.004050575662404299, + 0.0007717382395640016, + -0.03172731399536133, + -0.017505444586277008, + -0.014687484130263329, + 0.03803866356611252, + 0.016156280413269997, + -0.010017951019108295, + -0.026353944092988968, + 0.019050614908337593, + -0.03580506518483162, + 0.02924525737762451, + 0.02443450316786766, + -0.01770329475402832, + 0.04581848904490471, + -0.01908605918288231, + 0.012714254669845104, + 0.08363562822341919, + 0.037286512553691864, + -0.003420531051233411, + -0.06909038126468658, + -0.0591881163418293, + -0.007654525339603424, + 0.053144630044698715, + 0.03045589104294777, + -0.04600578546524048, + 0.026682959869503975, + -0.0019753179512917995, + -0.017073772847652435, + -0.012071357108652592, + 0.028171954676508904, + 0.00024773634504526854, + -0.03256797045469284, + -0.09742321819067001, + 0.040483273565769196, + -0.025031624361872673, + 0.03650636970996857, + 0.0011886897264048457, + 0.016929153352975845, + 0.054483890533447266, + 0.03752107173204422, + 0.019491281360387802, + 0.006253591738641262, + 0.02451430633664131, + -0.05976274237036705, + 0.060739971697330475, + -0.04400366172194481, + 0.028709039092063904, + -0.02141660451889038, + 0.08152823895215988, + -0.00450171809643507, + -0.03484562411904335, + -0.00046958858729340136, + -0.017397938296198845, + 0.07823023945093155, + -0.011110293678939342, + 0.004524719901382923, + 0.03619854897260666, + -0.02478216402232647, + -0.011563056148588657, + -0.012815544381737709, + -0.03503820300102234, + -0.04771020635962486, + -0.030619489029049873, + 0.0669066309928894, + 0.030025487765669823, + -0.011697783134877682, + -0.006708705797791481, + -0.0061534675769507885, + 0.0365905724465847, + -0.006860053166747093, + -0.07040797173976898, + -0.057646144181489944, + 0.04284966364502907, + -0.01533683855086565, + -0.06859996914863586, + 0.009425769560039043, + -9.838528785621747e-05, + 0.010796179063618183, + -0.06541100144386292, + 0.01059884112328291, + -0.028843343257904053, + 0.029019653797149658, + -0.005446962546557188, + -0.0120149040594697, + -0.0471968911588192, + 0.008648250252008438, + 0.021961573511362076, + -0.010606558993458748, + 0.0008718566386960447, + -0.014988702721893787, + -0.11522816866636276, + -0.023671753704547882, + -0.004968483000993729, + 0.0307041984051466, + -0.0020613274537026882, + -0.03271760419011116, + -0.04547363147139549, + -0.00812614057213068, + -0.013890305534005165, + 0.048099175095558167, + -0.015408700332045555, + 0.06658884882926941, + 0.012633614242076874, + -0.05337975174188614, + 0.0033035692758858204, + 0.03610198199748993, + -0.0405871607363224, + 0.008806376717984676, + -0.017653945833444595, + -0.05865860357880592, + 0.03825455904006958, + 
-0.00478429114446044, + -0.04127506911754608, + 0.01231306791305542, + 0.0008735111332498491, + 0.02923770435154438, + 0.005922738928347826, + -0.01829770766198635, + -0.00685579888522625, + -0.03903493285179138, + 0.009158597327768803, + -0.03491708263754845, + 0.04114120453596115, + -0.0014327293029055, + -0.019274454563856125, + 0.02704671025276184, + 0.01738886535167694, + -0.02327372133731842, + -0.03135831654071808, + -0.01305293757468462, + 0.04163745418190956, + 0.01710107922554016, + 0.06454417109489441, + 0.020267069339752197, + -0.08408207446336746, + -0.010505065321922302, + -0.0073319086804986, + 0.004039655905216932, + -0.01633611135184765, + -0.02889716438949108, + -0.0806351900100708, + -0.023603465408086777, + -0.06304290890693665, + 0.007231221999973059, + -0.038828227669000626, + 0.014790577813982964, + -0.03915632143616676, + 0.05616161227226257, + 0.00311578088440001, + -0.02434428222477436, + 0.006431886460632086, + -0.06326194852590561, + -0.0166602972894907, + 0.03630464896559715, + 0.01622965931892395, + 0.026233987882733345, + 0.06605540215969086, + -0.05635184794664383, + -0.08930846303701401, + -0.05207853391766548, + 0.027004040777683258, + -0.031913693994283676, + -0.009139630943536758, + -0.028410566970705986, + 0.06700566411018372, + 0.0423152893781662, + -0.010422738268971443, + -0.04085265100002289, + 0.029382970184087753, + 0.052883222699165344, + 0.02239867113530636, + -0.0012815282680094242, + 0.014223611913621426, + 0.02597920596599579, + -0.015063034370541573, + 0.0828455239534378, + 0.03366050869226456, + -0.022025907412171364, + -0.0019613192416727543, + -0.02539178729057312, + 0.0399317741394043, + -0.006493750028312206, + -0.0013236093800514936, + -0.02036309242248535, + -0.0065197269432246685, + -0.030695058405399323, + -0.03585388883948326, + -0.045742426067590714, + 0.015121972188353539, + 0.08081705123186111, + 0.007906812243163586, + 0.059827424585819244, + -0.04740464314818382, + -0.0743480697274208, + -0.025416050106287003, + -0.05693193897604942, + -0.001481675892136991, + -0.017177585512399673, + -0.03311903402209282, + 0.022755322977900505, + -0.003895542584359646, + 0.02692737802863121, + -0.0032731543760746717, + -0.0031116430182009935, + -0.030539495870471, + -0.006427450571209192, + -0.0015021534636616707, + 0.0017666849307715893, + -0.03059082292020321, + 0.0005832729511894286, + -0.05637278035283089, + 0.04087543487548828, + 0.00220437441021204, + 0.0021564762573689222, + -0.0314127579331398, + -0.025518659502267838, + -0.07314060628414154, + -0.014426291920244694, + -0.08714891970157623, + -0.02331671305000782, + 0.013582085259258747, + -0.0025384915061295033, + -0.01540769450366497, + -0.0110056446865201, + 0.04654880613088608, + 0.010653696954250336, + 0.0018328200094401836, + 0.007387213874608278, + 0.07984212785959244, + -0.02893732860684395, + -0.04140201583504677, + -0.07618758082389832, + -0.00793982483446598, + -0.0377434641122818, + 0.032935332506895065, + -0.013259266503155231, + 0.02015708014369011, + 0.09388656169176102, + 0.0017843206878751516, + 0.034253206104040146, + -0.017240997403860092, + 0.009084933437407017, + -0.048595622181892395, + -0.03737767040729523, + -0.04036621376872063, + -0.009442481212317944, + 0.01705838553607464, + -0.03709835931658745, + 0.004579882137477398, + -0.02558705396950245, + -0.010287507437169552, + -0.00969093106687069, + 0.012930587865412235, + -0.0026530276518315077, + -0.055302973836660385, + -0.0007084248936735094, + 0.0027114865370094776, + -0.022337302565574646, + 
0.049817051738500595, + -0.015339787118136883, + -0.01575980708003044, + -0.0211472287774086, + -0.041779227554798126, + -0.00043109135003760457, + -0.002638365374878049, + 0.0003785403096117079, + 0.04666115716099739, + -0.031109463423490524, + 0.03951709344983101, + 0.007409846410155296, + 0.032062821090221405, + -0.019406728446483612, + -0.03020879067480564, + 0.06802312284708023, + 0.018488138914108276, + -0.053909264504909515, + -0.007893281057476997, + 0.02533031813800335, + 0.03132852911949158, + -0.053483180701732635, + 0.025661734864115715, + 0.002397680189460516, + 0.04062856733798981, + 0.02830035611987114, + -0.00479720626026392, + -0.008729430846869946, + -0.0034766148310154676, + 0.03173350542783737, + 0.0071125393733382225, + -0.03040342777967453, + -0.006032709032297134, + -0.06926627457141876, + -0.0381772480905056 + ], + "content_embedding": [ + -0.02845170348882675, + 0.020628532394766808, + 0.003312832210212946, + -0.029765071347355843, + 0.016959644854068756, + -0.004110109526664019, + 0.054954126477241516, + -0.03696386516094208, + -0.06003747880458832, + -0.016737867146730423, + -0.04143841937184334, + 0.010524315759539604, + 0.01846286654472351, + 0.012900668196380138, + -0.01821877434849739, + -0.022333195433020592, + 0.016231827437877655, + -0.00692401546984911, + -0.009705417789518833, + 0.0043431734666228294, + -0.01035444438457489, + -0.03510449081659317, + -0.01101984828710556, + -0.029713021591305733, + 0.08172306418418884, + -0.008759400807321072, + -0.040999725461006165, + 0.04106973484158516, + -0.05768377706408501, + -0.008512589149177074, + 0.05944962799549103, + -0.012553821317851543, + 0.013645646162331104, + -0.02660560794174671, + -0.057905036956071854, + -0.054687026888132095, + 0.003909541759639978, + -0.04956740885972977, + -0.042125821113586426, + 0.06187684088945389, + 0.06303229182958603, + -0.012631679885089397, + -0.004673871211707592, + -0.02207319252192974, + 0.052802763879299164, + 0.014762785285711288, + 0.04115021601319313, + -0.006632254458963871, + 0.03773806244134903, + -0.03468457981944084, + -0.014101233333349228, + 0.013350501656532288, + -0.024982236325740814, + -0.009867328219115734, + -0.007960042916238308, + 0.005127797368913889, + 0.002303300891071558, + -0.004433336202055216, + 0.03658096119761467, + -0.04504770040512085, + 0.027889715507626534, + 0.05441499873995781, + -0.04908447712659836, + 0.041611816734075546, + -0.00782090611755848, + -0.05460766702890396, + 0.0005653056432493031, + 0.0009949197992682457, + 0.013009139336645603, + 0.004702548962086439, + -0.0066951001062989235, + -0.009612455032765865, + 0.027976926416158676, + 0.013144126161932945, + -0.009398404508829117, + -0.009249510243535042, + 0.02228953316807747, + -0.05003415420651436, + -0.03484565392136574, + 0.039622433483600616, + 0.03127755597233772, + -0.07711455225944519, + 0.026068583130836487, + -0.03025561012327671, + 0.03434577211737633, + 0.02756066806614399, + -0.016127552837133408, + 0.0031622813548892736, + -0.011191527359187603, + 0.10279087722301483, + 0.07508235424757004, + 0.014161021448671818, + 0.04303860291838646, + 0.02421264722943306, + -0.060081642121076584, + 0.08023173362016678, + -0.016117870807647705, + -0.040795255452394485, + -0.006737033370882273, + -0.02539793588221073, + -0.005812298972159624, + 0.027351481840014458, + 0.02652551420032978, + 0.034308984875679016, + 0.07952407002449036, + 0.012120738625526428, + -0.002102786907926202, + 0.02581837773323059, + 0.0036945617757737637, + 0.03335866704583168, + -0.05533025786280632, 
+ -0.029806576669216156, + 0.014511525630950928, + 0.028494026511907578, + -0.028353745117783546, + -0.0015628034016117454, + 0.0542825423181057, + -0.06842301040887833, + 0.013071774505078793, + 0.035904042422771454, + -0.060427047312259674, + -0.010712354443967342, + -0.010741145350039005, + 0.00589279318228364, + 0.03916572034358978, + 0.0011838098289445043, + -0.04358551278710365, + -0.02426866628229618, + -0.02629699930548668, + -0.016508616507053375, + 0.0038987575098872185, + 0.00010461249621585011, + -0.06473322957754135, + 0.027538873255252838, + 0.03787471354007721, + 0.024383891373872757, + -0.04171127453446388, + -0.03238093852996826, + 0.007360804360359907, + -0.014501902274787426, + 0.014242740347981453, + -0.0012311796890571713, + -0.013716178946197033, + -0.009915472939610481, + 0.026615049690008163, + -0.07398053258657455, + 0.0030485496390610933, + 0.025813661515712738, + -0.022065768018364906, + 0.0349227599799633, + 0.0045135351829230785, + -0.053763143718242645, + -0.013968654908239841, + 0.016600387170910835, + 0.029198968783020973, + -0.03825172409415245, + -0.03900526836514473, + 0.02822844497859478, + 0.052716661244630814, + -0.00427692336961627, + 0.029389938339591026, + 0.01127107534557581, + -0.02288925088942051, + 0.06506737321615219, + -0.011876849457621574, + -0.009232635609805584, + 0.059180255979299545, + 0.060491811484098434, + 0.04768436402082443, + 0.04782063513994217, + -0.007591789122670889, + -0.012142209336161613, + -0.00854392908513546, + -0.03645598515868187, + 0.02366817742586136, + 0.028424806892871857, + 0.03254731744527817, + -0.0650848001241684, + 0.05803924798965454, + -0.006124107167124748, + 0.007514724973589182, + -0.06995245814323425, + 0.03610721975564957, + -0.025534681975841522, + -0.047099191695451736, + 0.0024543125182390213, + 0.013705895282328129, + -0.08660408854484558, + 0.013458521105349064, + -0.05938595533370972, + 0.025314588099718094, + -0.06279927492141724, + -0.008528811857104301, + -0.04051665961742401, + -0.02572588622570038, + -0.05028638243675232, + 0.029650729149580002, + 0.03656933456659317, + 0.027842504903674126, + -0.017784448340535164, + -0.06566111743450165, + -0.016097936779260635, + -0.07754653692245483, + 0.02611452341079712, + -0.012319186702370644, + 0.03830364719033241, + 0.05927351489663124, + -0.0005797847989015281, + 0.05858585610985756, + 0.013468705117702484, + 0.08553440123796463, + 0.010187739506363869, + -0.023877883329987526, + 0.027608737349510193, + -0.04135579988360405, + -0.004526825156062841, + 0.01695535145699978, + -0.043227668851614, + -0.03456792235374451, + 0.06477289646863937, + 0.031624119728803635, + -0.04087601602077484, + 0.0010430653346702456, + 0.017958510667085648, + 0.009248117916285992, + 0.010219916701316833, + -0.05485055223107338, + -0.01347501389682293, + -0.015884561464190483, + -0.008806952275335789, + -0.04478437826037407, + -0.09141774475574493, + 0.07184188067913055, + 0.02080371417105198, + 0.03414024040102959, + 0.02681431546807289, + -0.02171824313700199, + 0.023230157792568207, + 0.0034705817233771086, + 0.023832201957702637, + 0.04260754957795143, + -0.023710861802101135, + 0.017519451677799225, + -0.023114347830414772, + -0.07241662591695786, + 0.043135177344083786, + -0.03519831597805023, + 0.01728164590895176, + -0.007306656800210476, + 0.029966725036501884, + 0.005133960861712694, + 0.010730396956205368, + 0.014178331010043621, + 0.02290872484445572, + 0.04147600382566452, + -0.0711970180273056, + 0.011968120001256466, + 0.012014097534120083, + 
-0.00941413827240467, + -0.048221614211797714, + 0.02721494808793068, + -0.036967791616916656, + 0.03305060788989067, + -0.023104682564735413, + -0.0021078407298773527, + 0.01056760549545288, + 0.003825176041573286, + -0.02744617499411106, + -0.011484067887067795, + -0.019249368458986282, + 0.012087506242096424, + 0.016815317794680595, + 0.008888418786227703, + 0.09483875334262848, + 0.005403030198067427, + -0.006538084242492914, + -0.00812787376344204, + 0.010251098312437534, + 0.025141935795545578, + -0.016502706333994865, + -0.07583127170801163, + -0.059361476451158524, + -0.03975491225719452, + 0.005571363028138876, + 0.025980276986956596, + 0.06575164198875427, + -0.022391004487872124, + -0.0014668750809505582, + 0.0309857539832592, + -0.01333113107830286, + 0.024967554956674576, + 0.008801382035017014, + 0.004801678936928511, + 0.011097696609795094, + -0.02484068274497986, + 0.07475821673870087, + 0.06004296988248825, + 0.008063849061727524, + -0.007297527976334095, + -0.0225421991199255, + -0.020057078450918198, + 0.04824424162507057, + 0.019114485010504723, + -0.024972988292574883, + -0.013590608723461628, + -0.026848217472434044, + 0.013158710673451424, + 0.03205424174666405, + 0.06152794137597084, + 0.06059333309531212, + -0.03528637811541557, + 0.03574252501130104, + 0.011881774291396141, + -0.014821416698396206, + -0.03766554221510887, + 0.02960871160030365, + 0.043620847165584564, + -0.0008511713240295649, + -0.012452763505280018, + -0.008131926879286766, + 0.02682739496231079, + -0.027776895090937614, + -0.017724614590406418, + -0.074460469186306, + 0.007388352416455746, + 0.052085623145103455, + 0.005944994743913412, + 0.05980602651834488, + 0.004958854056894779, + 0.004315464291721582, + 0.009470906108617783, + 0.08363069593906403, + -0.06266297399997711, + -0.02252691425383091, + -0.0047216094098985195, + 0.01363289449363947, + 0.00803599413484335, + 0.017915869131684303, + 0.01683986745774746, + 0.0054694474674761295, + 0.00690553430467844, + -0.033289551734924316, + -0.041059620678424835, + -0.05957230553030968, + -0.07026804983615875, + 0.0026692922692745924, + 0.01784949004650116, + -0.0003522790502756834, + 0.044131647795438766, + 0.05823194235563278, + -0.01914701797068119, + 0.012501074001193047, + -0.0076722633093595505, + -0.040374498814344406, + 0.010002685710787773, + -0.006841403432190418, + 0.024179449304938316, + 0.01219668984413147, + -0.005650076549500227, + 0.010313056409358978, + 0.0951242670416832, + -0.012245064601302147, + 0.02261069230735302, + -0.02354615181684494, + 0.04120791703462601, + 0.03377315774559975, + 0.03468254953622818, + 0.033613745123147964, + 0.03305840864777565, + -0.033862534910440445, + 0.034367989748716354, + -0.022536078467965126, + 0.04874858632683754, + 0.0415329709649086, + 0.06666682660579681, + 0.0036932802759110928, + -0.09809356927871704, + -0.0017088145250454545, + 0.0024299651850014925, + -0.018637090921401978, + 0.06801039725542068, + 0.02409985661506653, + 0.009735392406582832, + -0.02749275043606758, + 0.030437270179390907, + -0.00898370798677206, + -0.020128484815359116, + -0.009687880054116249, + 0.01668565906584263, + -0.04497738555073738, + -0.019772959873080254, + -0.022632960230112076, + -0.02253716252744198, + 0.010271976701915264, + -0.00696501974016428, + 0.030849630013108253, + -0.04239751771092415, + 0.03944450616836548, + -0.013430316932499409, + 0.032022625207901, + -0.05952562019228935, + -0.043423160910606384, + -0.0024594010319560766, + -0.0009159342152997851, + -0.01097820233553648, + 
0.02963317185640335, + -0.02188674546778202, + -0.048581305891275406, + 0.03341398760676384, + -0.011065436527132988, + 0.047042280435562134, + 0.04042183235287666, + -0.02600206807255745, + -0.05695529654622078, + 0.05499875172972679, + -0.03984459489583969, + 0.0649806335568428, + 0.02108422853052616, + -0.07841797918081284, + -0.02946053445339203, + -0.01747039519250393, + -0.013214156962931156, + 0.008581981062889099, + -0.0022455912549048662, + -0.022153383120894432, + 0.011744811199605465, + 0.017487658187747, + 0.008825760334730148, + 0.025580866262316704, + 0.0082536730915308, + 0.03269948437809944, + 0.012900054454803467, + 0.04077104851603508, + 0.0378829762339592, + -0.06819288432598114, + 0.02784581482410431, + 0.06434649229049683, + 0.03351795673370361, + 0.011211752891540527, + 0.01894824393093586, + 0.004370532464236021, + -0.014345336705446243, + 0.006097803357988596, + -0.08543102443218231, + -0.02757532149553299, + 0.06072119623422623, + 0.10378460586071014, + 0.009428516030311584, + -0.04370144382119179, + -0.01193047035485506, + 0.04444314166903496, + 0.011696353554725647, + -0.030421355739235878, + -0.014331959187984467, + 0.04900198057293892, + 0.027842359617352486, + -0.009193948470056057, + -0.06911113113164902, + -0.011863719671964645, + 0.035546496510505676, + 0.022603457793593407, + 0.017083479091525078, + 0.015593679621815681, + 0.021234845742583275, + 0.004202473908662796, + -0.0537634901702404, + 0.04333661124110222, + -0.025838447734713554, + -0.024251429364085197, + 0.03204401955008507, + -0.002707387087866664, + 0.03231189027428627, + -0.0030726506374776363, + 0.024067431688308716, + -0.03183548524975777, + 0.007890576496720314, + 0.054388418793678284, + -0.017090266570448875, + 0.01572081632912159, + 0.04539618268609047, + 0.01960766315460205, + 0.009962057694792747, + -0.06782109290361404, + 0.03449762612581253, + -0.004095849581062794, + 0.011451792903244495, + 0.01291556004434824, + 0.009039049968123436, + -0.023024702444672585, + -0.019358525052666664, + 0.004383507184684277, + 0.01303566712886095, + 0.008749599568545818, + -0.0019463954959064722, + -0.05059671401977539, + -0.03929319232702255, + -0.038611579686403275, + -0.0004830050456803292, + 0.03260226547718048, + -0.0484665147960186, + 0.04194365814328194, + 0.028087828308343887, + 0.0015577803133055568, + -0.0020338960457593203, + 0.00992380827665329, + 0.029156159609556198, + 0.013400931842625141, + -0.08322624862194061, + 0.03542347997426987, + 0.029148025438189507, + 0.03978026285767555, + 0.012040375731885433, + -0.015034311451017857, + 0.016610056161880493, + 0.030412640422582626, + -0.051336318254470825, + 0.057814277708530426, + -0.009117085486650467, + -0.055816203355789185, + 0.05705805867910385, + -0.013596060685813427, + 0.07485361397266388, + 0.04783453419804573, + 0.03237048164010048, + -0.1252431571483612, + -0.024511752650141716, + 0.009948192164301872, + -0.001071324571967125, + 0.03724309056997299, + 0.0011626302730292082, + -0.024294020608067513, + 0.09550263732671738, + 0.001125041046179831, + -0.032950934022665024, + 0.03814041614532471, + -0.015817970037460327, + -0.042719099670648575, + 0.02066672407090664, + 0.03320891037583351, + 0.04236403852701187, + 0.026531731709837914, + -0.026540761813521385, + 0.018515214323997498, + -0.0034993020817637444, + -0.019887158647179604, + -0.03968263417482376, + -0.048776015639305115, + 0.058169975876808167, + -0.033802736550569534, + -0.06506575644016266, + 0.06649087369441986, + -0.0008670967072248459, + 0.03118874505162239, + 
-0.04512554034590721, + 0.017889900133013725, + -0.0688585713505745, + -0.005668118130415678, + -0.04627913981676102, + -0.005911638960242271, + -0.004173378925770521, + 0.030776705592870712, + -0.003502912586554885, + 0.06485340744256973, + -0.002432354027405381, + 0.016381509602069855, + -0.11775369197130203, + -0.03817284479737282, + -0.0002618363650981337, + 0.020635321736335754, + -0.00496688112616539, + -0.025879040360450745, + -0.03508525341749191, + 0.03343107923865318, + 0.0065256268717348576, + 0.03733794391155243, + -0.05374712869524956, + 0.06329557299613953, + -0.012232488952577114, + -0.07295915484428406, + -0.03410065174102783, + -0.036963820457458496, + -0.040072306990623474, + 0.08864720910787582, + -0.025265397503972054, + 0.006373850163072348, + 0.03589979186654091, + -0.04953531548380852, + -0.014813058078289032, + 0.03135038912296295, + -0.045539740473032, + -0.013331865891814232, + -0.0077253468334674835, + 0.02402244322001934, + -0.01973932422697544, + -0.07107116281986237, + 0.029835300520062447, + 0.027613440528512, + 0.027596186846494675, + 0.0323333777487278, + -0.059915486723184586, + -0.03399650380015373, + -0.011191067285835743, + 0.04654889553785324, + 0.02089870348572731, + -0.02999742142856121, + 0.03440370410680771, + 0.007507022004574537, + 0.040571920573711395, + -0.03278252109885216, + -0.06050700321793556, + 0.021276405081152916, + -0.016155162826180458, + 0.00010897620086325333, + -0.0010203487472608685, + -0.03746471554040909, + -0.025609171018004417, + 0.005009123589843512, + -0.08643782883882523, + 0.031217029318213463, + -0.0032753287814557552, + -0.025097588077187538, + -0.03903978690505028, + 0.04100614786148071, + -0.015351627953350544, + -0.027274904772639275, + 0.03702486678957939, + -0.08083852380514145, + 0.005576241761445999, + -0.006957546342164278, + 0.09723483771085739, + 0.018242431804537773, + 0.05415903404355049, + -0.06101224571466446, + -0.025462020188570023, + -0.05338318645954132, + -0.014500913210213184, + 0.017682785168290138, + -0.017082780599594116, + 0.028426188975572586, + 0.039222750812768936, + 0.02545572631061077, + -0.03942421078681946, + -0.06022893264889717, + 0.04819706454873085, + 0.06749513745307922, + 0.01035595778375864, + 0.011470649391412735, + -0.0024080125149339437, + -0.033233992755413055, + 0.028164777904748917, + 0.0553852841258049, + 0.06263226270675659, + -0.016413886100053787, + 0.021145500242710114, + 0.01179521344602108, + 0.033495236188173294, + -0.013982322998344898, + 0.015615278854966164, + -0.04715987294912338, + 0.029921408742666245, + 0.00394752062857151, + -0.028457310050725937, + 0.018736237660050392, + -0.02897241711616516, + -0.006964333355426788, + 0.001459103194065392, + 0.020680002868175507, + -0.045486945658922195, + -0.0186879001557827, + 0.006018372252583504, + -0.005646763369441032, + 0.031949128955602646, + 0.015718143433332443, + -0.0680442824959755, + -0.040316201746463776, + 0.027103520929813385, + 0.007122257724404335, + -0.02554631046950817, + -0.015084164217114449, + -0.05808757618069649, + -0.01925673894584179, + -0.030870718881487846, + 0.04750153049826622, + -0.05464727431535721, + -0.03634507954120636, + -0.022956492379307747, + -0.001869303290732205, + 0.009947973303496838, + -0.0452248640358448, + -0.05064425989985466, + 0.0033088859636336565, + 0.032645225524902344, + 0.011128626763820648, + -0.07038814574480057, + -0.00398398470133543, + -0.029936205595731735, + -0.037302739918231964, + -0.026515178382396698, + -0.005808456335216761, + 0.011362303048372269, + 
0.013931548222899437, + 0.053531426936388016, + -0.017702942714095116, + 0.04049023985862732, + 0.0490780733525753, + -0.0034894803538918495, + 0.0072046807035803795, + 0.05128946155309677, + -0.02248883992433548, + -0.016904350370168686, + -0.007111264392733574, + 0.013148962520062923, + 0.04168686643242836, + -0.011360356584191322, + -0.01462612859904766, + 0.036358367651700974, + -0.000562329194508493, + -0.037175074219703674, + 0.002623690525069833, + -0.04284300655126572, + 0.045784976333379745, + 0.017441436648368835, + -0.09851012378931046, + 0.06095545366406441, + -0.0374034121632576, + -0.02720695361495018, + -0.007703973911702633, + 0.03689894452691078, + 0.008105777204036713, + 0.019800135865807533, + -0.02071499079465866, + -0.022336198017001152, + -0.009109229780733585, + 0.02298821695148945, + -0.04302436485886574, + -0.023912018164992332, + 0.007846477441489697, + -0.04115860536694527, + -0.052512455731630325, + -0.0262643713504076, + 0.00893806479871273, + -0.032936349511146545, + -0.015261095948517323, + 0.005558508913964033, + -0.008528356440365314, + -0.023201653733849525, + -0.056550297886133194, + 0.025247847661376, + 0.04831540212035179, + -0.019267458468675613, + -0.03474835306406021, + -0.001712511875666678, + -0.04063638299703598, + -0.01707102544605732, + -0.01384702231734991, + -0.0023981977719813585, + 0.03153757378458977, + 0.030312344431877136, + 0.040820326656103134, + -0.013783353380858898, + 0.012157448567450047, + -0.015558870509266853, + -0.056085314601659775, + 0.03875841945409775, + 0.021351536735892296, + -0.021598603576421738, + -0.01058109663426876, + 0.001237297197803855 + ], + "chunk_ind": 0 + }, + { + "url": "https://docs.danswer.dev/more/use_cases/enterprise_search", + "title": "Enterprise Search", + "content": "Value of Enterprise Search with Danswer\n\nWhat is Enterprise Search and why is it Important?\nAn Enterprise Search system gives team members a single place to access all of the disparate knowledge of an organization. Critical information is saved across a host of channels like call transcripts with prospects, engineering design docs, IT runbooks, customer support email exchanges, project management tickets, and more. As fast moving teams scale up, information gets spread out and more disorganized.\n\nSince it quickly becomes infeasible to check across every source, decisions get made on incomplete information, employee satisfaction decreases, and the most valuable members of your team are tied up with constant distractions as junior teammates are unable to unblock themselves. Danswer solves this problem by letting anyone on the team access all of the knowledge across your organization in a permissioned and secure way. Users can ask questions in natural language and get back answers and documents across all of the connected sources instantly.\n\nWhat's the real cost?\nA typical knowledge worker spends over 2 hours a week on search, but more than that, the cost of incomplete or incorrect information can be extremely high. Customer support/success that isn't able to find the reference to similar cases could cause hours or even days of delay leading to lower customer satisfaction or in the worst case - churn. An account exec not realizing that a prospect had previously mentioned a specific need could lead to lost deals. An engineer not realizing a similar feature had previously been built could result in weeks of wasted development time and tech debt with duplicate implementation. 
With a lack of knowledge, your whole organization is navigating in the dark - inefficient and mistake prone.", + "title_embedding": [ + -0.011060578748583794, + 0.05994883179664612, + 0.008845113217830658, + 0.011364905163645744, + 0.03147757425904274, + 0.04506697878241539, + -0.025942707434296608, + -0.011002028360962868, + -0.03507396578788757, + -0.01727098599076271, + -0.016820134595036507, + 0.04671240597963333, + 0.023456331342458725, + -0.005752791650593281, + -0.011421029455959797, + -0.04169125109910965, + 0.0652366429567337, + -0.011136278510093689, + -0.013501451350748539, + -0.006273901090025902, + 0.0012236927868798375, + -0.030771249905228615, + 0.010098040103912354, + -0.02360220067203045, + 0.006734110414981842, + 0.001379420980811119, + -0.0047225081361830235, + -0.013901330530643463, + 0.014645840041339397, + -0.02156134508550167, + -0.026707857847213745, + -0.00676271365955472, + 0.056067030876874924, + -0.0455806590616703, + -0.02762053906917572, + -0.0965149849653244, + 0.04567359760403633, + 0.053895801305770874, + 0.029093541204929352, + 0.0199823547154665, + 0.047089505940675735, + 0.026028119027614594, + -0.0034626282285898924, + -0.021002190187573433, + 0.04217635095119476, + -0.015198000706732273, + 0.047393105924129486, + 0.008588545024394989, + 0.07058116048574448, + -0.09135723114013672, + -0.009591161273419857, + 0.014971816912293434, + -8.222273208957631e-07, + -0.05534408614039421, + -0.029965048655867577, + -0.028381407260894775, + 0.025547217577695847, + 0.038583844900131226, + 0.01984122209250927, + -0.02435469813644886, + 0.04955849424004555, + 0.0128632802516222, + -0.022573867812752724, + 0.025284791365265846, + 0.04496009647846222, + 0.0004200333496555686, + -0.0015001222491264343, + 0.02609623409807682, + 0.023677939549088478, + -0.05961468443274498, + 0.06799754500389099, + -0.014409428462386131, + -0.010374268516898155, + 0.019634529948234558, + -0.03720257058739662, + -0.04654879495501518, + 0.0056068566627800465, + -0.021244503557682037, + -0.03198164328932762, + 0.05707596614956856, + 0.043927326798439026, + -0.033356692641973495, + 0.015723733231425285, + -0.027493512257933617, + 0.04525380581617355, + 0.025808652862906456, + -0.007083983160555363, + -0.011038954369723797, + -0.004390218295156956, + -0.006583297159522772, + 0.003319315379485488, + 0.061810243874788284, + 0.05887124314904213, + 0.07722297310829163, + -0.06601747870445251, + 0.036486171185970306, + 0.05119618400931358, + 0.00945530366152525, + -0.03005358576774597, + -0.047870855778455734, + -0.0359003059566021, + -0.005699747242033482, + 0.053807053714990616, + -0.001554036163724959, + 0.060548700392246246, + 0.05476829782128334, + 0.00426551653072238, + -0.005215689539909363, + -0.0352163203060627, + -0.03284529969096184, + -0.03373449295759201, + 0.021254340186715126, + -0.010481598787009716, + 0.02651101164519787, + -0.00481840968132019, + 0.045160870999097824, + 0.09704204648733139, + -0.03473421558737755, + 0.015584945678710938, + 0.06579536944627762, + 0.0651017278432846, + 0.007380738854408264, + 0.00624364148825407, + 0.07893780618906021, + -0.019876087084412575, + 0.006619091611355543, + -0.030776498839259148, + -0.016426965594291687, + 0.014603338204324245, + -0.03326896205544472, + 0.003784433240070939, + 0.025205042213201523, + 0.03047170303761959, + -0.03364298865199089, + 0.005974944215267897, + 0.015994269400835037, + -0.10205432027578354, + -0.026497431099414825, + 0.07166463136672974, + -0.007370935752987862, + 0.034912627190351486, + -0.004656887147575617, + 
0.03219066560268402, + -0.02239271067082882, + 0.012679396197199821, + -0.07867992669343948, + -0.026638884097337723, + -0.008346030488610268, + -0.027243634685873985, + 0.012043806724250317, + 0.024078860878944397, + 0.0006219774950295687, + 0.0032065671402961016, + -0.008564895950257778, + -0.03275461867451668, + -0.05791788548231125, + -0.044815272092819214, + 0.006680188700556755, + 0.04072298854589462, + 0.016144724562764168, + -0.0008583687013015151, + 0.03699830546975136, + -0.04675738513469696, + 0.06570404022932053, + -0.011776894330978394, + -0.011386243626475334, + -0.0003363603027537465, + 0.039485324174165726, + 0.014156803488731384, + 0.018139634281396866, + -0.014710970222949982, + -0.052651647478342056, + 0.02912742830812931, + 0.024101730436086655, + 0.0413704477250576, + 0.03631320223212242, + 0.046750932931900024, + -0.017062805593013763, + 0.017990263178944588, + -0.03954370319843292, + -0.006972718983888626, + -0.03784368559718132, + 0.02746269851922989, + 0.04107878357172012, + -0.005694024730473757, + -0.03896583244204521, + 0.026265999302268028, + -0.035318680107593536, + -0.018394622951745987, + 0.013594037853181362, + 0.0381510891020298, + -0.010223937220871449, + 0.032417282462120056, + 0.01610656827688217, + -0.013205642811954021, + -0.03757423907518387, + 0.03799910470843315, + -0.039449408650398254, + -0.011290505528450012, + -0.016824476420879364, + 0.007128347177058458, + 0.030213234946131706, + -0.09385695308446884, + 0.014417118392884731, + -0.021249795332551003, + -0.021371016278862953, + 0.031582340598106384, + -0.015021033585071564, + 0.03207740932703018, + 0.04465494304895401, + 0.051139406859874725, + -0.004539252258837223, + -0.004026447422802448, + 0.036198731511831284, + 0.002513982355594635, + -0.022555746138095856, + -0.023142442107200623, + 0.026506206020712852, + -0.0208470169454813, + 0.01958189532160759, + 0.02606782503426075, + -0.050900157541036606, + 0.001175468903966248, + 0.0026185859460383654, + 0.01644700951874256, + 0.047048378735780716, + -0.006155692040920258, + 0.013264120556414127, + -0.004277337808161974, + 0.022337032482028008, + -0.030710609629750252, + -0.06784506887197495, + 0.010662010870873928, + -0.020733945071697235, + -0.01206474844366312, + -0.0005046974983997643, + -0.004159707576036453, + 0.028128545731306076, + -0.011551725678145885, + 0.057953692972660065, + 0.028500419110059738, + 0.02070418931543827, + 0.029373178258538246, + -0.053878165781497955, + -0.03885475918650627, + -0.011427262797951698, + -0.040592946112155914, + 0.019192807376384735, + -0.013966009952127934, + 0.002324307570233941, + 0.027266129851341248, + 0.02721570059657097, + -0.013851913623511791, + 0.06292124837636948, + -0.019983768463134766, + -0.06498263776302338, + -0.014787066727876663, + 0.07545251399278641, + 0.009921795688569546, + -0.02266773208975792, + -0.0174646507948637, + -0.0037801002617925406, + 0.037214070558547974, + -0.033669255673885345, + -0.0033054312225431204, + -0.004362864885479212, + -0.010861773043870926, + -0.041649043560028076, + 0.02711806818842888, + -0.001099557732231915, + 0.0007163260015659034, + 0.01317980233579874, + -0.011158796027302742, + 0.03966476768255234, + 0.023275790736079216, + -0.011645027436316013, + 0.0030634249560534954, + -0.01243121363222599, + 0.01271719578653574, + 0.003938829991966486, + -0.00769989937543869, + -0.039121564477682114, + 0.0005735178128816187, + 0.02157283015549183, + 0.005828005261719227, + 0.03934130072593689, + 0.015216797590255737, + 0.017237801104784012, + 
-0.037648268043994904, + -0.007132838945835829, + -0.018956882879137993, + -0.0597093440592289, + 0.058341480791568756, + -0.0008284997311420739, + 0.02095271646976471, + 0.043099164962768555, + 0.09887702018022537, + -0.01221393421292305, + -0.02239784225821495, + 0.016775032505393028, + 0.013331425376236439, + -0.004451168701052666, + -0.02870352193713188, + -0.020854242146015167, + 0.05349724739789963, + 0.03315908834338188, + 0.018541062250733376, + -0.03136591613292694, + 0.03549784794449806, + -0.0076525891199707985, + -0.06454484909772873, + 0.049847088754177094, + 0.012184737250208855, + 0.03575005754828453, + -0.050804175436496735, + 0.09406977146863937, + 0.05103312432765961, + -0.0036910600028932095, + 0.10705005377531052, + 0.011394658125936985, + -0.014218435622751713, + -0.042272791266441345, + 0.018426422029733658, + -0.08213183283805847, + -0.010240674018859863, + 0.051353540271520615, + 0.016103247180581093, + 0.04293083772063255, + -0.00462630670517683, + 0.001971749123185873, + -0.05101824551820755, + -0.017815101891756058, + -0.0788436159491539, + -0.019784294068813324, + 0.006863154470920563, + 0.04096531495451927, + 0.016416994854807854, + 0.018884461373090744, + -0.03645262494683266, + -0.02363709919154644, + 0.08447448164224625, + 0.027652855962514877, + -0.005039512179791927, + -0.05533800646662712, + 0.006148343440145254, + -0.03248206898570061, + -0.015117023140192032, + -0.056908346712589264, + 0.057090409100055695, + 0.02987913228571415, + -0.0642392635345459, + -0.01212853193283081, + -0.04195745661854744, + -0.008033841848373413, + -0.05249612778425217, + 0.05965931713581085, + 0.08591161668300629, + -0.012983623892068863, + 0.002055486897006631, + -0.002928174799308181, + -0.023014886304736137, + -0.05307631567120552, + 0.0325687900185585, + -0.008586175739765167, + -0.005393583793193102, + 0.009566529653966427, + 0.06500132381916046, + -0.02100509963929653, + -0.018470296636223793, + 0.001247459789738059, + 0.007388024125248194, + 0.012469757348299026, + 0.08475572615861893, + 0.06918514519929886, + 0.054265547543764114, + 0.03292711451649666, + -0.08437038213014603, + 0.07744771242141724, + -0.0004291488730814308, + -0.020394261926412582, + 0.039096955209970474, + 0.015851527452468872, + -0.009922537952661514, + 0.02087295800447464, + -0.019477976486086845, + -0.06510577350854874, + 0.008559669367969036, + 0.015032066963613033, + -0.022979427129030228, + -0.017166415229439735, + -0.014456263743340969, + -0.034205030649900436, + -0.04903494939208031, + 0.073653943836689, + -0.041798241436481476, + 0.0035302129108458757, + 0.031043095514178276, + 0.038764648139476776, + 0.03582717105746269, + -0.003121789079159498, + 0.03909862041473389, + -0.03283870965242386, + 0.06343409419059753, + 0.085169717669487, + 0.0037416887935250998, + 0.043896209448575974, + -0.02215113304555416, + -0.04062772914767265, + -0.029482074081897736, + 0.0013964198296889663, + 0.04621904715895653, + 0.030072476714849472, + -0.023583346977829933, + -0.016047311946749687, + -0.04016166180372238, + -0.026690224185585976, + 0.034725841134786606, + -0.08011004328727722, + -0.023635270074009895, + -0.01675681211054325, + 0.02217511460185051, + -0.018720457330346107, + 0.0413116030395031, + -0.0045730252750217915, + -0.08402986079454422, + 0.03641941770911217, + 0.028000695630908012, + 0.042173273861408234, + 0.024761751294136047, + -0.051845893263816833, + -0.07877497375011444, + -0.020710380747914314, + -0.035789184272289276, + 0.04824375733733177, + -0.04493764415383339, + 
-0.0014088008319959044, + 0.09272980690002441, + -0.030772028490900993, + 0.027623610571026802, + -0.0008853759500198066, + -0.015347420237958431, + -0.0006863650633022189, + 0.02924676053225994, + 0.03864092007279396, + -0.043402496725320816, + 0.11410719156265259, + 0.01606914773583412, + 0.03158045932650566, + -0.049648500978946686, + -0.026801105588674545, + 0.013934214599430561, + -0.04582132399082184, + -0.02133217453956604, + 0.013296819292008877, + 0.030687933787703514, + 0.0014671665849164128, + 0.005454834550619125, + -0.024595070630311966, + 0.036868833005428314, + -0.003586424048990011, + -0.007300499361008406, + 0.00619609747081995, + 0.004614396020770073, + 0.06406176835298538, + 0.010256785899400711, + -0.050202082842588425, + -0.013155301101505756, + -0.04005127400159836, + -0.027943719178438187, + 0.05738724395632744, + -0.002920332597568631, + -0.00731270294636488, + 0.04419538751244545, + 0.024069754406809807, + 0.012176074087619781, + 0.004615467507392168, + -0.04112132638692856, + -0.04844773933291435, + -0.012684458866715431, + 0.0071298484690487385, + -0.010914848186075687, + -0.03592529892921448, + -0.05016973987221718, + -0.011797907762229443, + -0.043843258172273636, + -0.03715396672487259, + 0.016528192907571793, + 0.024301515892148018, + 0.01335576456040144, + 0.021006477996706963, + -0.021391959860920906, + 0.05299517139792442, + 0.0070807491429150105, + -0.08096124231815338, + -0.07334060221910477, + -0.034530941396951675, + -0.04421507194638252, + 0.010524646379053593, + 0.009575314819812775, + -0.031711090356111526, + 0.023479584604501724, + -0.04212309420108795, + 0.016264619305729866, + 0.03907531499862671, + -0.0011187525233253837, + -0.03998023644089699, + -0.027464834973216057, + -0.07113838940858841, + -0.028915319591760635, + -0.01282753050327301, + -0.0033073138911277056, + 0.026715606451034546, + -0.002769897459074855, + 0.020033732056617737, + 0.014502385631203651, + -0.017903830856084824, + 0.06932531297206879, + 0.0432068407535553, + 0.01685408502817154, + 0.04834728315472603, + -0.009553197771310806, + 0.019799189642071724, + 0.01173039898276329, + 0.04158413037657738, + -0.018829666078090668, + -0.008410722948610783, + 0.008009687066078186, + 0.034592460840940475, + 0.07790639251470566, + -0.022050900384783745, + 0.04081638529896736, + 0.046872470527887344, + 0.0010260086273774505, + -0.05322079360485077, + 0.009096509777009487, + -0.06831686198711395, + -0.01390997413545847, + -0.020475609228014946, + 0.017393099144101143, + -0.007532020565122366, + -0.06435851007699966, + -0.014785194769501686, + 0.02654031664133072, + 0.004753720946609974, + 0.026440177112817764, + -0.028890414163470268, + -0.011440729722380638, + 0.003554105758666992, + -0.0022926912643015385, + -0.02393224649131298, + 0.03711748123168945, + -0.06023703143000603, + -0.008778683841228485, + -0.05984162166714668, + -0.024247022345662117, + -0.036919932812452316, + 0.05249374359846115, + 0.03022468276321888, + -0.011348876170814037, + 0.0008303995127789676, + 0.001597013440914452, + -0.015491127036511898, + -0.035073015838861465, + -0.024477796629071236, + -0.030328145250678062, + -0.09301470220088959, + -0.046440113335847855, + 0.036719027906656265, + -0.021899227052927017, + 0.04666316881775856, + -0.07481305301189423, + -0.04928148165345192, + -0.01480096485465765, + 0.0014140848070383072, + 0.016779841855168343, + -0.04318199306726456, + 0.011910341680049896, + -0.04019855335354805, + -0.027363713830709457, + 0.006433602888137102, + 0.023732252418994904, + 
-0.013081788085401058, + 0.02489032782614231, + 0.005415213759988546, + -0.058724161237478256, + 0.032487478107213974, + -0.014332194812595844, + -0.020952431485056877, + 0.055405858904123306, + -0.02239573374390602, + 0.016315918415784836, + 0.04710645601153374, + 0.006866136100143194, + -0.019589263945817947, + -0.046199049800634384, + 0.04977096989750862, + -0.03211359679698944, + 0.06759121268987656, + -0.007805021945387125, + 0.009877636097371578, + -0.003194598713889718, + -0.0014034705236554146, + 0.024012917652726173, + 0.0007609894964843988, + 0.04028927534818649, + 0.047299597412347794, + 0.04644732549786568, + 0.06253348290920258, + -0.03101237863302231, + -0.04797065258026123, + -0.02459110878407955, + -0.06663094460964203, + -0.012946722097694874, + -0.046321313828229904, + -0.03617801144719124, + -0.06608668714761734, + 0.01371682621538639, + -0.040183935314416885, + 0.027353622019290924, + -0.013125114142894745, + 0.020482128486037254, + -0.10186963528394699, + 0.03741387277841568, + -0.048566944897174835, + 0.0017904090927913785, + 0.0444694422185421, + -0.02355058304965496, + -0.04245513305068016, + 0.01599632203578949, + -0.00974870752543211, + -0.02246273122727871, + 0.011107604950666428, + -0.006354854442179203, + -0.08260829746723175, + -0.054969724267721176, + -0.038703542202711105, + -0.02590899169445038, + -0.012424441985785961, + 0.033952418714761734, + 0.032632969319820404, + 0.03585505858063698, + -0.027734532952308655, + -0.05185376852750778, + 0.005663866177201271, + 0.01415393128991127, + 0.007472912315279245, + -0.0325092077255249, + -0.0008526426972821355, + 0.05909401550889015, + -0.006496420595794916, + 0.06674317270517349, + 0.06033811718225479, + -0.04705937206745148, + 0.01221691444516182, + -0.005195186473429203, + 0.017006050795316696, + 0.015768419951200485, + -0.02346021682024002, + -0.04318040981888771, + -0.00965888798236847, + -0.012831253930926323, + -0.023086808621883392, + -0.043478451669216156, + 0.02215973101556301, + 0.01018955372273922, + -0.0029477940406650305, + -0.026364397257566452, + -0.04219489544630051, + -0.0690244510769844, + 0.0017003740649670362, + -0.03498053178191185, + -0.01891854591667652, + -0.020380523055791855, + -0.07183944433927536, + 0.01474913302809, + 0.012818068265914917, + 0.02298390306532383, + 0.006645163521170616, + -0.014497633092105389, + -0.05751577392220497, + -0.01127719134092331, + 0.014469895511865616, + 0.039319343864917755, + -0.002891098614782095, + 0.0038161359261721373, + -0.0176107045263052, + -0.02695712260901928, + 0.023520348593592644, + 0.053624920547008514, + -0.0472102165222168, + -0.021724319085478783, + -0.04204733297228813, + 0.004941252060234547, + -0.07744265347719193, + -0.028974706307053566, + -6.1493665270973e-05, + -0.020630594342947006, + -0.014794640243053436, + -0.045572925359010696, + 0.03233763575553894, + 0.00969443004578352, + 0.03665856271982193, + 0.027483846992254257, + 0.074271060526371, + -0.07454165071249008, + -0.034101732075214386, + -0.07216823101043701, + -0.001424514572136104, + -0.0025912360288202763, + -0.002444307319819927, + -0.012540637515485287, + 0.009027975611388683, + 0.06855443120002747, + -0.0013480151537805796, + 0.027303414419293404, + -0.019723499193787575, + 0.033644214272499084, + -0.04313155263662338, + -0.016152892261743546, + -0.020085612311959267, + 0.029526935890316963, + 0.0004591972683556378, + -0.013712934218347073, + 0.015895912423729897, + -0.046559300273656845, + -0.00015638815239071846, + 0.0015497541753575206, + -0.0015048328787088394, + 
0.06692329794168472, + 0.0013934546150267124, + 0.008921030908823013, + -0.010347972624003887, + -0.039798807352781296, + 0.06892028450965881, + 0.021145053207874298, + 0.007431029342114925, + -0.05281573906540871, + 0.015844792127609253, + 0.014578046277165413, + -0.0020482230465859175, + 0.03509555384516716, + -0.021227506920695305, + -0.03619229048490524, + 0.004116897005587816, + 0.02835669554769993, + -0.0028248224407434464, + 0.00836214143782854, + -0.004688165616244078, + 0.04566347226500511, + -0.0352579727768898, + -0.007859165780246258, + -0.003958444111049175, + 0.023938892409205437, + 0.04262895882129669, + -0.02836589328944683, + 0.0456448458135128, + -0.062015753239393234, + 0.03518408164381981, + 0.06333593279123306, + -0.0155468275770545, + 0.013991734012961388, + 0.02207978442311287, + 0.0032898876816034317, + 0.05948015674948692, + 0.010670959949493408, + -0.00624996330589056, + -0.04401599243283272, + -0.0022705462761223316 + ], + "content_embedding": [ + -0.02403288148343563, + 0.08599621057510376, + -0.003520619124174118, + -0.002186001278460026, + -0.002845448674634099, + 0.005484029185026884, + 0.016410797834396362, + -0.02119613252580166, + -0.04811510443687439, + -0.013274849392473698, + -0.043441254645586014, + 0.009376521222293377, + 0.019551504403352737, + 0.0036566888447850943, + 0.004842979833483696, + -0.006052782759070396, + 0.025645455345511436, + -0.012315846979618073, + -0.027703408151865005, + 0.02152254432439804, + 0.011010917834937572, + -0.012258552014827728, + -0.01729186624288559, + -0.02833859808743, + 0.04027653858065605, + -0.009032614529132843, + -0.017036406323313713, + 0.003077560570091009, + -0.04315951466560364, + 0.029963837936520576, + 0.01716647669672966, + -0.02696092799305916, + -0.006828296463936567, + -0.026644738391041756, + -0.0889354720711708, + -0.05207673832774162, + 0.05015599727630615, + -0.019386274740099907, + -0.03684607893228531, + 0.046758152544498444, + 0.05076799914240837, + 0.007221075240522623, + 0.016260067000985146, + -0.03850802034139633, + 0.054756514728069305, + -0.012776038609445095, + 0.036689598113298416, + -0.02616005390882492, + 0.033269986510276794, + -0.05991198495030403, + -0.00043452056706883013, + -0.004230191465467215, + -0.008319171145558357, + 0.0068639665842056274, + -0.016133679077029228, + 0.005670355167239904, + -0.005839305464178324, + 0.027315128594636917, + 0.04275438189506531, + -0.05024448782205582, + 0.05199997127056122, + 0.05441230162978172, + -0.049353599548339844, + -0.008365850895643234, + 0.0066192797385156155, + -0.055218830704689026, + 0.01654958724975586, + 0.013456150889396667, + -0.01148252934217453, + -0.014086110517382622, + 0.024422504007816315, + -0.001102397684007883, + -0.022180721163749695, + 0.022023402154445648, + -0.03140854462981224, + 0.0038229606579989195, + 0.01081792451441288, + -0.006220541428774595, + -0.02462770976126194, + 0.051595576107501984, + 0.04433179274201393, + -0.05173564329743385, + 0.00422016903758049, + 0.01004322525113821, + 0.01985878124833107, + 0.028202056884765625, + -0.0016253730282187462, + 0.01617475040256977, + 0.010118816047906876, + 0.0603351853787899, + 0.0140571603551507, + 0.0029350141994655132, + 0.04240429773926735, + 0.06991686671972275, + -0.10119865089654922, + 0.08219177275896072, + 0.02891121245920658, + 0.00130809610709548, + -0.016763439401984215, + -0.00509023480117321, + -0.0035567383747547865, + 0.02000737562775612, + -0.002168829319998622, + 0.01889166608452797, + 0.04547121003270149, + 0.04019659012556076, + 
0.024593960493803024, + 0.017190879210829735, + 0.007603269536048174, + 0.007314966060221195, + -0.06791973859071732, + -0.036731328815221786, + 0.007499238010495901, + 0.02694091759622097, + -0.02129160799086094, + 0.021507520228624344, + 0.09310256689786911, + -0.03434507176280022, + 0.006634920369833708, + 0.03453971818089485, + 0.0155464056879282, + 0.003056582296267152, + -0.004192651715129614, + 0.03274714946746826, + 0.04909229651093483, + 0.025430802255868912, + -0.01268640998750925, + -0.047261349856853485, + 0.0018452038057148457, + -0.0002589405339676887, + -0.07050265371799469, + 0.004126655403524637, + -0.07842830568552017, + 0.00013916153693571687, + 0.047662656754255295, + 0.01362426858395338, + -0.07088430225849152, + -0.026547620072960854, + 0.010091855190694332, + -0.015962867066264153, + 0.03528159111738205, + 0.011798265390098095, + 0.020107097923755646, + -0.013524978421628475, + 0.016901858150959015, + -0.08753035962581635, + -0.062227677553892136, + -0.028078285977244377, + -0.03297634422779083, + 0.008013743907213211, + 0.018041228875517845, + -0.022115394473075867, + 0.00595641927793622, + 0.019160043448209763, + 0.008510938845574856, + -0.0474565215408802, + -0.038813307881355286, + -0.016643475741147995, + 0.06800767034292221, + 0.041471801698207855, + 0.03696686029434204, + 0.03421548008918762, + -0.03440884128212929, + 0.06769654899835587, + -0.01683412306010723, + 0.028133966028690338, + 0.018801912665367126, + 0.015075244940817356, + 0.012032945640385151, + 0.03569433093070984, + -0.021484674885869026, + -0.01213730126619339, + 0.023888660594820976, + -0.03447817265987396, + 0.03329891338944435, + -0.007350335828959942, + 0.06528840214014053, + -0.03317185863852501, + 0.024836916476488113, + -0.061747901141643524, + -0.01068184245377779, + -0.021780723705887794, + 0.06678029894828796, + 0.03825325518846512, + -0.02603997103869915, + 0.0200904980301857, + 0.025599440559744835, + -0.05657019838690758, + 0.028341008350253105, + -0.0439138226211071, + 0.05886855348944664, + -0.049358345568180084, + 0.014541592448949814, + 0.005707047879695892, + 0.008378228172659874, + -0.030232897028326988, + 0.06261618435382843, + -0.013355602510273457, + -0.036993358284235, + -0.028917213901877403, + -0.0680958554148674, + -0.027451951056718826, + -0.07567653805017471, + 0.014718701131641865, + -0.009075576439499855, + 0.013478180393576622, + 0.03208685666322708, + 0.031021032482385635, + 0.016195151954889297, + 0.008199494332075119, + 0.08357387781143188, + -0.01200099941343069, + 0.022620532661676407, + 0.05445336923003197, + -0.018056273460388184, + -0.04719870164990425, + 0.04062207415699959, + 0.0009855915559455752, + -0.0462096631526947, + 0.05879806727170944, + 0.03913828358054161, + -0.05726383253931999, + 0.02152623049914837, + 0.002137464936822653, + 0.01444965973496437, + -0.019534891471266747, + -0.0375588983297348, + -0.03905639797449112, + -0.027474306523799896, + -0.001400938956066966, + -0.033295221626758575, + -0.06691068410873413, + 0.0620984211564064, + 0.020130982622504234, + 0.027853885665535927, + 0.028353361412882805, + 0.007000260055065155, + -0.015244328416883945, + 0.028457706794142723, + 0.05079026147723198, + 0.0265045203268528, + -0.008008715696632862, + 0.011166643351316452, + -0.02545643411576748, + -0.09122578054666519, + -0.000896137673407793, + 0.0055070980452001095, + 0.023860882967710495, + -0.056958671659231186, + 0.002000730484724045, + 0.000531299039721489, + 0.01964678056538105, + 0.02459172159433365, + 0.010496687144041061, + 
0.032775767147541046, + -0.040455516427755356, + -0.01898832432925701, + 0.048115238547325134, + 0.008294769562780857, + -0.02248159423470497, + -0.0020450311712920666, + -0.02413240633904934, + 0.0423247255384922, + -0.02917350083589554, + -0.0197658222168684, + 0.009233975782990456, + -0.02438087947666645, + -0.057745061814785004, + 0.020396480336785316, + -0.028454614803195, + -0.007276479620486498, + -0.0060751899145543575, + 0.016126802191138268, + 0.07733260095119476, + 0.0055052717216312885, + -0.0241200253367424, + -0.009856182150542736, + -0.01288821641355753, + 0.021394196897745132, + -0.0027394252829253674, + -0.057746946811676025, + -0.055244673043489456, + -0.03518827632069588, + 0.020108383148908615, + -0.037429675459861755, + 0.06402620673179626, + 0.014570947736501694, + 0.0011715830769389868, + 0.04670550301671028, + -0.03730842098593712, + -0.002726265462115407, + -0.03393309563398361, + 0.03357642516493797, + 0.006151925306767225, + 0.027046309784054756, + 0.06079886853694916, + 0.08915705978870392, + -0.040912795811891556, + -0.009531376883387566, + -0.008656186051666737, + -0.010746185667812824, + 0.011325616389513016, + 0.00910742674022913, + -0.00870103295892477, + -0.02257593534886837, + -0.008474824018776417, + -0.01126043125987053, + -0.006183316465467215, + 0.03318650647997856, + -0.005288233514875174, + -0.031032271683216095, + 0.02630523219704628, + 0.02767125330865383, + -0.01024201512336731, + -0.02395681105554104, + 0.07798302173614502, + 0.06453987956047058, + -0.005852920934557915, + 0.08618523925542831, + -0.009387078694999218, + 0.007869970984756947, + -0.03182069584727287, + -0.022106602787971497, + -0.0868132933974266, + 0.028115050867199898, + 0.07332660257816315, + -0.0037628302816301584, + 0.03760993853211403, + -0.027132470160722733, + 0.030093027278780937, + -0.037918947637081146, + 0.039932165294885635, + -0.07345228642225266, + -0.046965666115283966, + -0.0013359235599637032, + 0.00791996717453003, + 0.03006441704928875, + 0.04222951829433441, + -0.0141807422041893, + -0.021912341937422752, + -0.0065930006094276905, + -0.0038735137786716223, + -0.038659993559122086, + -0.057126715779304504, + 0.006521300878375769, + -0.030727874487638474, + -0.022539950907230377, + -0.06316803395748138, + 0.06865260004997253, + 0.031939368695020676, + -0.055947039276361465, + 0.0066061182878911495, + -0.014607742428779602, + -0.02204318344593048, + -0.05172397196292877, + 0.02495967596769333, + 0.07759078592061996, + 0.0027070387732237577, + 0.008220532909035683, + 0.02342107705771923, + 0.03180982172489166, + -0.03099866956472397, + 0.03512701019644737, + -0.03168865293264389, + 0.012847676873207092, + 0.06514899432659149, + 0.08987598121166229, + 0.0024377063382416964, + 0.02394464798271656, + -0.041963983327150345, + -0.004438851028680801, + 0.015682004392147064, + 0.0410960391163826, + 0.05460710451006889, + 0.057952240109443665, + 0.020986247807741165, + -0.08822161704301834, + 0.01074486318975687, + 0.014192330650985241, + -0.025726256892085075, + 0.0719577744603157, + 0.0021957557182759047, + 0.022048326209187508, + -0.04020603001117706, + 0.0014428661670535803, + -0.0357256680727005, + -0.030243121087551117, + -0.0376482829451561, + -0.020463477820158005, + -0.022432789206504822, + -0.03096373938024044, + -0.01816924288868904, + -0.05358648672699928, + 0.07382772862911224, + -0.014173741452395916, + 0.0201816875487566, + 0.006632740143686533, + 0.0025384302716702223, + 0.04055432602763176, + 0.0069578299298882484, + -0.019879184663295746, + 
-0.059168167412281036, + 0.028969064354896545, + 0.05784929171204567, + -0.002147398190572858, + 0.043272342532873154, + -0.004542165901511908, + -0.0482858382165432, + -0.004174860659986734, + 0.020621255040168762, + 0.04293094202876091, + 0.0292718093842268, + -0.00980047881603241, + -0.021710650995373726, + -0.02639775723218918, + -0.019148416817188263, + 0.09012293070554733, + -0.045379895716905594, + -0.023026002570986748, + -0.030370570719242096, + 0.008383749052882195, + 0.014925302006304264, + -0.0011008139699697495, + 0.006763918325304985, + -0.025421440601348877, + -0.004525069613009691, + 0.03806034475564957, + 0.005547006148844957, + 0.05000557750463486, + -0.005488873925060034, + 0.021936720237135887, + 0.020678924396634102, + -0.004737663082778454, + 0.040749210864305496, + -0.0533074289560318, + 0.025417080149054527, + 0.08257681876420975, + -0.005508026573807001, + -0.009805315174162388, + 0.07595512270927429, + -0.0018210643902420998, + -0.029579052701592445, + 0.009883117862045765, + -0.015399829484522343, + -0.017134232446551323, + 0.03538937494158745, + 0.0827752947807312, + 0.012051745317876339, + -0.07159247249364853, + -0.02079680748283863, + 0.03473742678761482, + 0.018268825486302376, + 0.023407628759741783, + -0.036390434950590134, + 0.07932467013597488, + 0.004754354711622, + -0.012676632963120937, + -0.06851805001497269, + 0.02255256660282612, + 0.03780437260866165, + 0.04691546410322189, + 0.018480120226740837, + 0.0005508657777681947, + 0.05573705583810806, + -0.009221675805747509, + -0.06587770581245422, + 0.015470701269805431, + -0.012271493673324585, + -0.025784730911254883, + 0.022757982835173607, + -0.01213389914482832, + 0.017422374337911606, + 0.012241406366229057, + 0.04379018396139145, + 0.01124424859881401, + 0.002584748901426792, + 0.02793707512319088, + -0.04307323694229126, + 0.03207562863826752, + 0.05286982282996178, + 0.01086041983217001, + 0.009665313176810741, + -0.054988693445920944, + 0.01324005052447319, + -0.04261464625597, + -0.02707112766802311, + -0.002658748533576727, + 0.03499991446733475, + -0.005491453222930431, + 0.006562606431543827, + 0.018722862005233765, + 0.07151596993207932, + -0.003824777202680707, + -0.04148973524570465, + -0.06528852880001068, + -0.018773429095745087, + -0.023220594972372055, + 0.021337825804948807, + 0.003552130190655589, + -0.07254927605390549, + 0.030997687950730324, + 0.009675328619778156, + -0.007739027962088585, + -0.001004970632493496, + -0.0009698161156848073, + -0.03183043375611305, + -0.003764253342524171, + -0.06521959602832794, + 0.0077109793201088905, + 0.008421109057962894, + 0.02024395577609539, + 0.06566902250051498, + 0.011374534107744694, + 0.040655992925167084, + 0.0274888314306736, + -0.0748000368475914, + 0.06930309534072876, + 0.014980202540755272, + -0.03328235074877739, + 0.07670122385025024, + -0.013236696831882, + 0.09516690671443939, + 0.0004450292617548257, + 0.01539886835962534, + -0.11376772075891495, + -0.0004633136559277773, + -0.023844275623559952, + 0.023186970502138138, + 0.0542912632226944, + 0.006978484336286783, + 0.03704620897769928, + 0.0761408805847168, + 0.0018389171455055475, + -0.02292831614613533, + 0.035566531121730804, + -0.06125196814537048, + -0.01740599237382412, + -0.03189321979880333, + -0.023606419563293457, + 0.0002929234178736806, + -0.032161861658096313, + -0.02417462132871151, + 0.007371667306870222, + 0.01384897343814373, + 0.0011207011993974447, + -0.054523780941963196, + -0.03664090484380722, + 0.012376014143228531, + 0.005946264136582613, + 
-0.05214802548289299, + 0.06363234668970108, + -0.01850913278758526, + 0.03264418616890907, + -0.08298838883638382, + 0.028580913320183754, + -0.06874261051416397, + 0.04560680687427521, + -0.01221420057117939, + -0.015291322953999043, + 0.011163976043462753, + -0.01707146316766739, + -0.021233493462204933, + 0.0009499920415692031, + -0.011884773150086403, + 0.031535957008600235, + -0.07693900167942047, + -0.030928723514080048, + 0.02938068099319935, + 0.013103127479553223, + 0.009228850714862347, + -0.04399878904223442, + -0.038614701479673386, + 0.021263988688588142, + 0.0270336102694273, + -0.0022124540992081165, + -0.032499391585588455, + 0.029354240745306015, + -0.028516946360468864, + -0.03277367725968361, + -0.04755333065986633, + -0.03938357159495354, + -0.029368583112955093, + 0.06943269073963165, + 0.017946777865290642, + -0.01990826241672039, + 0.014896579086780548, + -0.06675421446561813, + -0.04962918534874916, + 0.10290152579545975, + -0.05442032590508461, + 0.0268304031342268, + 0.01750801131129265, + 0.0006768505554646254, + -0.007724875118583441, + -0.05064627528190613, + 0.03560181334614754, + 0.005476392339915037, + 0.008490868844091892, + -0.005553610157221556, + -0.04698188602924347, + -0.025146158412098885, + 0.0026807712856680155, + 0.0254969522356987, + 0.005350390914827585, + 0.004036207217723131, + 0.02843003161251545, + 0.008211316540837288, + 0.03748054802417755, + -0.05300099402666092, + -0.012365839444100857, + -0.0130928261205554, + -0.03939966484904289, + -0.026050617918372154, + -0.04415596276521683, + -0.03128521516919136, + -0.0388399139046669, + 0.05186399444937706, + -0.049164481461048126, + 0.043122462928295135, + -0.0315178819000721, + 0.012280933558940887, + -0.0792573019862175, + 0.05075725167989731, + -0.04304235801100731, + 0.018651138991117477, + 0.03076835162937641, + -0.060538437217473984, + -0.023055853322148323, + 0.01177286822348833, + 0.058492839336395264, + 0.025716299191117287, + 0.009599392302334309, + -0.012054546736180782, + -0.027742642909288406, + -0.05367058888077736, + -0.026801493018865585, + -0.014112668111920357, + -0.006383270025253296, + 1.2056754712830298e-05, + 0.039540693163871765, + 0.02213987335562706, + -0.08540242910385132, + -0.04058465361595154, + 0.008699232712388039, + 0.031218260526657104, + 0.0021884969901293516, + 0.011582552455365658, + 0.025049764662981033, + 0.04276714473962784, + 0.009781924076378345, + 0.05123818293213844, + 0.07441077381372452, + -0.029336893931031227, + 0.02714505046606064, + 0.041163086891174316, + -0.006217346992343664, + 0.025060802698135376, + 0.023126818239688873, + -0.07503696531057358, + -0.0020585027523338795, + 0.005981603171676397, + -0.027166299521923065, + -0.020568007603287697, + 0.005853605456650257, + -0.006091856863349676, + -0.033637579530477524, + -0.039759427309036255, + -0.06260950118303299, + -0.024897020310163498, + 0.02462431788444519, + 0.01859314925968647, + 0.010398009791970253, + -0.00020126033632550389, + -0.06035298481583595, + -0.019108809530735016, + 0.042335279285907745, + -0.03559218347072601, + -0.02529655024409294, + -0.02809930220246315, + -0.05607590824365616, + -0.026691321283578873, + -0.026792382821440697, + 0.04120280221104622, + -0.015540994703769684, + -0.005803580395877361, + 0.020203134045004845, + -0.05952906608581543, + -0.004206392448395491, + -0.011308858171105385, + -0.037488050758838654, + 0.007830106653273106, + -0.009608179330825806, + 0.00015318443183787167, + -0.0684049054980278, + 0.0018899703864008188, + -0.023107590153813362, + 
-0.015158215537667274, + -0.030714333057403564, + 0.025599345564842224, + 0.018543586134910583, + -0.0075812251307070255, + 0.04323196783661842, + -0.005424505099654198, + 0.06189188361167908, + -0.01650432124733925, + 0.0035911088343709707, + 0.01841658726334572, + 0.012203766033053398, + -0.015994010493159294, + -0.0018007376929745078, + 0.011197488754987717, + -0.01184547133743763, + 0.06119342893362045, + -0.04449119791388512, + -0.010956074111163616, + 0.015267443843185902, + 0.03397256135940552, + -0.022375188767910004, + -0.010562969371676445, + -0.030176721513271332, + 0.0649082288146019, + -0.026252834126353264, + -0.043264783918857574, + 0.020383840426802635, + -0.014332938008010387, + -0.019906938076019287, + 0.002487052930518985, + 0.016441592946648598, + 0.05937374755740166, + 0.029459767043590546, + -0.03393784165382385, + -0.016614725813269615, + -0.03633803129196167, + 0.04786395654082298, + -0.014543719589710236, + 0.0030611655674874783, + -0.03296193480491638, + 0.024570109322667122, + -0.08628548681735992, + -0.008082202635705471, + 0.05895440652966499, + -0.05567137897014618, + -0.05882163718342781, + -0.005672273691743612, + -0.022155780345201492, + -0.03165644034743309, + -0.04472680389881134, + 0.025338545441627502, + 0.053867027163505554, + -0.020717058330774307, + -0.019026240333914757, + -0.03037080727517605, + -0.005734192673116922, + -0.014109884388744831, + -0.005240253172814846, + 0.056925658136606216, + 0.006881027482450008, + 0.006321505177766085, + 0.025533199310302734, + 0.0066923401318490505, + 0.014867548830807209, + 0.01877731829881668, + -0.03573253005743027, + 0.05504361167550087, + 0.044875118881464005, + 0.008996511809527874, + -0.020861415192484856, + 0.0196152962744236 + ], + "chunk_ind": 0 + }, + { + "url": "https://docs.danswer.dev/more/use_cases/enterprise_search", + "title": "Enterprise Search", + "content": "More than Search\nWhen analyzing the entire corpus of knowledge within your company is as easy as asking a question in a search bar, your entire team can stay informed and up to date. Danswer also makes it trivial to identify where knowledge is well documented and where it is lacking. Team members who are centers of knowledge can begin to effectively document their expertise since it is no longer being thrown into a black hole. All of this allows the organization to achieve higher efficiency and drive business outcomes.\n\nWith Generative AI, the entire user experience has evolved as well. For example, instead of just finding similar cases for your customer support team to reference, Danswer breaks down the issue and explains it so that even the most junior members can understand it. This in turn lets them give the most holistic and technically accurate response possible to your customers. 
On the other end, even the super stars of your sales team will not be able to review 10 hours of transcripts before hopping on that critical call, but Danswer can easily parse through it in mere seconds and give crucial context to help your team close.", + "title_embedding": [ + -0.011060578748583794, + 0.05994883179664612, + 0.008845113217830658, + 0.011364905163645744, + 0.03147757425904274, + 0.04506697878241539, + -0.025942707434296608, + -0.011002028360962868, + -0.03507396578788757, + -0.01727098599076271, + -0.016820134595036507, + 0.04671240597963333, + 0.023456331342458725, + -0.005752791650593281, + -0.011421029455959797, + -0.04169125109910965, + 0.0652366429567337, + -0.011136278510093689, + -0.013501451350748539, + -0.006273901090025902, + 0.0012236927868798375, + -0.030771249905228615, + 0.010098040103912354, + -0.02360220067203045, + 0.006734110414981842, + 0.001379420980811119, + -0.0047225081361830235, + -0.013901330530643463, + 0.014645840041339397, + -0.02156134508550167, + -0.026707857847213745, + -0.00676271365955472, + 0.056067030876874924, + -0.0455806590616703, + -0.02762053906917572, + -0.0965149849653244, + 0.04567359760403633, + 0.053895801305770874, + 0.029093541204929352, + 0.0199823547154665, + 0.047089505940675735, + 0.026028119027614594, + -0.0034626282285898924, + -0.021002190187573433, + 0.04217635095119476, + -0.015198000706732273, + 0.047393105924129486, + 0.008588545024394989, + 0.07058116048574448, + -0.09135723114013672, + -0.009591161273419857, + 0.014971816912293434, + -8.222273208957631e-07, + -0.05534408614039421, + -0.029965048655867577, + -0.028381407260894775, + 0.025547217577695847, + 0.038583844900131226, + 0.01984122209250927, + -0.02435469813644886, + 0.04955849424004555, + 0.0128632802516222, + -0.022573867812752724, + 0.025284791365265846, + 0.04496009647846222, + 0.0004200333496555686, + -0.0015001222491264343, + 0.02609623409807682, + 0.023677939549088478, + -0.05961468443274498, + 0.06799754500389099, + -0.014409428462386131, + -0.010374268516898155, + 0.019634529948234558, + -0.03720257058739662, + -0.04654879495501518, + 0.0056068566627800465, + -0.021244503557682037, + -0.03198164328932762, + 0.05707596614956856, + 0.043927326798439026, + -0.033356692641973495, + 0.015723733231425285, + -0.027493512257933617, + 0.04525380581617355, + 0.025808652862906456, + -0.007083983160555363, + -0.011038954369723797, + -0.004390218295156956, + -0.006583297159522772, + 0.003319315379485488, + 0.061810243874788284, + 0.05887124314904213, + 0.07722297310829163, + -0.06601747870445251, + 0.036486171185970306, + 0.05119618400931358, + 0.00945530366152525, + -0.03005358576774597, + -0.047870855778455734, + -0.0359003059566021, + -0.005699747242033482, + 0.053807053714990616, + -0.001554036163724959, + 0.060548700392246246, + 0.05476829782128334, + 0.00426551653072238, + -0.005215689539909363, + -0.0352163203060627, + -0.03284529969096184, + -0.03373449295759201, + 0.021254340186715126, + -0.010481598787009716, + 0.02651101164519787, + -0.00481840968132019, + 0.045160870999097824, + 0.09704204648733139, + -0.03473421558737755, + 0.015584945678710938, + 0.06579536944627762, + 0.0651017278432846, + 0.007380738854408264, + 0.00624364148825407, + 0.07893780618906021, + -0.019876087084412575, + 0.006619091611355543, + -0.030776498839259148, + -0.016426965594291687, + 0.014603338204324245, + -0.03326896205544472, + 0.003784433240070939, + 0.025205042213201523, + 0.03047170303761959, + -0.03364298865199089, + 0.005974944215267897, + 0.015994269400835037, + 
-0.10205432027578354, + -0.026497431099414825, + 0.07166463136672974, + -0.007370935752987862, + 0.034912627190351486, + -0.004656887147575617, + 0.03219066560268402, + -0.02239271067082882, + 0.012679396197199821, + -0.07867992669343948, + -0.026638884097337723, + -0.008346030488610268, + -0.027243634685873985, + 0.012043806724250317, + 0.024078860878944397, + 0.0006219774950295687, + 0.0032065671402961016, + -0.008564895950257778, + -0.03275461867451668, + -0.05791788548231125, + -0.044815272092819214, + 0.006680188700556755, + 0.04072298854589462, + 0.016144724562764168, + -0.0008583687013015151, + 0.03699830546975136, + -0.04675738513469696, + 0.06570404022932053, + -0.011776894330978394, + -0.011386243626475334, + -0.0003363603027537465, + 0.039485324174165726, + 0.014156803488731384, + 0.018139634281396866, + -0.014710970222949982, + -0.052651647478342056, + 0.02912742830812931, + 0.024101730436086655, + 0.0413704477250576, + 0.03631320223212242, + 0.046750932931900024, + -0.017062805593013763, + 0.017990263178944588, + -0.03954370319843292, + -0.006972718983888626, + -0.03784368559718132, + 0.02746269851922989, + 0.04107878357172012, + -0.005694024730473757, + -0.03896583244204521, + 0.026265999302268028, + -0.035318680107593536, + -0.018394622951745987, + 0.013594037853181362, + 0.0381510891020298, + -0.010223937220871449, + 0.032417282462120056, + 0.01610656827688217, + -0.013205642811954021, + -0.03757423907518387, + 0.03799910470843315, + -0.039449408650398254, + -0.011290505528450012, + -0.016824476420879364, + 0.007128347177058458, + 0.030213234946131706, + -0.09385695308446884, + 0.014417118392884731, + -0.021249795332551003, + -0.021371016278862953, + 0.031582340598106384, + -0.015021033585071564, + 0.03207740932703018, + 0.04465494304895401, + 0.051139406859874725, + -0.004539252258837223, + -0.004026447422802448, + 0.036198731511831284, + 0.002513982355594635, + -0.022555746138095856, + -0.023142442107200623, + 0.026506206020712852, + -0.0208470169454813, + 0.01958189532160759, + 0.02606782503426075, + -0.050900157541036606, + 0.001175468903966248, + 0.0026185859460383654, + 0.01644700951874256, + 0.047048378735780716, + -0.006155692040920258, + 0.013264120556414127, + -0.004277337808161974, + 0.022337032482028008, + -0.030710609629750252, + -0.06784506887197495, + 0.010662010870873928, + -0.020733945071697235, + -0.01206474844366312, + -0.0005046974983997643, + -0.004159707576036453, + 0.028128545731306076, + -0.011551725678145885, + 0.057953692972660065, + 0.028500419110059738, + 0.02070418931543827, + 0.029373178258538246, + -0.053878165781497955, + -0.03885475918650627, + -0.011427262797951698, + -0.040592946112155914, + 0.019192807376384735, + -0.013966009952127934, + 0.002324307570233941, + 0.027266129851341248, + 0.02721570059657097, + -0.013851913623511791, + 0.06292124837636948, + -0.019983768463134766, + -0.06498263776302338, + -0.014787066727876663, + 0.07545251399278641, + 0.009921795688569546, + -0.02266773208975792, + -0.0174646507948637, + -0.0037801002617925406, + 0.037214070558547974, + -0.033669255673885345, + -0.0033054312225431204, + -0.004362864885479212, + -0.010861773043870926, + -0.041649043560028076, + 0.02711806818842888, + -0.001099557732231915, + 0.0007163260015659034, + 0.01317980233579874, + -0.011158796027302742, + 0.03966476768255234, + 0.023275790736079216, + -0.011645027436316013, + 0.0030634249560534954, + -0.01243121363222599, + 0.01271719578653574, + 0.003938829991966486, + -0.00769989937543869, + -0.039121564477682114, + 
0.0005735178128816187, + 0.02157283015549183, + 0.005828005261719227, + 0.03934130072593689, + 0.015216797590255737, + 0.017237801104784012, + -0.037648268043994904, + -0.007132838945835829, + -0.018956882879137993, + -0.0597093440592289, + 0.058341480791568756, + -0.0008284997311420739, + 0.02095271646976471, + 0.043099164962768555, + 0.09887702018022537, + -0.01221393421292305, + -0.02239784225821495, + 0.016775032505393028, + 0.013331425376236439, + -0.004451168701052666, + -0.02870352193713188, + -0.020854242146015167, + 0.05349724739789963, + 0.03315908834338188, + 0.018541062250733376, + -0.03136591613292694, + 0.03549784794449806, + -0.0076525891199707985, + -0.06454484909772873, + 0.049847088754177094, + 0.012184737250208855, + 0.03575005754828453, + -0.050804175436496735, + 0.09406977146863937, + 0.05103312432765961, + -0.0036910600028932095, + 0.10705005377531052, + 0.011394658125936985, + -0.014218435622751713, + -0.042272791266441345, + 0.018426422029733658, + -0.08213183283805847, + -0.010240674018859863, + 0.051353540271520615, + 0.016103247180581093, + 0.04293083772063255, + -0.00462630670517683, + 0.001971749123185873, + -0.05101824551820755, + -0.017815101891756058, + -0.0788436159491539, + -0.019784294068813324, + 0.006863154470920563, + 0.04096531495451927, + 0.016416994854807854, + 0.018884461373090744, + -0.03645262494683266, + -0.02363709919154644, + 0.08447448164224625, + 0.027652855962514877, + -0.005039512179791927, + -0.05533800646662712, + 0.006148343440145254, + -0.03248206898570061, + -0.015117023140192032, + -0.056908346712589264, + 0.057090409100055695, + 0.02987913228571415, + -0.0642392635345459, + -0.01212853193283081, + -0.04195745661854744, + -0.008033841848373413, + -0.05249612778425217, + 0.05965931713581085, + 0.08591161668300629, + -0.012983623892068863, + 0.002055486897006631, + -0.002928174799308181, + -0.023014886304736137, + -0.05307631567120552, + 0.0325687900185585, + -0.008586175739765167, + -0.005393583793193102, + 0.009566529653966427, + 0.06500132381916046, + -0.02100509963929653, + -0.018470296636223793, + 0.001247459789738059, + 0.007388024125248194, + 0.012469757348299026, + 0.08475572615861893, + 0.06918514519929886, + 0.054265547543764114, + 0.03292711451649666, + -0.08437038213014603, + 0.07744771242141724, + -0.0004291488730814308, + -0.020394261926412582, + 0.039096955209970474, + 0.015851527452468872, + -0.009922537952661514, + 0.02087295800447464, + -0.019477976486086845, + -0.06510577350854874, + 0.008559669367969036, + 0.015032066963613033, + -0.022979427129030228, + -0.017166415229439735, + -0.014456263743340969, + -0.034205030649900436, + -0.04903494939208031, + 0.073653943836689, + -0.041798241436481476, + 0.0035302129108458757, + 0.031043095514178276, + 0.038764648139476776, + 0.03582717105746269, + -0.003121789079159498, + 0.03909862041473389, + -0.03283870965242386, + 0.06343409419059753, + 0.085169717669487, + 0.0037416887935250998, + 0.043896209448575974, + -0.02215113304555416, + -0.04062772914767265, + -0.029482074081897736, + 0.0013964198296889663, + 0.04621904715895653, + 0.030072476714849472, + -0.023583346977829933, + -0.016047311946749687, + -0.04016166180372238, + -0.026690224185585976, + 0.034725841134786606, + -0.08011004328727722, + -0.023635270074009895, + -0.01675681211054325, + 0.02217511460185051, + -0.018720457330346107, + 0.0413116030395031, + -0.0045730252750217915, + -0.08402986079454422, + 0.03641941770911217, + 0.028000695630908012, + 0.042173273861408234, + 0.024761751294136047, + 
-0.051845893263816833, + -0.07877497375011444, + -0.020710380747914314, + -0.035789184272289276, + 0.04824375733733177, + -0.04493764415383339, + -0.0014088008319959044, + 0.09272980690002441, + -0.030772028490900993, + 0.027623610571026802, + -0.0008853759500198066, + -0.015347420237958431, + -0.0006863650633022189, + 0.02924676053225994, + 0.03864092007279396, + -0.043402496725320816, + 0.11410719156265259, + 0.01606914773583412, + 0.03158045932650566, + -0.049648500978946686, + -0.026801105588674545, + 0.013934214599430561, + -0.04582132399082184, + -0.02133217453956604, + 0.013296819292008877, + 0.030687933787703514, + 0.0014671665849164128, + 0.005454834550619125, + -0.024595070630311966, + 0.036868833005428314, + -0.003586424048990011, + -0.007300499361008406, + 0.00619609747081995, + 0.004614396020770073, + 0.06406176835298538, + 0.010256785899400711, + -0.050202082842588425, + -0.013155301101505756, + -0.04005127400159836, + -0.027943719178438187, + 0.05738724395632744, + -0.002920332597568631, + -0.00731270294636488, + 0.04419538751244545, + 0.024069754406809807, + 0.012176074087619781, + 0.004615467507392168, + -0.04112132638692856, + -0.04844773933291435, + -0.012684458866715431, + 0.0071298484690487385, + -0.010914848186075687, + -0.03592529892921448, + -0.05016973987221718, + -0.011797907762229443, + -0.043843258172273636, + -0.03715396672487259, + 0.016528192907571793, + 0.024301515892148018, + 0.01335576456040144, + 0.021006477996706963, + -0.021391959860920906, + 0.05299517139792442, + 0.0070807491429150105, + -0.08096124231815338, + -0.07334060221910477, + -0.034530941396951675, + -0.04421507194638252, + 0.010524646379053593, + 0.009575314819812775, + -0.031711090356111526, + 0.023479584604501724, + -0.04212309420108795, + 0.016264619305729866, + 0.03907531499862671, + -0.0011187525233253837, + -0.03998023644089699, + -0.027464834973216057, + -0.07113838940858841, + -0.028915319591760635, + -0.01282753050327301, + -0.0033073138911277056, + 0.026715606451034546, + -0.002769897459074855, + 0.020033732056617737, + 0.014502385631203651, + -0.017903830856084824, + 0.06932531297206879, + 0.0432068407535553, + 0.01685408502817154, + 0.04834728315472603, + -0.009553197771310806, + 0.019799189642071724, + 0.01173039898276329, + 0.04158413037657738, + -0.018829666078090668, + -0.008410722948610783, + 0.008009687066078186, + 0.034592460840940475, + 0.07790639251470566, + -0.022050900384783745, + 0.04081638529896736, + 0.046872470527887344, + 0.0010260086273774505, + -0.05322079360485077, + 0.009096509777009487, + -0.06831686198711395, + -0.01390997413545847, + -0.020475609228014946, + 0.017393099144101143, + -0.007532020565122366, + -0.06435851007699966, + -0.014785194769501686, + 0.02654031664133072, + 0.004753720946609974, + 0.026440177112817764, + -0.028890414163470268, + -0.011440729722380638, + 0.003554105758666992, + -0.0022926912643015385, + -0.02393224649131298, + 0.03711748123168945, + -0.06023703143000603, + -0.008778683841228485, + -0.05984162166714668, + -0.024247022345662117, + -0.036919932812452316, + 0.05249374359846115, + 0.03022468276321888, + -0.011348876170814037, + 0.0008303995127789676, + 0.001597013440914452, + -0.015491127036511898, + -0.035073015838861465, + -0.024477796629071236, + -0.030328145250678062, + -0.09301470220088959, + -0.046440113335847855, + 0.036719027906656265, + -0.021899227052927017, + 0.04666316881775856, + -0.07481305301189423, + -0.04928148165345192, + -0.01480096485465765, + 0.0014140848070383072, + 0.016779841855168343, + 
-0.04318199306726456, + 0.011910341680049896, + -0.04019855335354805, + -0.027363713830709457, + 0.006433602888137102, + 0.023732252418994904, + -0.013081788085401058, + 0.02489032782614231, + 0.005415213759988546, + -0.058724161237478256, + 0.032487478107213974, + -0.014332194812595844, + -0.020952431485056877, + 0.055405858904123306, + -0.02239573374390602, + 0.016315918415784836, + 0.04710645601153374, + 0.006866136100143194, + -0.019589263945817947, + -0.046199049800634384, + 0.04977096989750862, + -0.03211359679698944, + 0.06759121268987656, + -0.007805021945387125, + 0.009877636097371578, + -0.003194598713889718, + -0.0014034705236554146, + 0.024012917652726173, + 0.0007609894964843988, + 0.04028927534818649, + 0.047299597412347794, + 0.04644732549786568, + 0.06253348290920258, + -0.03101237863302231, + -0.04797065258026123, + -0.02459110878407955, + -0.06663094460964203, + -0.012946722097694874, + -0.046321313828229904, + -0.03617801144719124, + -0.06608668714761734, + 0.01371682621538639, + -0.040183935314416885, + 0.027353622019290924, + -0.013125114142894745, + 0.020482128486037254, + -0.10186963528394699, + 0.03741387277841568, + -0.048566944897174835, + 0.0017904090927913785, + 0.0444694422185421, + -0.02355058304965496, + -0.04245513305068016, + 0.01599632203578949, + -0.00974870752543211, + -0.02246273122727871, + 0.011107604950666428, + -0.006354854442179203, + -0.08260829746723175, + -0.054969724267721176, + -0.038703542202711105, + -0.02590899169445038, + -0.012424441985785961, + 0.033952418714761734, + 0.032632969319820404, + 0.03585505858063698, + -0.027734532952308655, + -0.05185376852750778, + 0.005663866177201271, + 0.01415393128991127, + 0.007472912315279245, + -0.0325092077255249, + -0.0008526426972821355, + 0.05909401550889015, + -0.006496420595794916, + 0.06674317270517349, + 0.06033811718225479, + -0.04705937206745148, + 0.01221691444516182, + -0.005195186473429203, + 0.017006050795316696, + 0.015768419951200485, + -0.02346021682024002, + -0.04318040981888771, + -0.00965888798236847, + -0.012831253930926323, + -0.023086808621883392, + -0.043478451669216156, + 0.02215973101556301, + 0.01018955372273922, + -0.0029477940406650305, + -0.026364397257566452, + -0.04219489544630051, + -0.0690244510769844, + 0.0017003740649670362, + -0.03498053178191185, + -0.01891854591667652, + -0.020380523055791855, + -0.07183944433927536, + 0.01474913302809, + 0.012818068265914917, + 0.02298390306532383, + 0.006645163521170616, + -0.014497633092105389, + -0.05751577392220497, + -0.01127719134092331, + 0.014469895511865616, + 0.039319343864917755, + -0.002891098614782095, + 0.0038161359261721373, + -0.0176107045263052, + -0.02695712260901928, + 0.023520348593592644, + 0.053624920547008514, + -0.0472102165222168, + -0.021724319085478783, + -0.04204733297228813, + 0.004941252060234547, + -0.07744265347719193, + -0.028974706307053566, + -6.1493665270973e-05, + -0.020630594342947006, + -0.014794640243053436, + -0.045572925359010696, + 0.03233763575553894, + 0.00969443004578352, + 0.03665856271982193, + 0.027483846992254257, + 0.074271060526371, + -0.07454165071249008, + -0.034101732075214386, + -0.07216823101043701, + -0.001424514572136104, + -0.0025912360288202763, + -0.002444307319819927, + -0.012540637515485287, + 0.009027975611388683, + 0.06855443120002747, + -0.0013480151537805796, + 0.027303414419293404, + -0.019723499193787575, + 0.033644214272499084, + -0.04313155263662338, + -0.016152892261743546, + -0.020085612311959267, + 0.029526935890316963, + 0.0004591972683556378, + 
-0.013712934218347073, + 0.015895912423729897, + -0.046559300273656845, + -0.00015638815239071846, + 0.0015497541753575206, + -0.0015048328787088394, + 0.06692329794168472, + 0.0013934546150267124, + 0.008921030908823013, + -0.010347972624003887, + -0.039798807352781296, + 0.06892028450965881, + 0.021145053207874298, + 0.007431029342114925, + -0.05281573906540871, + 0.015844792127609253, + 0.014578046277165413, + -0.0020482230465859175, + 0.03509555384516716, + -0.021227506920695305, + -0.03619229048490524, + 0.004116897005587816, + 0.02835669554769993, + -0.0028248224407434464, + 0.00836214143782854, + -0.004688165616244078, + 0.04566347226500511, + -0.0352579727768898, + -0.007859165780246258, + -0.003958444111049175, + 0.023938892409205437, + 0.04262895882129669, + -0.02836589328944683, + 0.0456448458135128, + -0.062015753239393234, + 0.03518408164381981, + 0.06333593279123306, + -0.0155468275770545, + 0.013991734012961388, + 0.02207978442311287, + 0.0032898876816034317, + 0.05948015674948692, + 0.010670959949493408, + -0.00624996330589056, + -0.04401599243283272, + -0.0022705462761223316 + ], + "content_embedding": [ + -0.01892169564962387, + 0.0662541389465332, + 0.008976679295301437, + -0.03809165209531784, + 0.02344459854066372, + 0.012984057888388634, + 0.016158411279320717, + 0.0040777078829705715, + -0.0321662537753582, + -0.0026544055435806513, + -0.03179372847080231, + 0.019741656258702278, + 0.049423426389694214, + 0.019327590242028236, + 0.01367267221212387, + -0.042058881372213364, + 0.023155249655246735, + -0.015003002248704433, + 0.01056167297065258, + 0.0032619787380099297, + -0.014582481235265732, + -0.01262009609490633, + -0.009695992805063725, + -0.025683948770165443, + 0.010330218821763992, + -0.043577518314123154, + -0.03799012303352356, + 0.03159527853131294, + -0.046592168509960175, + 0.03461733087897301, + 0.029929379001259804, + -0.02696100063621998, + 0.01958872564136982, + -0.04882275313138962, + -0.04835181683301926, + -0.07444816827774048, + 0.0615590400993824, + -0.018079139292240143, + -0.02907492406666279, + 0.03256160393357277, + 0.052772294729948044, + 0.0014335751766338944, + 0.02048010565340519, + -0.01859121397137642, + 0.0436980240046978, + -0.028847631067037582, + 0.06271578371524811, + -0.04908007010817528, + 0.04021253436803818, + -0.07390867173671722, + 0.0029745057690888643, + -0.01733274944126606, + -0.005066753830760717, + -0.006927797570824623, + -0.01495048776268959, + 0.020951012149453163, + -0.02161789871752262, + 0.004997345618903637, + 0.02517000213265419, + -0.03955457732081413, + 0.038905348628759384, + 0.008108963258564472, + -0.04058837890625, + 0.03415047749876976, + -0.004129728768020868, + -0.07600218057632446, + -0.008998502045869827, + 0.012445643544197083, + -0.005613638553768396, + -0.015701062977313995, + 0.010493642650544643, + -0.01511659286916256, + -0.007434363476932049, + 0.04920893907546997, + -0.044436678290367126, + -0.015229232609272003, + -0.009392009116709232, + -0.004889432340860367, + -0.03250344097614288, + 0.05671893432736397, + 0.03468514233827591, + -0.04985000938177109, + 0.021073583513498306, + 0.005558345932513475, + 0.04397028684616089, + -0.011105467565357685, + -0.0010204907739534974, + -0.0013343892060220242, + 0.010888955555856228, + 0.11187340319156647, + 0.05144372954964638, + 0.014714346267282963, + 0.03652629256248474, + 0.08354610204696655, + -0.050587598234415054, + 0.07670528441667557, + 0.022823045030236244, + -0.010303523391485214, + -0.00016479991609230638, + -0.015029380097985268, + 
-0.010333288460969925, + 0.03660477325320244, + 0.013327172957360744, + -0.008142965845763683, + 0.04656663164496422, + 0.043171610683202744, + 0.027440473437309265, + 0.011585040017962456, + -0.008035292848944664, + -0.0008554590749554336, + -0.04715310037136078, + -0.013419345021247864, + -0.034535810351371765, + 0.028465399518609047, + -0.030552269890904427, + 0.02954002656042576, + 0.11263657361268997, + -0.060091886669397354, + -0.004718341864645481, + 0.02276463434100151, + -0.029855655506253242, + 1.136395258072298e-05, + 0.01254600752145052, + 0.030318304896354675, + 0.04609473794698715, + 0.04090471565723419, + -0.015202691778540611, + -0.025406358763575554, + 0.01403091847896576, + -0.01206378173083067, + -0.034794360399246216, + 0.021181223914027214, + -0.041345320641994476, + 0.026389217004179955, + 0.04634319990873337, + 0.05973498523235321, + -0.0791369080543518, + -0.018549518659710884, + 0.009269041940569878, + 0.005099988542497158, + 0.016017470508813858, + 0.016928445547819138, + 0.004272987134754658, + -0.03169683367013931, + 0.008137955330312252, + -0.07982300966978073, + -0.037415798753499985, + -0.0016467635286971927, + -0.016258487477898598, + 0.01855027861893177, + 0.012749083340168, + -0.015595809556543827, + 0.009437683038413525, + 0.005881224758923054, + 0.009153603576123714, + -0.035431332886219025, + -0.03822671249508858, + -0.007053021341562271, + 0.07195861637592316, + 0.03834277018904686, + 0.025282155722379684, + 0.03235918655991554, + -0.040675584226846695, + 0.06914123892784119, + -0.014681060798466206, + 0.04182145744562149, + 0.016547678038477898, + 0.0302575696259737, + 0.027968881651759148, + 0.028392894193530083, + -0.03601876646280289, + 0.011166741140186787, + 0.013932433910667896, + -0.024813517928123474, + 0.04876561462879181, + 0.03280804678797722, + 0.020525190979242325, + -0.04888831451535225, + 0.05333299934864044, + -0.01227282639592886, + 0.009397462010383606, + -0.062118303030729294, + 0.020511150360107422, + 0.03606007248163223, + -0.011546325869858265, + 0.02632950246334076, + 0.03558770939707756, + -0.04729287326335907, + -0.00040853474638424814, + -0.05594595894217491, + 0.03343893215060234, + -0.03624171018600464, + -0.01565496437251568, + 0.03419746086001396, + -0.014939344488084316, + -0.0346553735435009, + 0.02617849037051201, + -0.018064821138978004, + 0.00044916238402947783, + -0.029752276837825775, + -0.06982599943876266, + -0.01529014203697443, + -0.10238753259181976, + 0.056908924132585526, + -0.018579944968223572, + 0.032441046088933945, + 0.02623467892408371, + 0.0005816647899337113, + 0.024393916130065918, + -0.0010619793320074677, + 0.09054756909608841, + 0.012866330333054066, + 0.0110749127343297, + 0.060603830963373184, + -0.04485912621021271, + -0.035673510283231735, + 0.00880404282361269, + -0.0236192774027586, + -0.04651271551847458, + 0.04936773329973221, + 0.016861658543348312, + -0.026910705491900444, + 0.02507326751947403, + 0.0018011556239798665, + -0.01599423959851265, + 0.007061067037284374, + -0.028597962111234665, + -0.005096979904919863, + -0.003091734368354082, + -0.008610324002802372, + -0.03941959887742996, + -0.07249880582094193, + 0.07896454632282257, + -0.01282701175659895, + 0.03806105628609657, + -0.01628866419196129, + -0.00032510326127521694, + 0.007600210607051849, + 0.012463843449950218, + 0.07028777152299881, + 0.024854836985468864, + 0.00597741175442934, + 0.012146051973104477, + -0.04252159595489502, + -0.08857864141464233, + 0.005069843493402004, + -0.002303875982761383, + 
0.007218160200864077, + -0.054320499300956726, + 0.01721455715596676, + -0.012323171831667423, + 0.029316846281290054, + 0.010660098865628242, + 0.01619168184697628, + 0.024796800687909126, + -0.06043343245983124, + -0.009076021611690521, + 0.05426326394081116, + 0.024232488125562668, + -0.025832876563072205, + 0.024366402998566628, + -0.04501958563923836, + 0.04263340309262276, + -0.01757700741291046, + 0.0240378025919199, + 0.007873878814280033, + -0.019204245880246162, + -0.04099274054169655, + -0.0028695412911474705, + -0.02336733788251877, + -0.009908018633723259, + 0.01244357880204916, + 0.014616346918046474, + 0.07263968884944916, + -0.006017595529556274, + 0.006593986880034208, + -0.017023928463459015, + -0.0008568437770009041, + 0.0393415242433548, + -0.03193742036819458, + -0.07265064865350723, + -0.056716252118349075, + -0.06321432441473007, + 0.0014871162129566073, + 0.015271728858351707, + 0.06799189001321793, + 0.002235528314486146, + 0.015148743987083435, + 0.029075419530272484, + -0.036075517535209656, + 0.03699851781129837, + 0.002699150936678052, + 0.029273545369505882, + 0.024833064526319504, + 0.02166113816201687, + 0.07822758704423904, + 0.0907154381275177, + -0.015422212891280651, + -0.004725399427115917, + -0.013691544532775879, + 0.00014949020987842232, + 0.003309824038296938, + 0.019388742744922638, + -0.01792132295668125, + -0.005919941700994968, + -0.009184692986309528, + -0.00453580915927887, + -0.017324700951576233, + 0.020368218421936035, + 0.007512629963457584, + -0.05764073505997658, + 0.01584697514772415, + -0.016094518825411797, + -0.0366678424179554, + -0.02194156125187874, + 0.053442906588315964, + 0.04864593967795372, + -0.009642759338021278, + 0.06584249436855316, + 0.017993653193116188, + 0.02838297188282013, + -0.02758033573627472, + -0.018208689987659454, + -0.08217029273509979, + 0.001340706367045641, + 0.07344162464141846, + -0.0014725526561960578, + 0.027256185188889503, + -0.03795681148767471, + 0.03496084362268448, + -0.009351355955004692, + 0.03554052114486694, + -0.0647641122341156, + -0.018092816695570946, + -0.0003290708118584007, + -0.008958869613707066, + -0.0006743986159563065, + 0.02749652974307537, + -0.005728874355554581, + -0.00014254855341278017, + 0.02650611102581024, + -0.007747439201921225, + -0.036285076290369034, + -0.04723037779331207, + -0.01256555411964655, + -0.015652446076273918, + -0.0033896011300385, + -0.027379868552088737, + 0.06606956571340561, + 0.001414530212059617, + -0.03816799819469452, + 0.005582350306212902, + -0.0037654521875083447, + -0.03315531834959984, + -0.03833584487438202, + 0.005306297447532415, + 0.06055983901023865, + 0.017386972904205322, + 0.017846958711743355, + 0.002940434729680419, + 0.06065093353390694, + -0.033751003444194794, + 0.02014659158885479, + -0.026745468378067017, + 0.02349875122308731, + 0.06887564063072205, + 0.08784544467926025, + 0.0348343662917614, + 0.017027992755174637, + 0.007463646121323109, + 0.010731169953942299, + -0.015452216379344463, + 0.0697169378399849, + 0.06115807220339775, + 0.05587253347039223, + 0.0035254100803285837, + -0.06922555714845657, + -0.00895272009074688, + 0.04390031844377518, + 0.003160918829962611, + 0.0734192356467247, + -0.012384983710944653, + 0.00778034096583724, + -0.06225632503628731, + 0.01105977687984705, + -0.019027134403586388, + -0.01744268462061882, + -0.03861316666007042, + -0.026121554896235466, + -0.03796643018722534, + -0.02607419341802597, + -0.00727757578715682, + -0.04364367574453354, + 0.027054548263549805, + 
0.001148495590314269, + -0.0051346817053854465, + -0.014047800563275814, + 0.033344950526952744, + 0.016461240127682686, + 0.033907197415828705, + -0.052207209169864655, + -0.058969806879758835, + 0.019914019852876663, + 0.04874560981988907, + 0.0043409536592662334, + 0.014156220480799675, + -0.025425465777516365, + -0.03806624561548233, + 0.027224158868193626, + -8.918229286791757e-05, + 0.04550011456012726, + 0.02069287933409214, + -0.006964664440602064, + -0.05213857442140579, + 0.03515300899744034, + -0.02322443015873432, + 0.07085354626178741, + 0.010733392089605331, + -0.04821530729532242, + -0.024944474920630455, + 0.01349271647632122, + 0.0064827692694962025, + 0.021682681515812874, + 0.03466835618019104, + -0.023484358564019203, + -0.004177657887339592, + 0.019195759668946266, + 0.021642865613102913, + 0.03591984510421753, + -6.837025284767151e-05, + 0.003064215648919344, + 0.0067205713130533695, + 0.024574855342507362, + 0.03467808663845062, + -0.07038415223360062, + 0.020557953044772148, + 0.05572228878736496, + 0.024007081985473633, + 0.008300675079226494, + 0.05382058024406433, + -0.008657778613269329, + -0.04247821494936943, + -0.02082398161292076, + -0.030047548934817314, + -0.0042150202207267284, + 0.0643019825220108, + 0.08603832125663757, + -0.0032497297506779432, + -0.05890907347202301, + -0.017683515325188637, + -0.0017970707267522812, + 0.030202442780137062, + -0.004163889214396477, + -0.005693267099559307, + 0.07439851015806198, + -0.007623215671628714, + -0.014011486433446407, + -0.06531509011983871, + -0.012002935633063316, + 0.05098460614681244, + 0.018368106335401535, + 0.044709816575050354, + -0.034841395914554596, + 0.04669453203678131, + -0.006633058190345764, + -0.06744810938835144, + 0.00022071562125347555, + -0.02252846583724022, + -0.008146141655743122, + 0.04570293799042702, + -0.017073389142751694, + 0.033481452614068985, + 0.02024919167160988, + -0.00039372473838739097, + -0.015125994570553303, + 0.0035840750206261873, + 0.03293292224407196, + -0.023488696664571762, + 0.02769201435148716, + 0.03366998955607414, + 0.013383373618125916, + -0.0062416414730250835, + -0.05436183512210846, + -0.007013875991106033, + -0.0343070924282074, + 0.008950931020081043, + -0.0007773659308440983, + 0.01631912775337696, + -0.01733097992837429, + 0.007631183601915836, + 0.022811884060502052, + 0.05997275933623314, + -0.025991076603531837, + -0.06607384979724884, + -0.0873650386929512, + -0.05788758397102356, + -0.020700229331851006, + 0.00862400233745575, + 0.008653292432427406, + -0.05257308855652809, + -0.01877412386238575, + 0.001132996054366231, + 0.007562611252069473, + 0.007040517870336771, + -0.03939346596598625, + -0.0012852386571466923, + 0.03364014998078346, + -0.08792895078659058, + 0.0003337061498314142, + 0.04566165804862976, + 0.022397097200155258, + 0.07704627513885498, + 0.011688907630741596, + 0.06875491887331009, + 0.031596384942531586, + -0.07542278617620468, + 0.06929827481508255, + 0.03525209799408913, + -0.05507253482937813, + 0.06310203671455383, + 0.009202172048389912, + 0.08802317827939987, + 0.015267971903085709, + 0.01631786674261093, + -0.08159693330526352, + 0.011958948336541653, + -0.022956276312470436, + -0.0045707738026976585, + 0.06590449810028076, + -0.025062261149287224, + 0.05683448538184166, + 0.08174461871385574, + 0.018841996788978577, + -0.02901572361588478, + 0.04103256016969681, + -0.06138996779918671, + -0.02983909286558628, + -0.03850552439689636, + -0.018056459724903107, + 0.00292590050958097, + -0.0737059935927391, + 
0.00898703932762146, + -0.012909052893519402, + -0.00488039618358016, + 0.019017860293388367, + -0.037835441529750824, + -0.05031483247876167, + 0.025473300367593765, + -0.009489303454756737, + -0.08405261486768723, + 0.06039801985025406, + -0.028819533064961433, + 0.01564796455204487, + -0.07851359248161316, + 0.00776974530890584, + -0.0627446398139, + 0.043354298919439316, + -0.0447402149438858, + 0.008833021856844425, + -0.0005271312547847629, + -0.03740326315164566, + -0.033597033470869064, + 0.02730080671608448, + -0.030516251921653748, + 0.03767557814717293, + -0.10619816929101944, + -0.038678478449583054, + 0.02232091873884201, + 0.03868230804800987, + 0.018831931054592133, + -0.05178656801581383, + -0.05465080961585045, + 0.03249572589993477, + 0.009297838434576988, + -0.003563723061233759, + -0.04144677892327309, + 0.0509132519364357, + -0.02094709314405918, + -0.022470436990261078, + -0.04437573254108429, + -0.03695523366332054, + -0.075083889067173, + 0.07801777124404907, + 0.007801617495715618, + -0.005376672837883234, + 0.020604871213436127, + -0.06675189733505249, + 0.0027014226652681828, + 0.08348087221384048, + -0.031110215932130814, + -0.02220381610095501, + 0.021845143288373947, + 0.03032352775335312, + -0.0012008004123345017, + -0.03200481832027435, + 0.049666762351989746, + 0.005313111934810877, + -0.020655009895563126, + 0.007201225031167269, + -0.05322100222110748, + -0.03385355696082115, + 0.010354285128414631, + 0.04187091067433357, + 0.006058192811906338, + 0.005469379480928183, + 0.041591376066207886, + -0.023555509746074677, + 0.043303441256284714, + -0.04954344779253006, + -0.033017441630363464, + -0.01149839162826538, + -0.012791389599442482, + 0.001670036930590868, + -0.012347050942480564, + 0.0004881276981905103, + -0.031120697036385536, + 0.022906621918082237, + -0.050669725984334946, + 0.04269399866461754, + -0.011447146534919739, + -0.017906805500388145, + -0.06953153014183044, + 0.04467186704277992, + -0.04761233553290367, + -0.013187393546104431, + 0.05690088868141174, + -0.042590390890836716, + -0.01746809110045433, + 0.020567748695611954, + 0.05125907063484192, + 0.020307395607233047, + 0.03492629528045654, + -0.04882863909006119, + -0.03183748573064804, + -0.06539574265480042, + -0.01744089275598526, + -0.02758834883570671, + 0.0050849285908043385, + -0.00035606700112111866, + 0.023614611476659775, + 0.01930573768913746, + -0.05899752303957939, + -0.04627015441656113, + 0.0068423328921198845, + 0.03920449689030647, + -0.007687605917453766, + 0.016464397311210632, + 0.04807426780462265, + 0.0023120716214179993, + 0.01973593607544899, + 0.07794646173715591, + 0.07625434547662735, + -0.03674965724349022, + -0.0012999586760997772, + 0.009016714058816433, + 0.03811555355787277, + 0.017517905682325363, + 0.004926901776343584, + -0.07054422050714493, + -0.01442575640976429, + 0.01330371480435133, + -0.008963101543486118, + -0.009463613852858543, + 0.0017095000948756933, + -0.016330784186720848, + -0.017924489453434944, + -0.042089130729436874, + -0.06883884966373444, + -0.042998943477869034, + 0.014172191731631756, + -0.0023317155428230762, + -0.027441971004009247, + 0.004573931451886892, + -0.07700463384389877, + -0.013737251050770283, + 0.025464439764618874, + -0.02619084157049656, + -0.008323452435433865, + -0.03393486887216568, + -0.04159104451537132, + -0.004442669451236725, + -0.008337379433214664, + 0.05703001841902733, + -0.05177110433578491, + 0.002210760721936822, + 0.005930258426815271, + -0.0369490347802639, + -0.013454861007630825, + 
-0.004840550944209099, + -0.04600533843040466, + -0.010599354282021523, + -0.008193885907530785, + -0.029226260259747505, + -0.06824758648872375, + 0.002242376795038581, + -0.00545460032299161, + -0.016073163598775864, + -0.02212926186621189, + 0.014335459098219872, + 0.02033282071352005, + -0.01998221129179001, + 0.06560437381267548, + -0.007302496116608381, + 0.037101101130247116, + -0.015349503606557846, + -0.0149971479550004, + -0.003208655398339033, + 0.01065454725176096, + -0.010318529792129993, + 0.005211932118982077, + -0.007634020410478115, + 0.007333737798035145, + 0.04658440127968788, + -0.017371229827404022, + -0.02044561877846718, + 0.021157968789339066, + -0.005675977561622858, + -0.016465574502944946, + 0.001816042698919773, + -0.022665906697511673, + 0.04769016057252884, + -0.02464037574827671, + -0.05675514042377472, + 0.05963050201535225, + -0.01688731089234352, + -0.05340677872300148, + 0.010052076540887356, + 0.02069842256605625, + 0.028715714812278748, + 0.009125969372689724, + -0.02970687672495842, + -0.010313224978744984, + -0.03552298620343208, + 0.04363728687167168, + -0.03991911932826042, + -0.004784241318702698, + -0.044753339141607285, + 0.01931679993867874, + -0.056493211537599564, + -0.006617037579417229, + 0.035743631422519684, + -0.053424812853336334, + -0.02699253521859646, + -0.007951406762003899, + -0.009088664315640926, + -0.018690962344408035, + -0.04115553945302963, + 0.02701025828719139, + 0.0571308434009552, + -0.029878465458750725, + -0.03173048421740532, + -0.01149672456085682, + -0.0105333486571908, + -0.005241425707936287, + -0.02809373289346695, + 0.05968040972948074, + 0.0010212024208158255, + 0.042596235871315, + 0.04825957119464874, + -0.003983878996223211, + 0.016225650906562805, + 0.015263753943145275, + -0.023301145061850548, + 0.041719190776348114, + 0.028326746076345444, + 0.026445787400007248, + -0.022935770452022552, + 0.03078318201005459 + ], + "chunk_ind": 1 + }, + { + "url": "https://docs.danswer.dev/more/use_cases/ai_platform", + "title": "AI Platform", + "content": "Build AI Agents powered by the knowledge and workflows specific to your organization.\n\nBeyond Answers\nAgents enabled by generative AI and reasoning capable models are helping teams to automate their work. Danswer is helping teams make it happen. Danswer provides out of the box user chat sessions, attaching custom tools, handling LLM reasoning, code execution, data analysis, referencing internal knowledge, and much more.\n\nDanswer as a platform is not a no-code agent builder. We are made by developers for developers and this gives your team the full flexibility and power to create agents not constrained by blocks and simple logic paths.\n\nFlexibility and Extensibility\nDanswer is open source and completely whitebox. 
This not only gives transparency to what happens within the system but also means that your team can directly modify the source code to suit your unique needs.", + "title_embedding": [ + 0.032763753086328506, + 0.049961112439632416, + 0.00777681777253747, + -0.009621717967092991, + 0.03860695660114288, + 0.035656899213790894, + -0.029095029458403587, + -0.030549267306923866, + -0.028131460770964622, + -0.023247526958584785, + -0.030750803649425507, + 0.04233109578490257, + 0.044790223240852356, + 0.020764602348208427, + -0.011113415472209454, + -0.052699606865644455, + 0.05441703647375107, + -0.027375519275665283, + 0.03858301043510437, + 0.0015289749717339873, + -0.0007870558765716851, + -0.013234086334705353, + -0.008892231620848179, + -0.0269540473818779, + 0.032256800681352615, + 0.028824904933571815, + 0.021423548460006714, + 0.0196831077337265, + 0.004699843470007181, + 0.01062865275889635, + -0.03573931008577347, + -0.01450167316943407, + 0.06177164614200592, + -0.004766061902046204, + -0.011502844281494617, + -0.059983331710100174, + 0.03794373199343681, + 0.003160010790452361, + 0.05785837396979332, + -0.016349520534276962, + 0.048589278012514114, + 0.03928593918681145, + -0.027400294318795204, + -0.007712628226727247, + 0.044047582894563675, + 0.03514353185892105, + 0.050972215831279755, + -0.027322333306074142, + 0.08146621286869049, + -0.041862014681100845, + 0.034794293344020844, + 0.0064093489199876785, + -0.05552367866039276, + -0.06472223997116089, + -0.0006224742392078042, + 0.010324635542929173, + -0.00513586075976491, + 0.006625971291214228, + -0.03121061436831951, + -0.02010185271501541, + 0.024356791749596596, + 0.04554779455065727, + -0.04365985095500946, + 0.038004688918590546, + 0.026826566085219383, + -0.0002007065195357427, + 0.0025419823359698057, + 0.022517988458275795, + 0.004520556423813105, + -0.04712541028857231, + 0.042386990040540695, + 0.0317973829805851, + 0.022796982899308205, + 0.03537650406360626, + -0.024706847965717316, + -0.05100490525364876, + 0.013296891935169697, + -0.027389265596866608, + -0.022103115916252136, + 0.07237043976783752, + 0.022473221644759178, + -0.08428098261356354, + 0.0284805316478014, + 0.014994120225310326, + 0.0647200271487236, + -0.0013714460656046867, + -0.02798375114798546, + 0.004889763426035643, + -0.02891303412616253, + 0.06638259440660477, + -0.015550877898931503, + 0.01490933820605278, + 0.03998437523841858, + 0.031558796763420105, + -0.09123710542917252, + 0.03090553544461727, + -0.027405250817537308, + 0.0028605929110199213, + -0.01660272665321827, + -0.024673976004123688, + -0.03330164775252342, + 0.019906772300601006, + 0.020785389468073845, + -0.02234416827559471, + 0.0711885541677475, + 0.010001438669860363, + -0.007417359855026007, + -0.03474368155002594, + 0.0117587149143219, + -0.030912458896636963, + -0.04288865998387337, + 0.004992801230400801, + -0.011203224770724773, + 0.026435980573296547, + -0.019328005611896515, + -0.01772245578467846, + 0.05772961303591728, + -0.018587617203593254, + 0.03977040946483612, + 0.0511898435652256, + 0.02799198590219021, + -0.021339384838938713, + 0.016965094953775406, + 0.08415205776691437, + 0.010289170779287815, + -0.02373247779905796, + -0.06358940154314041, + 0.03165338188409805, + -0.013218379579484463, + -0.041016921401023865, + 0.052579861134290695, + 0.016211217269301414, + 0.012958381325006485, + -0.029191715642809868, + -0.0013247805181890726, + 0.05056416615843773, + -0.05472686141729355, + -0.05397220700979233, + 0.07864602655172348, + 0.044400643557310104, + 
0.011529057286679745, + -0.0056294528767466545, + 0.0019877285230904818, + -0.01892041228711605, + 0.031235355883836746, + -0.06018691137433052, + 0.015224655158817768, + 0.0035034629981964827, + -0.04407024383544922, + 0.03802705183625221, + 0.016176624223589897, + 0.05680167302489281, + -0.017375409603118896, + -0.01676156371831894, + -0.017084985971450806, + -0.042274024337530136, + -0.07406415045261383, + 0.020823167636990547, + 0.04484682157635689, + -0.023108867928385735, + 0.02925572544336319, + 0.06840821355581284, + -0.027610015124082565, + 0.04234248027205467, + -0.02915036305785179, + -0.004962626378983259, + -0.0017270881216973066, + 0.023044373840093613, + 0.037656962871551514, + 0.04789644852280617, + -0.0027900487184524536, + 0.0004090967122465372, + 0.014888445846736431, + 0.009237252175807953, + 0.036635007709264755, + 0.015078885480761528, + 0.046658437699079514, + -0.025920215994119644, + 0.014571646228432655, + -0.053589239716529846, + 0.024663543328642845, + -0.0388394258916378, + 0.0037244234699755907, + 0.007817366160452366, + -0.03352022543549538, + -0.0609428733587265, + 0.04179045185446739, + -0.05036167427897453, + -0.04099080711603165, + 0.02920934371650219, + -0.037300609052181244, + 0.010041946545243263, + 0.025091813877224922, + -0.032656773924827576, + -0.05137333646416664, + -0.038329657167196274, + 0.03855415806174278, + 0.006781625561416149, + 0.02984003536403179, + -0.06467068940401077, + 0.02395613305270672, + 0.018539344891905785, + -0.0718475878238678, + 0.031203489750623703, + -0.057184506207704544, + 0.02436862140893936, + 0.02837834134697914, + -0.010054084472358227, + -0.02551312930881977, + 0.021066943183541298, + 0.06444599479436874, + 0.01263453345745802, + -0.018358737230300903, + 0.010503370314836502, + -0.023012487217783928, + 0.009831788949668407, + 0.0049070375971496105, + -0.022574082016944885, + -0.0049112942069768906, + -0.01014224998652935, + 0.055648382753133774, + -0.016490083187818527, + -0.012448773719370365, + -0.014511270448565483, + 0.027931246906518936, + 0.024195006117224693, + -0.005839435383677483, + 0.029669128358364105, + -0.007521398831158876, + 0.03150096535682678, + -0.01941276341676712, + -0.06204359978437424, + 0.01095200888812542, + 0.0023097621742635965, + 0.008341503329575062, + 0.0100992601364851, + -0.039239075034856796, + 0.04388657584786415, + 0.015824418514966965, + 0.06830465793609619, + 0.009663422591984272, + -0.00038048860733397305, + 0.035620324313640594, + -0.011668454855680466, + -0.06677736341953278, + 0.008154943585395813, + -0.03417421504855156, + -0.022497251629829407, + 0.01800542138516903, + 0.0010614683851599693, + 0.00842749048024416, + 0.020196812227368355, + -0.005975049454718828, + 0.024395788088440895, + -0.01633184403181076, + -0.004018640611320734, + -0.0018627216340973973, + 0.058719366788864136, + -0.024047864601016045, + -0.0032275430858135223, + 0.07045131176710129, + -0.03221508115530014, + 0.0352499820291996, + 0.02055438607931137, + 0.02973576821386814, + 0.0017980994889512658, + 0.05022549629211426, + 0.03819788247346878, + -0.005316003691405058, + 0.011116476729512215, + 0.019071733579039574, + 0.03500362113118172, + -0.03451540693640709, + 0.09197302162647247, + 0.008307289332151413, + 0.015847783535718918, + -0.003909585066139698, + -0.04707544669508934, + 0.01712993159890175, + -0.026143768802285194, + -0.007809836883097887, + -0.02002348005771637, + -0.03528841957449913, + -0.012745876796543598, + 0.016280299052596092, + 0.005661313887685537, + 0.022872695699334145, + 
0.016736241057515144, + -0.048460669815540314, + 0.012391538359224796, + -0.04375111311674118, + -0.06501554697751999, + -0.0159616582095623, + 0.009163076989352703, + -0.008098017424345016, + 0.03997795283794403, + 0.09088447690010071, + -0.025736957788467407, + -0.01334838755428791, + 0.015781259164214134, + 0.010901914909482002, + 0.021588636562228203, + 0.011131210252642632, + -0.034338608384132385, + 0.053609222173690796, + 0.018425501883029938, + 0.03827910125255585, + -0.003314226632937789, + 0.010824226774275303, + 0.020308859646320343, + -0.11467628926038742, + 0.04042372852563858, + 0.01810252107679844, + 0.03511713072657585, + -0.0987866222858429, + 0.016760295256972313, + 0.007829226553440094, + -0.011888569220900536, + 0.034833233803510666, + -0.009197549894452095, + 0.005588896572589874, + -0.07932842522859573, + -0.02078017219901085, + -0.03448954597115517, + 0.0152775589376688, + 0.08626428246498108, + 0.03126169368624687, + 0.04502886161208153, + -0.026686420664191246, + -0.028234312310814857, + 0.0049273171462118626, + 0.023110508918762207, + -0.08400018513202667, + 0.017200743779540062, + 0.02693784609436989, + 0.0036261421628296375, + -0.018591655418276787, + 0.005189367104321718, + 0.0002512435312382877, + -0.01673535816371441, + 0.06507309526205063, + 0.02960938587784767, + 0.0194547139108181, + -0.045088544487953186, + -0.01410599984228611, + -0.001771911047399044, + 0.042333200573921204, + -0.015243434347212315, + 0.027360277250409126, + -0.02644488774240017, + -0.059026844799518585, + 0.0013204477727413177, + -0.005272903945297003, + -0.03697441890835762, + -0.03736754506826401, + 0.06495915353298187, + 0.004548369906842709, + 0.004532824270427227, + -0.005509661976248026, + 0.013331729918718338, + 0.005671144928783178, + -0.043852198868989944, + 0.06886028498411179, + -0.0020801422651857138, + 0.014272121712565422, + -0.02358032390475273, + 0.010091368108987808, + -0.013035510666668415, + -0.009768063202500343, + -0.024086249992251396, + 0.04728090390563011, + -0.024031780660152435, + 0.032426923513412476, + 0.06455196440219879, + 0.08759471774101257, + 0.009270765818655491, + -0.0936349406838417, + -0.012462696991860867, + -0.019188350066542625, + -0.06805568188428879, + 0.01794586144387722, + -0.0007348881918005645, + 0.0024105070624500513, + -0.016566181555390358, + 0.012622764334082603, + -0.03900640457868576, + 0.010342570021748543, + 0.011543489061295986, + 0.01152091845870018, + -0.05232607573270798, + 0.004903953988105059, + -0.05708310753107071, + -0.04076048359274864, + 0.016818160191178322, + -0.020741824060678482, + 0.01609313301742077, + -0.022479360923171043, + 0.03654901310801506, + 0.022170664742588997, + 0.01575297676026821, + -0.011484816670417786, + -0.025103436782956123, + 0.05906060338020325, + 0.02779274433851242, + 0.028078753501176834, + 0.04629473015666008, + -0.005719225853681564, + -0.06190178170800209, + 0.006866101641207933, + -0.002305209171026945, + 0.03215618431568146, + 0.007546067703515291, + -0.02738751657307148, + -0.04539818689227104, + 0.04683874174952507, + -0.0208493173122406, + 0.03900844231247902, + -0.027456291019916534, + -0.028509290888905525, + 0.013289637863636017, + 0.0017003221437335014, + -0.0198791716247797, + 0.014913729391992092, + 0.005401032045483589, + -0.04071260988712311, + 0.02060793526470661, + -0.003016189206391573, + 0.03800947219133377, + -0.019319988787174225, + -0.024961907416582108, + -0.02498740889132023, + -0.04191872850060463, + -0.042030803859233856, + 0.013421737588942051, + 
-0.045663248747587204, + 0.024831216782331467, + 0.06314653903245926, + 0.013705547899007797, + 0.025637097656726837, + -0.006122317630797625, + 0.0041285231709480286, + 0.050409767776727676, + 0.007197089493274689, + -0.01965370774269104, + -0.04048306494951248, + 0.11998444050550461, + 0.029942067340016365, + 0.02599455416202545, + -0.057833291590213776, + 0.0033883019350469112, + 0.00468824477866292, + -0.01925582066178322, + -0.01766190119087696, + 0.011122050695121288, + 0.04823627695441246, + 0.018773270770907402, + -0.020368080586194992, + -0.009206349961459637, + 0.031074542552232742, + 0.02497885189950466, + 0.0031681342516094446, + 0.015077338553965092, + -0.022211533039808273, + 0.058754149824380875, + -0.016073331236839294, + -0.014968045987188816, + -0.0051240865141153336, + -0.06383436918258667, + -0.0280417762696743, + 0.013401271775364876, + -0.006949563976377249, + -0.009025825187563896, + 0.03748825564980507, + 0.04152849316596985, + -0.03703063353896141, + 0.0006073106196708977, + -0.019878407940268517, + -0.059219732880592346, + 0.03231174871325493, + -0.012458872981369495, + -0.0006862205918878317, + -0.029703414067626, + -0.011737367138266563, + -0.01565374620258808, + -0.002873011166229844, + 0.035379018634557724, + -0.025712305679917336, + 0.027225548401474953, + -0.011701708659529686, + -0.020186487585306168, + -0.013381940312683582, + 0.044779565185308456, + 0.027129901573061943, + -0.03770675137639046, + -0.06656532734632492, + -0.04852313920855522, + -0.07922673970460892, + 0.042464420199394226, + 0.08760115504264832, + -0.01756269298493862, + 0.025902874767780304, + -0.049739014357328415, + 0.015325409360229969, + 0.04406426474452019, + 0.012947683222591877, + -0.022557666525244713, + -0.033376943320035934, + -0.12034522742033005, + 0.019998058676719666, + 0.04397791251540184, + 0.024618806317448616, + -0.013922464102506638, + 0.031511057168245316, + 0.03906194865703583, + 0.011382625438272953, + -0.027103818953037262, + 0.04971625655889511, + 0.051205385476350784, + -0.08501561731100082, + 0.011972213163971901, + -0.018331818282604218, + -0.00884521659463644, + -0.0015008534537628293, + 0.0827648937702179, + -0.03979771211743355, + 0.0015674568712711334, + -0.014266063459217548, + -0.03932151570916176, + 0.04269920662045479, + -0.059784602373838425, + 0.01841970533132553, + 0.06251460313796997, + -0.02819698490202427, + -0.040344759821891785, + 0.0010407248046249151, + -0.034333355724811554, + -0.029237672686576843, + -0.0001084851028281264, + 0.06710729002952576, + 0.019469408318400383, + -0.01640215329825878, + 0.019526075571775436, + 0.007778842933475971, + 0.03379968926310539, + 0.030870657414197922, + -0.059688691049814224, + -0.05436835065484047, + 0.053333111107349396, + 0.004061849322170019, + -0.08632408827543259, + 0.014255499467253685, + -0.05555962026119232, + 0.010840730741620064, + -0.05179913341999054, + -0.007342956960201263, + 0.0011719957692548633, + 0.022990427911281586, + 0.013041576370596886, + -0.026316920295357704, + -0.022087475284934044, + -0.02786962315440178, + 0.013592005707323551, + 0.021783264353871346, + -0.059460774064064026, + -0.029133567586541176, + -0.06166587024927139, + -0.055512115359306335, + 0.004256486427038908, + 0.0341678261756897, + 0.011773993261158466, + -0.029188869521021843, + -0.021346861496567726, + -0.036212995648384094, + 0.025272972881793976, + 0.02215636521577835, + -0.03782811760902405, + 0.01701144315302372, + -0.05073560029268265, + -0.06574195623397827, + 0.012947561219334602, + 0.003303218400105834, + 
-0.05047185719013214, + 0.010198806412518024, + -0.04323785379528999, + -0.04194899648427963, + 0.02726336568593979, + -0.015397109091281891, + -0.02849482372403145, + 0.058862827718257904, + -0.0026129265315830708, + 0.006432596128433943, + 0.04382907226681709, + -0.05114292353391647, + -0.02147330716252327, + -0.05826929211616516, + 0.046473387628793716, + -0.09205549210309982, + 0.04540986940264702, + -0.006234755739569664, + -0.05360054224729538, + -0.012155161239206791, + -0.030249077826738358, + 0.02822766825556755, + 0.013851269148290157, + 0.027002329006791115, + 0.09613272547721863, + 0.035666726529598236, + 0.03504599630832672, + -0.00038134161150082946, + -0.06922309845685959, + 0.016433153301477432, + -0.031455833464860916, + -0.018132444471120834, + -0.02008064091205597, + -0.015955988317728043, + -0.04022971913218498, + -0.00230028061196208, + -0.06941505521535873, + 0.0230435561388731, + -0.026967540383338928, + 0.0354134738445282, + -0.08307641744613647, + 0.055718302726745605, + 0.0012352125486359, + 0.017340224236249924, + 0.02709241770207882, + -0.009195402264595032, + 0.020474854856729507, + 0.0016901030903682113, + 0.05093026161193848, + -0.02238425612449646, + 0.011796950362622738, + -0.007241291459649801, + -0.0334753580391407, + -0.04778272658586502, + -0.030247407034039497, + -0.012979192659258842, + 0.004056413192301989, + 0.015001167543232441, + 0.06737781316041946, + 0.028164206072688103, + 0.0028011424001306295, + -0.049282923340797424, + 0.06260383874177933, + 0.04237203299999237, + -0.026161646470427513, + 0.02427232824265957, + 0.021224258467555046, + 0.002963172970339656, + -0.049155037850141525, + 0.033326923847198486, + 0.07168576121330261, + -0.04409810155630112, + -0.012802177108824253, + 0.011941076256334782, + 0.005057428497821093, + -0.04857957363128662, + -0.011230324395000935, + 0.009986268356442451, + 0.010389930568635464, + -0.013448627665638924, + -0.04319113492965698, + -0.02839748188853264, + 0.011157489381730556, + 0.015462666749954224, + -0.014774681068956852, + -0.035400133579969406, + 0.003983446396887302, + -0.06980624049901962, + -0.0019868735689669847, + -0.0014860559022054076, + -0.017261963337659836, + 0.03138411417603493, + -0.07367079704999924, + 0.025024767965078354, + 0.037335801869630814, + 0.04612639173865318, + -0.018027080222964287, + -0.015578734688460827, + -0.05632679536938667, + -0.01690700650215149, + 0.023824671283364296, + 0.003364108270034194, + -0.0478903129696846, + 0.014160525053739548, + 0.0023307709489017725, + 0.028807908296585083, + 0.0053710732609033585, + -0.007223619148135185, + -0.09570229798555374, + 0.013001752085983753, + -0.03882845118641853, + -0.018106481060385704, + -0.08351759612560272, + -0.01296163722872734, + -0.0017098417738452554, + -0.042986027896404266, + 0.02120766043663025, + -0.00032761419424787164, + 0.059994783252477646, + 0.00795682892203331, + 0.025746053084731102, + 0.026430919766426086, + 0.10314885526895523, + -0.042013708502054214, + -0.01044819038361311, + -0.06457454711198807, + 0.04287077486515045, + -0.0233222134411335, + -0.011595506221055984, + 0.008520099334418774, + 0.021662304177880287, + 0.04874734953045845, + 0.03213977813720703, + -0.03502868860960007, + -0.013689175248146057, + 0.007175855804234743, + -0.06394322961568832, + -0.03230760619044304, + -0.0520993173122406, + 0.03424723073840141, + -0.01675051636993885, + -0.04967552423477173, + 0.03324288874864578, + -0.03822193667292595, + -0.015012933872640133, + -0.02746376395225525, + -0.015637405216693878, + 
0.040449269115924835, + -0.0027442676946520805, + 0.008192671462893486, + 0.013573664240539074, + -0.0065663764253258705, + 0.07001614570617676, + -0.00289558875374496, + 0.004224210977554321, + -0.05637960880994797, + -0.010168599896132946, + -0.02271331660449505, + 0.0014612390659749508, + 0.06994854658842087, + -0.00733678275719285, + -0.0025255896616727114, + 0.03514084219932556, + 0.02634606696665287, + -0.016171403229236603, + -0.02692556194961071, + 0.015410004183650017, + 0.07382199913263321, + 0.01444800104945898, + -0.020071715116500854, + 0.030701540410518646, + 0.0056877885945141315, + 0.011047931388020515, + -0.05641033127903938, + 0.03570398688316345, + -0.06379767507314682, + 0.09488129615783691, + 0.015704551711678505, + -0.0008733674185350537, + 0.009907273575663567, + 0.004910382442176342, + 0.050873469561338425, + 0.01800096221268177, + -0.027450138702988625, + -0.001498246449045837, + -0.027504686266183853, + -0.019632702693343163 + ], + "content_embedding": [ + -0.0417482890188694, + 0.0512668639421463, + 0.0012354102218523622, + -0.035204555839300156, + 0.028333576396107674, + 0.006138786673545837, + 0.017678435891866684, + 0.004378852900117636, + -0.022564459592103958, + -0.03274708241224289, + -0.06855575740337372, + 0.03446828946471214, + 0.03136003389954567, + -0.016096506267786026, + -0.007832110859453678, + 0.01546874176710844, + 0.025302864611148834, + -0.01542437169700861, + 0.009685760363936424, + 0.025153761729598045, + 0.01136286836117506, + -0.03678102046251297, + 0.01742858625948429, + -0.04800569638609886, + 0.052324045449495316, + -0.0188713688403368, + -0.017203466966748238, + 0.04401639476418495, + -0.05147295445203781, + -0.005816930439323187, + 0.04151167348027229, + 0.0020627069752663374, + 0.008849645033478737, + -0.03293370082974434, + -0.030744211748242378, + -0.025762831792235374, + 0.07024409621953964, + -0.029683783650398254, + -0.02081390842795372, + 0.034864746034145355, + 0.057659171521663666, + 0.009455090388655663, + -0.001964752795174718, + -0.028249403461813927, + 0.045469045639038086, + 0.010203365236520767, + 0.039163172245025635, + -0.01693413034081459, + 0.03357663378119469, + -0.016916338354349136, + 0.007125346455723047, + -0.02135808765888214, + -0.007920235395431519, + -0.014657854102551937, + -0.0023566402960568666, + 0.026274284347891808, + -0.0449351891875267, + 0.006130301393568516, + 0.0021915079560130835, + -0.05063489079475403, + 0.010083623230457306, + 0.03967271372675896, + -0.047972869127988815, + 0.011878297664225101, + -0.02869013138115406, + -0.06947814673185349, + 0.012776396237313747, + -0.022227533161640167, + -0.021391209214925766, + -0.0071424199268221855, + -0.010884602554142475, + 0.0022353651002049446, + 0.04208262637257576, + 0.04585080221295357, + -0.028864840045571327, + 0.014383035711944103, + 0.0006865983596071601, + -0.003945623058825731, + -0.024596840143203735, + 0.02039221115410328, + 0.05236830934882164, + -0.06231372430920601, + 0.0006878590793348849, + 0.005045242141932249, + 0.04543100297451019, + -0.022787010297179222, + -0.0323825404047966, + 0.0060617332346737385, + -0.0009496629354543984, + 0.1132081151008606, + 0.021422259509563446, + -0.008516624569892883, + 0.011941758915781975, + 0.06050655618309975, + -0.06464048475027084, + 0.0715012326836586, + -0.04892478510737419, + -0.014262699522078037, + 0.02197115309536457, + -0.02258905954658985, + -0.03329572454094887, + 0.0733470693230629, + 0.01521797850728035, + -0.02922399342060089, + 0.05403874069452286, + -0.0024076823610812426, + 
-0.005156014114618301, + -0.0004758739669341594, + -0.0009397549438290298, + 0.022768890485167503, + -0.06273472309112549, + -0.013565277680754662, + -0.038060612976551056, + 0.03901419788599014, + -0.025413114577531815, + -0.031085047870874405, + 0.062427643686532974, + -0.05666875094175339, + 0.018170330673456192, + 0.03758049011230469, + -0.005046131554991007, + -0.03363005071878433, + 0.0071977670304477215, + -0.007294844835996628, + 0.04950850084424019, + 0.05829211696982384, + -0.028599455952644348, + -0.00011273028212599456, + -0.027114247903227806, + -0.04813091829419136, + 0.03546503558754921, + -0.0017865434056147933, + -0.06174362823367119, + 0.015936153009533882, + 0.05498664081096649, + 0.06208323314785957, + -0.06043750047683716, + -0.07075081020593643, + 0.03265148773789406, + 0.01779918558895588, + -0.004657578654587269, + 0.013401461765170097, + -0.031561195850372314, + -0.010674675926566124, + 0.02138788439333439, + -0.059565648436546326, + 0.003320328425616026, + -0.0016824831254780293, + -0.021733451634645462, + 0.048551496118307114, + -0.003053524298593402, + 0.011647860519587994, + -0.0014629715587943792, + 0.059308186173439026, + 0.0077448501251637936, + -0.01239799801260233, + -0.039145924150943756, + 0.016731932759284973, + 0.062229979783296585, + -0.029277512803673744, + 0.05666857957839966, + 0.021947506815195084, + -0.027742277830839157, + 0.05703498050570488, + -0.02114000730216503, + -0.0011631653178483248, + 0.04833010211586952, + 0.013655254617333412, + 0.042764052748680115, + 0.04422000050544739, + -0.010796190239489079, + -0.0081519465893507, + 0.0005064443103037775, + -0.007894535548985004, + 0.01271637249737978, + 0.0280605535954237, + 0.023104701191186905, + -0.05545410141348839, + 0.03579716384410858, + -0.01674344576895237, + 0.011995082721114159, + -0.04967891052365303, + 0.018647905439138412, + -0.0025427585933357477, + -0.05248319357633591, + -0.004207089077681303, + 0.0029677890706807375, + -0.08436138927936554, + 0.011933421716094017, + -0.046401966363191605, + 0.004982754122465849, + -0.03336072713136673, + 0.007464535068720579, + -0.02536672353744507, + -0.02103051170706749, + -0.0247516930103302, + 0.03470923379063606, + 0.008188062347471714, + 0.04575216770172119, + -0.04027656093239784, + -0.028462760150432587, + -0.00641157990321517, + -0.1032537892460823, + 0.015407266095280647, + -0.017259350046515465, + 0.057880233973264694, + 0.02970932051539421, + 0.003135938895866275, + 0.04052228853106499, + 0.006307818461209536, + 0.09373948723077774, + 0.012201530858874321, + 0.01518191210925579, + 0.005055180750787258, + -0.00017229207151103765, + -0.008860277943313122, + -0.0009321855613961816, + -0.024702103808522224, + -0.02220877818763256, + 0.018036337569355965, + 0.0461902916431427, + -2.3178456103778444e-05, + -0.021639293059706688, + -0.009496558457612991, + -0.0069047678261995316, + -0.005369818769395351, + -0.038412243127822876, + 0.0376049242913723, + -0.02614714205265045, + 0.010913437232375145, + -0.02533271722495556, + -0.08659890294075012, + 0.05744393169879913, + 0.012141053564846516, + 0.060547053813934326, + -0.0005550469504669309, + -0.01619824767112732, + -0.0022558700293302536, + 0.01814994402229786, + 0.06237058714032173, + 0.055474210530519485, + -0.02512912079691887, + 0.010455053299665451, + -0.023948650807142258, + -0.07459914684295654, + -0.006999264471232891, + -0.006154322065412998, + -0.014305580407381058, + -0.042501892894506454, + 0.04605546593666077, + -0.007378050591796637, + 0.013837042264640331, + 
0.005601659417152405, + -0.02454686351120472, + 0.0228840634226799, + -0.010892537422478199, + 0.0011435768101364374, + 0.027678560465574265, + 0.015353331342339516, + -0.03731193020939827, + 0.05862969905138016, + -0.02842552959918976, + 0.03124571032822132, + 0.02315538190305233, + 0.012950807809829712, + 0.026965327560901642, + 0.009465894661843777, + -0.010829408653080463, + -0.008594458922743797, + 0.014982074499130249, + 0.021298887208104134, + -0.018343189731240273, + 0.01739460788667202, + 0.07865084707736969, + 0.02205476351082325, + 0.015017225407063961, + -0.011981618590652943, + -0.02248695306479931, + 0.017631210386753082, + -0.02025180496275425, + -0.05385996773838997, + -0.05477667227387428, + -0.042989905923604965, + 0.004830287769436836, + 0.03188111260533333, + 0.048253823071718216, + 0.0027890182100236416, + -0.01684093475341797, + 0.029284454882144928, + -0.014463928528130054, + 0.029999280348420143, + 0.013334669172763824, + -0.030123639851808548, + 0.007939296774566174, + -0.015909308567643166, + 0.03652086481451988, + 0.043923694640398026, + -0.03349898010492325, + 0.016639679670333862, + 0.007404185365885496, + -0.023147881031036377, + 0.004568914417177439, + 0.008112411946058273, + -0.021877270191907883, + -0.0072467140853405, + -0.024027734994888306, + 0.022522028535604477, + 0.03248016908764839, + 0.04624137282371521, + 0.03288194164633751, + -0.0706077441573143, + 0.00647892989218235, + -0.013711459934711456, + -0.00910367164760828, + -0.06070556864142418, + 0.013195404782891273, + 0.02949078381061554, + -0.04314878582954407, + 0.03952472656965256, + -0.039313577115535736, + 0.01958983577787876, + -0.04745025932788849, + 0.011169768869876862, + -0.07735665887594223, + 0.012919869273900986, + 0.08162245899438858, + 0.04961969330906868, + 0.02261139266192913, + -0.01081178616732359, + -0.022089937701821327, + 0.036029793322086334, + 0.07065453380346298, + -0.050287678837776184, + 0.009584897197782993, + -0.005743148736655712, + -0.03252799063920975, + -0.029911693185567856, + -0.0031824579928070307, + -0.0022875897120684385, + 0.010553253814578056, + -0.005088122095912695, + -0.019103137776255608, + -0.029758833348751068, + -0.03040270134806633, + -0.05643913522362709, + -0.0183008573949337, + 0.0036066959146410227, + -0.010227258317172527, + 0.03830184414982796, + 0.008860573172569275, + -0.04022029787302017, + 0.016092464327812195, + -0.007906369864940643, + -0.0206406619399786, + -0.01545781921595335, + 0.011720928363502026, + 0.030331697314977646, + 0.020348263904452324, + 0.013491041027009487, + 0.015015012584626675, + 0.0757412239909172, + -0.013692211359739304, + 0.0554184690117836, + -0.03535052016377449, + 0.027659131214022636, + 0.062012042850255966, + 0.05365491285920143, + 0.02611374668776989, + 0.03400697186589241, + -0.0187185350805521, + 0.030734656378626823, + -0.04378894716501236, + 0.04222285747528076, + 0.06321597844362259, + 0.0926889032125473, + 0.06395434588193893, + -0.045033425092697144, + -0.02227518893778324, + -0.018914448097348213, + -0.024137776345014572, + 0.06653360277414322, + 0.03000609017908573, + 0.016536613926291466, + -0.05106441304087639, + 0.009556908160448074, + -0.003165673930197954, + -0.02989509329199791, + -0.008909299969673157, + -0.002428715117275715, + -0.038857024163007736, + -0.014716073870658875, + -0.02291145734488964, + -0.03815469145774841, + 0.018349675461649895, + -0.001724440953694284, + 0.024225711822509766, + -0.038882117718458176, + 0.013145080767571926, + 0.013105038553476334, + 0.033219680190086365, + 
-0.04639777913689613, + -0.044315461069345474, + -0.012929159216582775, + 0.003259071381762624, + 0.012331360019743443, + -1.7462354662711732e-05, + -0.02317662350833416, + -0.042660780251026154, + 0.001802539685741067, + -0.041100095957517624, + 0.04925210401415825, + 0.047337062656879425, + -0.01313596311956644, + -0.048633869737386703, + 0.06100405752658844, + -0.024509647861123085, + 0.06903672963380814, + 0.026338376104831696, + -0.05955340713262558, + -0.013524221256375313, + -0.007072206120938063, + -0.0004094979085493833, + 0.02331911027431488, + 0.006079655606299639, + -0.027727166190743446, + 0.01562763936817646, + 0.011910749599337578, + -0.010385152883827686, + 0.02091721072793007, + -0.030102524906396866, + -0.014945127069950104, + 0.007444288115948439, + -0.009210431948304176, + 0.01587914675474167, + -0.07968660444021225, + 0.034870292991399765, + 0.04423568770289421, + 0.05101220682263374, + -0.0018310192972421646, + 0.04378198832273483, + 0.008875945582985878, + -0.018744593486189842, + -0.010748499073088169, + -0.05976865068078041, + -0.024797234684228897, + 0.02921747788786888, + 0.08715134114027023, + -0.014189728535711765, + -0.05772070586681366, + -0.00013612159818876535, + 0.034182313829660416, + 0.02940666675567627, + -0.007551911287009716, + 0.005196248646825552, + 0.09129910916090012, + 0.03463520109653473, + -0.028487645089626312, + -0.054952532052993774, + -0.019425109028816223, + 0.04267658665776253, + -0.010463536716997623, + -0.022979862987995148, + 0.003282969817519188, + 0.032446060329675674, + -0.03184691444039345, + -0.01494336687028408, + 0.027125416323542595, + -0.03301938623189926, + -0.021615097299218178, + 0.01919432356953621, + -0.018361948430538177, + 0.0440165251493454, + 0.018785251304507256, + 0.05379289388656616, + -0.06103529781103134, + -0.04040123522281647, + 0.06767034530639648, + -0.04255857691168785, + 0.059002116322517395, + 0.06269264966249466, + 0.04158494248986244, + 0.016211502254009247, + -0.046843864023685455, + -0.028105739504098892, + 0.007073850836604834, + 0.052667438983917236, + 0.00735336821526289, + 0.017733542248606682, + -0.023568013682961464, + -0.007077949587255716, + 0.01566276140511036, + 0.048224493861198425, + -0.0003875133115798235, + -0.046327680349349976, + -0.08656812459230423, + -0.025123324245214462, + -0.034193720668554306, + 0.03014206700026989, + 0.05021859332919121, + -0.0026385232340544462, + 0.023799851536750793, + -0.009769299067556858, + -0.01290298905223608, + 0.004491783678531647, + -0.012223453260958195, + -0.00033618492307141423, + 0.0233011394739151, + -0.08696971833705902, + 0.058488454669713974, + 0.000664825493004173, + 0.039359770715236664, + 0.014214815571904182, + 0.03424450755119324, + 0.05592956021428108, + 0.016471324488520622, + -0.059732481837272644, + 0.06536833196878433, + 0.024387361481785774, + -0.10856911540031433, + 0.06828989833593369, + 0.0036337117198854685, + 0.05830007046461105, + 0.016170067712664604, + 0.013002794235944748, + -0.11607159674167633, + 0.0019640743266791105, + 0.026027854532003403, + -0.028382647782564163, + 0.041647832840681076, + -0.005042455159127712, + -0.0010717103723436594, + 0.09709432721138, + 0.018342554569244385, + -0.03699033707380295, + 0.03425338864326477, + -0.07419072836637497, + -0.05410637706518173, + 0.013680101372301579, + -0.007827416993677616, + -0.007252392824739218, + 0.016606653109192848, + 0.015743359923362732, + -0.007168450392782688, + 0.030557913705706596, + 0.010715801268815994, + -0.03387424722313881, + -0.059598296880722046, + 
0.061636339873075485, + -0.024311335757374763, + -0.08930302411317825, + 0.04300369694828987, + -0.052911426872015, + 0.048922792077064514, + -0.07488273829221725, + 0.0253959558904171, + -0.057005614042282104, + -0.010324039496481419, + -0.03382004797458649, + 0.01331509929150343, + -0.0060559725388884544, + 0.021830739453434944, + 0.0004554805636871606, + 0.06132755056023598, + -0.04885099083185196, + 0.01681993156671524, + -0.09306737780570984, + -0.03891037777066231, + 0.03394221141934395, + 0.03513973951339722, + 9.119489550357684e-05, + -0.009680265560746193, + -0.018936453387141228, + 0.002022465690970421, + 0.03725491091609001, + -0.007916543632745743, + -0.05493376404047012, + 0.06674706935882568, + -0.04586830735206604, + -0.05310272425413132, + -0.002019708277657628, + -0.03419820964336395, + -0.08405481278896332, + 0.044505130499601364, + -0.022271662950515747, + 0.008551442995667458, + 0.024632176384329796, + -0.057307109236717224, + -0.025764044374227524, + 0.05102856457233429, + -0.01996302232146263, + -0.003182733431458473, + 0.010233199223876, + -0.005380541551858187, + -0.033068619668483734, + -0.038329556584358215, + 0.041149478405714035, + -0.038474202156066895, + 0.03263046592473984, + 0.043984752148389816, + -0.06405626237392426, + -0.04378855600953102, + -0.017724154517054558, + -0.00023550254991278052, + -0.006340715568512678, + 0.008379276841878891, + 0.06068692356348038, + -0.023048071190714836, + 0.04665880277752876, + -0.026433007791638374, + -0.04106089845299721, + 0.008102682419121265, + -0.02919689752161503, + -0.002803279785439372, + 0.00115284975618124, + -0.007610488682985306, + -0.009425876662135124, + 0.014759095385670662, + -0.07407978177070618, + 0.040522702038288116, + -0.028428586199879646, + -0.015484650619328022, + -0.08971428871154785, + 0.04541322588920593, + 0.01523630227893591, + -0.02159152925014496, + 0.06348283588886261, + -0.0762605369091034, + 0.008550439029932022, + -0.0010396456345915794, + 0.09191705286502838, + 0.01919129304587841, + 0.012690366245806217, + -0.032078325748443604, + -0.03879883140325546, + -0.06354136019945145, + -0.016241934150457382, + -0.013353055343031883, + 0.013797549530863762, + 0.03027600795030594, + 0.05205754190683365, + 0.018223397433757782, + -0.02529638260602951, + -0.012619049288332462, + 0.05183516442775726, + 0.04441876709461212, + -0.0014240458840504289, + -0.004662310238927603, + 0.007740246132016182, + -0.023739585652947426, + 0.008351752534508705, + 0.04186442866921425, + 0.06846421957015991, + -0.03302106633782387, + -0.019061105325818062, + 0.03688846528530121, + 0.027123648673295975, + -0.008548760786652565, + 0.006452383007854223, + -0.05057734623551369, + 0.009094422683119774, + -0.003088460536673665, + -0.01042612362653017, + 0.03579631447792053, + -0.008917502127587795, + 0.010444638319313526, + -0.023657843470573425, + -0.03254014626145363, + -0.0009533764678053558, + 0.00684812106192112, + 0.01948300190269947, + 0.00943666510283947, + -0.010625068098306656, + 0.02385423146188259, + -0.05145318806171417, + -0.03215208277106285, + 0.007343036122620106, + 0.01264273189008236, + 0.036680057644844055, + 0.022073568776249886, + -0.06296181678771973, + -0.008569572120904922, + -0.012322318740189075, + 0.021164294332265854, + -0.051289938390254974, + 0.0010486009996384382, + 0.0021613994613289833, + 0.030476249754428864, + 0.01092084776610136, + -0.054112132638692856, + -0.06015515327453613, + 0.023149874061346054, + 0.03427460417151451, + -0.019571471959352493, + -0.07272381335496902, + 
0.009794066660106182, + -0.04319072142243385, + -0.04802769050002098, + -0.0024639740586280823, + 0.01276618055999279, + 0.030480578541755676, + -0.007069519720971584, + 0.026940204203128815, + -0.013154259882867336, + 0.05308559536933899, + 0.0008981192368082702, + -0.012286764569580555, + -0.010251149535179138, + 0.056114789098501205, + -0.027719540521502495, + -0.06385437399148941, + -0.01707690954208374, + 0.03182663023471832, + 0.04629168286919594, + 0.003105542855337262, + -0.035991836339235306, + 0.030695278197526932, + -0.01389816403388977, + 0.005694018676877022, + 0.024141885340213776, + -0.056052565574645996, + 0.07325056940317154, + -0.052376989275217056, + -0.0827232152223587, + 0.07083716243505478, + -0.011363365687429905, + -0.049301743507385254, + 0.01282532885670662, + 0.029815899208188057, + 0.0025471607223153114, + 0.014735412783920765, + -0.017929038032889366, + -0.017711512744426727, + -0.03859850764274597, + 0.020923320204019547, + -0.024983150884509087, + -0.009905354119837284, + -0.033428773283958435, + 0.0033264297526329756, + -0.057740144431591034, + 0.0011588952038437128, + 0.05510108917951584, + -0.042752135545015335, + 0.00023805272940080613, + 0.02830038219690323, + -0.0023612258955836296, + 0.004450241569429636, + -0.09065061807632446, + 0.05099336430430412, + 0.050836946815252304, + 0.002225160365924239, + -0.02620827779173851, + -0.0017080202233046293, + -0.006798254791647196, + -0.06735426932573318, + -0.012160968966782093, + 0.0198799017816782, + -0.005785979796200991, + 0.030539529398083687, + 0.03791653737425804, + -0.01715696230530739, + -0.013931870460510254, + -0.026593970134854317, + 0.015033211559057236, + 0.04166087508201599, + -0.0009548550006002188, + 0.002680635079741478, + -0.005521025042980909, + -0.005426781252026558 + ], + "chunk_ind": 0 + }, + { + "url": "https://docs.danswer.dev/more/use_cases/customer_support", + "title": "Customer Support", + "content": "Help your customer support team instantly answer any question across your entire product.\n\nAI Enabled Support\nCustomer support agents have one of the highest breadth jobs. They field requests that cover the entire surface area of the product and need to help your users find success on extremely short timelines. Because they're not the same people who designed or built the system, they often lack the depth of understanding needed - resulting in delays and escalations to other teams. Modern teams are leveraging AI to help their CS team optimize the speed and quality of these critical customer-facing interactions.\n\nThe Importance of Context\nThere are two critical components of AI copilots for customer support. The first is that the AI system needs to be connected with as much information as possible (not just support tools like Zendesk or Intercom) and that the knowledge needs to be as fresh as possible. Sometimes a fix might even be in places rarely checked by CS such as pull requests in a code repository. The second critical component is the ability of the AI system to break down difficult concepts and convoluted processes into more digestible descriptions and for your team members to be able to chat back and forth with the system to build a better understanding.\n\nDanswer takes care of both of these. 
The system connects up to over 30+ different applications and the knowledge is pulled in constantly so that the information access is always up to date.", + "title_embedding": [ + 0.029702378436923027, + 0.04087577760219574, + 0.011759690940380096, + -0.015867559239268303, + 0.051922496408224106, + 0.04818926751613617, + -0.051036059856414795, + -0.000768028199672699, + -0.011680016294121742, + -0.04731091484427452, + -0.06819964945316315, + 0.02706378884613514, + 0.012011447921395302, + -0.0162456426769495, + 0.00561815220862627, + -0.060200855135917664, + 0.011203722096979618, + 0.011659571900963783, + 0.005986262112855911, + 0.010378050617873669, + -0.02058245800435543, + -0.007857420481741428, + -0.011501888744533062, + -0.06029190123081207, + -0.005981787107884884, + 0.02316387929022312, + -0.003978169988840818, + 0.014874234795570374, + -0.02605351060628891, + 0.015183809213340282, + 0.024635987356305122, + 0.009090029634535313, + 0.02292451448738575, + -0.051132138818502426, + -0.01627650111913681, + -0.04312199354171753, + 0.055974528193473816, + -0.007541665807366371, + 0.05875417962670326, + 0.014909300021827221, + 0.04143260419368744, + 0.013145966455340385, + -0.0019000619649887085, + -0.014630978927016258, + 0.036174625158309937, + -0.009382152929902077, + 0.0129568912088871, + -0.01105422992259264, + 0.05389830842614174, + -0.05418487638235092, + -0.019787268713116646, + 0.021062593907117844, + -0.004908672533929348, + -0.05889728665351868, + 0.005012272857129574, + -0.032561108469963074, + 0.045741673558950424, + 0.0023587732575833797, + -0.029787305742502213, + -0.016032742336392403, + -7.657184141862672e-06, + 0.06451895087957382, + -0.061427876353263855, + 0.05008486285805702, + 0.013032016344368458, + -0.008349837735295296, + -0.023183466866612434, + 0.028445789590477943, + 0.013328451663255692, + 0.002017433987930417, + 0.08471205830574036, + 0.02884836308658123, + 0.03370589017868042, + 0.02926166169345379, + -0.019738871604204178, + -0.06608780473470688, + -0.024426797404885292, + -0.008213629014790058, + -0.017787031829357147, + 0.02045559138059616, + 0.04079979658126831, + -0.03456271439790726, + 0.011362932622432709, + -0.022804994136095047, + 0.06335528194904327, + 0.007143533322960138, + -0.038967471569776535, + 0.01895124651491642, + -0.021929487586021423, + 0.020517535507678986, + -0.005601715296506882, + 0.048027630895376205, + 0.05165044218301773, + 0.021509770303964615, + -0.05171488970518112, + 0.022959010675549507, + 0.008313633501529694, + -0.033466871827840805, + -0.00873673614114523, + -0.07275433838367462, + -0.01826190948486328, + -0.0008458571974188089, + 0.03251243755221367, + 0.0027185927610844374, + 0.03351648896932602, + 0.050253089517354965, + 0.03931482136249542, + 0.011611105874180794, + 0.0006847226177342236, + -0.018391452729701996, + -0.04876922070980072, + 0.0032669915817677975, + 0.002616048092022538, + 0.018911289051175117, + 0.0035516773350536823, + 0.04444553330540657, + 0.0874137431383133, + -0.06154795363545418, + -0.0007547208806499839, + 0.05003364384174347, + 0.047423556447029114, + 0.010614278726279736, + -0.002085448009893298, + 0.028551766648888588, + -0.00320938928052783, + -0.0028788738418370485, + -0.032166119664907455, + 0.032534729689359665, + 0.05165233090519905, + -0.02726086415350437, + 0.032850414514541626, + 0.021426480263471603, + 0.008582738228142262, + -0.05970913544297218, + -0.013117690570652485, + 0.0327497161924839, + -0.04084235802292824, + -0.034347862005233765, + 0.08484583348035812, + -0.007416227832436562, 
+ 0.0817202478647232, + 0.013180759735405445, + 0.01752362586557865, + -0.011748716235160828, + 0.006734360009431839, + -0.05940733850002289, + -0.01464597787708044, + 0.021172411739826202, + 0.015961064025759697, + 0.013145568780601025, + -0.03292446956038475, + 0.05433695763349533, + -0.04581886902451515, + -0.011024394072592258, + -0.013903305865824223, + -0.04525483399629593, + -0.009453397244215012, + -0.01541796326637268, + 0.024864252656698227, + 0.016035286709666252, + 0.04684724658727646, + 0.04711056500673294, + -0.08100881427526474, + 0.07197079807519913, + -0.00942996796220541, + -0.04369058832526207, + 0.02740531787276268, + 0.027486257255077362, + 0.035378992557525635, + 0.05205152928829193, + 0.000229495475650765, + -0.015225332230329514, + 0.018225882202386856, + 0.019075268879532814, + 0.05653514340519905, + 0.00206256122328341, + 0.04172705486416817, + 0.025263279676437378, + 0.02995399944484234, + -0.02344629354774952, + -0.00580610940232873, + -0.03100346215069294, + 0.024410588666796684, + 0.05101815611124039, + -0.044721707701683044, + -0.03469102457165718, + 0.015877151861786842, + -0.05606666952371597, + -0.04264648258686066, + 8.914931095205247e-05, + 0.005455580540001392, + 0.02580810897052288, + 0.03810019791126251, + -0.005504349246621132, + -0.03873325511813164, + -0.06938246637582779, + 0.06987633556127548, + -0.0071804821491241455, + 0.025614110752940178, + -0.04443173483014107, + 0.014129945077002048, + 0.04964412376284599, + -0.06445024162530899, + 0.03494735807180405, + 0.00042216022848151624, + 0.03607922047376633, + 0.014481625519692898, + -0.03348603844642639, + 0.04130083695054054, + 0.03306325525045395, + 0.0670546144247055, + -0.004632187075912952, + -0.02899966388940811, + 0.028892826288938522, + -0.02801397442817688, + -0.009044334292411804, + -0.0299741979688406, + 0.006851669866591692, + -0.051097121089696884, + 0.041360609233379364, + 0.040426693856716156, + -0.04066675901412964, + 0.01568625681102276, + -0.016009517014026642, + -0.004742924124002457, + 0.048653047531843185, + -0.010993007570505142, + 0.039591673761606216, + 0.0007891886634752154, + 0.0280364528298378, + -0.029024146497249603, + -0.07559369504451752, + 0.020083241164684296, + -0.02160985954105854, + 0.00466573191806674, + 0.017442384734749794, + -0.02421753853559494, + 0.05211988091468811, + 0.0016645005671307445, + 0.08051992207765579, + 0.02786155417561531, + 0.022991932928562164, + 0.04568661376833916, + -0.03650546818971634, + -0.061528630554676056, + 0.026744728907942772, + -0.029581749811768532, + -0.04499091953039169, + 0.002152943518012762, + -0.015043909661471844, + 0.047530874609947205, + 0.041445743292570114, + 0.041881438344717026, + 0.03457123413681984, + -0.023201758041977882, + -0.05317879468202591, + -0.020216727629303932, + 0.050812073051929474, + 0.0008769077248871326, + -0.01832399144768715, + 0.026449931785464287, + -0.00376958679407835, + 0.04373340308666229, + -0.015004824846982956, + 0.02940281480550766, + -0.028869349509477615, + -0.02610083483159542, + -0.0077619897201657295, + 0.03147227317094803, + -0.0032539069652557373, + 0.02559952437877655, + 0.02357475273311138, + 0.0001173858399852179, + 0.047926079481840134, + 0.03721074387431145, + -0.019753489643335342, + 0.017013119533658028, + -0.028762344270944595, + -0.005848998669534922, + -0.006997200194746256, + 0.05184704810380936, + -0.05036364868283272, + 0.002436417154967785, + 0.0003822402795776725, + 0.003277599113062024, + 0.03293520584702492, + -0.0045876270160079, + -0.004722336772829294, + 
-0.019277948886156082, + 0.0012148021487519145, + -0.023630889132618904, + -0.011700155213475227, + -0.006263254676014185, + 0.02274380624294281, + 0.004808057565242052, + 0.03601561859250069, + 0.1043647825717926, + -0.014201398938894272, + -0.016617566347122192, + -0.02272864058613777, + -0.030905725434422493, + 0.0010793384863063693, + -0.049122776836156845, + -0.005753105040639639, + 0.01338914968073368, + 0.027050666511058807, + 0.04214894399046898, + -0.023005545139312744, + 0.031917206943035126, + 0.015109232626855373, + -0.07634111493825912, + 0.024369796738028526, + -0.00647472171112895, + 0.043127499520778656, + -0.0673207938671112, + 0.0822305828332901, + 0.06597486138343811, + 0.004127069376409054, + 0.06724239140748978, + 0.004546293988823891, + 0.03501193970441818, + -0.03256544470787048, + 0.02815675362944603, + -0.018141930922865868, + 0.008695757016539574, + 0.030166303738951683, + -0.009897452779114246, + 0.03613714128732681, + -0.013987270183861256, + -0.02539311721920967, + -0.06444346904754639, + -0.01528739370405674, + -0.06564117968082428, + -0.029204169288277626, + 0.03283213824033737, + -0.018580380827188492, + 0.026825398206710815, + -0.012654704041779041, + -0.0018552436958998442, + -0.01754637062549591, + 0.08191259950399399, + 0.0534062460064888, + -0.027856973931193352, + -0.053807660937309265, + -0.02944841794669628, + -0.014591488987207413, + -0.0004364093765616417, + -0.01691609062254429, + 0.0792316198348999, + 0.004102316685020924, + -0.03579891845583916, + -0.0108651639893651, + -0.00966869480907917, + -0.00033933919621631503, + -0.026145832613110542, + -0.016428470611572266, + 0.030308935791254044, + -0.02421530708670616, + -0.01272093690931797, + 0.0026039716321974993, + 0.029183251783251762, + -0.015125368721783161, + 0.07109745591878891, + 0.02079625427722931, + 0.018054857850074768, + -0.00201214081607759, + 0.028579678386449814, + -0.011699595488607883, + 0.010970978997647762, + -0.008872047066688538, + 0.03169122710824013, + -0.06434084475040436, + 0.03283708542585373, + 0.002920384518802166, + 0.1117773950099945, + 0.0207917969673872, + -0.07546871900558472, + -0.0013152466854080558, + -0.009336701594293118, + 0.00034176796907559037, + 0.06051360070705414, + -0.0378379300236702, + -0.01082307007163763, + -0.009045763872563839, + -0.042135000228881836, + -0.04675054922699928, + 0.0008745589875616133, + 0.022651556879281998, + 0.016891758888959885, + -0.06758315861225128, + 0.03011692874133587, + -0.0689280554652214, + -0.0287728663533926, + -0.016613522544503212, + 0.009947648271918297, + -0.009978469461202621, + 0.016907479614019394, + 0.00691134762018919, + 0.04193537309765816, + -0.0010126983979716897, + -0.01135191135108471, + -0.04884914308786392, + 0.05164073780179024, + 0.06193321943283081, + -0.01663290709257126, + 0.0512203685939312, + 0.00277346046641469, + -0.0766502171754837, + 0.0011106275487691164, + -0.008470023050904274, + 0.03819086030125618, + -0.006837388966232538, + -0.03457418084144592, + -0.06866854429244995, + 0.05483240634202957, + -0.016624240204691887, + 0.028569897636771202, + -0.01400308683514595, + 0.0378500260412693, + 0.007686481345444918, + 0.030080482363700867, + -0.05881612002849579, + 0.015524756163358688, + 0.030225753784179688, + -0.02950134687125683, + 0.01465617585927248, + 0.0009167538373731077, + 0.056613512337207794, + -0.02706410363316536, + -0.0473414771258831, + -0.006343611981719732, + 0.011811802163720131, + 0.007573770359158516, + 0.021041858941316605, + -0.014327406883239746, + 0.01859954372048378, 
+ 0.06863977015018463, + -0.002199358306825161, + -0.03532129153609276, + 0.009050965309143066, + 0.02409159019589424, + -0.0025098449550569057, + -0.00499211298301816, + 0.00033862097188830376, + -0.059937484562397, + 0.10898157954216003, + 0.0318506620824337, + 0.0058680190704762936, + -0.03671310096979141, + -0.03349997103214264, + -0.0349581353366375, + -0.023411044850945473, + -0.011138128116726875, + 0.00608166866004467, + 0.08696430921554565, + -0.008161027915775776, + 0.045368045568466187, + -0.01864445023238659, + 0.035301174968481064, + 0.003979773260653019, + 0.016739632934331894, + 0.011675872839987278, + 0.025817174464464188, + 0.03272102400660515, + 0.013721601106226444, + -0.04690241813659668, + 0.05665350705385208, + -0.05676185339689255, + 0.013739561662077904, + 0.020678944885730743, + -0.03532474488019943, + 0.013464651070535183, + 0.04246523231267929, + 0.017674343660473824, + -0.005077862646430731, + -0.019556084647774696, + -0.009594413451850414, + -0.04825031757354736, + 0.00016230896289926022, + 0.003143883077427745, + 0.031157106161117554, + -0.0332491435110569, + -0.010140872560441494, + -0.04249225929379463, + -0.028256090357899666, + -0.008995918557047844, + 0.021160980686545372, + 0.03130994364619255, + -0.001629085629247129, + 0.02819039300084114, + 0.009931858628988266, + 0.05051739886403084, + 0.006850008387118578, + -0.018732454627752304, + -0.09201951324939728, + -0.042829085141420364, + -0.054845187813043594, + 0.021562401205301285, + 0.05139476805925369, + -0.011137720197439194, + 0.021749140694737434, + -0.01837606355547905, + 0.017083071172237396, + 0.023444999009370804, + -0.009212017990648746, + -0.011377239599823952, + -0.018595905974507332, + -0.09953112155199051, + 0.0019816216081380844, + -0.004408092238008976, + -0.027098996564745903, + -0.002341427141800523, + 0.029412943869829178, + 0.041135817766189575, + 0.015357858501374722, + 0.0017108122119680047, + 0.028364799916744232, + 0.004185053985565901, + -0.04822831228375435, + 0.0948278158903122, + -0.0414741188287735, + 0.03572544828057289, + 0.04517536610364914, + 0.009942572563886642, + -0.025769881904125214, + -0.017749540507793427, + -0.03137620911002159, + -0.00617972994223237, + 0.04517003893852234, + -0.03762981668114662, + 0.0358721986413002, + 0.11737086623907089, + 0.0004563555121421814, + -0.06275733560323715, + -0.011418106034398079, + -0.08354005962610245, + -0.048093460500240326, + -0.030154218897223473, + -0.010961515828967094, + 0.007697841618210077, + -0.08613990992307663, + 0.01947987824678421, + 0.017181523144245148, + 0.02698543854057789, + 0.040134966373443604, + -0.03223738074302673, + -0.03745822235941887, + 0.054219458252191544, + -0.03571298345923424, + -0.035921428352594376, + 0.04604002833366394, + -0.04030536487698555, + -0.032255616039037704, + -0.06959861516952515, + 0.032114237546920776, + -0.027767114341259003, + 0.019928939640522003, + 0.022700339555740356, + -0.04375129193067551, + -0.05712258815765381, + -0.02559071220457554, + 0.006574000231921673, + -0.025430524721741676, + -0.028035728260874748, + -0.04453514888882637, + -0.10015997290611267, + -0.0672021210193634, + 0.012007188983261585, + 0.014830735512077808, + 0.00970692653208971, + -0.051091041415929794, + -0.031854890286922455, + -0.025864001363515854, + -0.016826078295707703, + -0.0026011785957962275, + -0.05528291314840317, + 0.04440443217754364, + -0.03783581778407097, + -0.09876326471567154, + 0.029767369851469994, + -0.0023010883014649153, + -0.05689188838005066, + 0.007344242185354233, + 
0.009551416151225567, + -0.08018877357244492, + 0.007934950292110443, + -0.03663663938641548, + -0.0009275389602407813, + 0.026911364868283272, + -0.001246148720383644, + 0.054533813148736954, + 0.009721122682094574, + 0.005318093113601208, + -0.00535608222708106, + 0.008591657504439354, + 0.04166155681014061, + -0.03877246752381325, + 0.01399280782788992, + 0.01457316055893898, + -0.02407732978463173, + -0.006206234451383352, + 0.07004162669181824, + 0.01619933731853962, + -0.004103302024304867, + 0.045894261449575424, + 0.03686122968792915, + 0.04804258793592453, + 0.05579492822289467, + -0.0030228029936552048, + -0.07259590923786163, + 0.023546412587165833, + -0.0011336577590554953, + 0.04003886505961418, + -0.0139979999512434, + -0.00017333473078906536, + -0.05454326048493385, + -0.021969035267829895, + -0.03901325911283493, + 0.012417349964380264, + -0.05304381251335144, + 0.004690664820373058, + -0.06269649416208267, + 0.037584088742733, + -0.039430856704711914, + 0.01138926949352026, + 0.024504829198122025, + -0.023247476667165756, + -0.001942053553648293, + 0.01666364073753357, + 0.005435148254036903, + -0.026095639914274216, + -0.002955301431939006, + -0.043733760714530945, + -0.06700831651687622, + -0.06828623265028, + 0.045247308909893036, + -0.02567214146256447, + -0.03503000736236572, + 0.0028738975524902344, + 0.007734893821179867, + 0.041422292590141296, + -0.01760552078485489, + -0.016545895487070084, + -0.007150533143430948, + 0.02324298955500126, + 0.009319701232016087, + 0.003866465063765645, + 0.025515582412481308, + 0.03634219616651535, + -0.033345021307468414, + 0.020966341719031334, + 0.07540836185216904, + -0.0002573730598669499, + 0.05161430686712265, + 0.0003421941655687988, + -0.016416313126683235, + -0.018457459285855293, + -0.04053647443652153, + -0.008631067350506783, + 0.011850157752633095, + 0.014364494942128658, + -0.029469167813658714, + -0.0497945100069046, + 0.008882390335202217, + 0.04006727412343025, + 0.010511534288525581, + -0.03620539978146553, + -0.030285054817795753, + -0.10150802135467529, + -0.01594187133014202, + -0.05266118794679642, + -0.017643682658672333, + 0.017300395295023918, + -0.080828458070755, + 0.027883851900696754, + 0.0260021835565567, + -0.013791227713227272, + 0.01261923462152481, + 0.02038010023534298, + -0.04244862496852875, + 0.011010567657649517, + 0.011758117005228996, + 0.0030098427087068558, + 0.003606629790738225, + 0.0020418025087565184, + -0.0004243037255946547, + 0.03312380611896515, + 0.02103457599878311, + 0.03972248733043671, + -0.07605717331171036, + -0.039676181972026825, + -0.07193399965763092, + -0.030211182311177254, + -0.0786738321185112, + -0.025149084627628326, + -0.0017661137972027063, + -0.017345670610666275, + 0.027870142832398415, + -0.003350367769598961, + 0.060104407370090485, + 0.020051708444952965, + 0.014986025169491768, + 0.0056610992178320885, + 0.0593392550945282, + -0.0053368182852864265, + -0.025954807177186012, + -0.07451668381690979, + -0.0021227921824902296, + -0.03457536920905113, + 0.010495556518435478, + -0.0450577586889267, + -0.016477687284350395, + 0.05702868103981018, + 0.025161782279610634, + 0.016737932339310646, + -0.00856244657188654, + 0.01180358324199915, + -0.04093103110790253, + -0.03350433334708214, + -0.01662720926105976, + 0.002310116309672594, + 0.0026625224854797125, + -0.04226106405258179, + 0.02971433289349079, + -0.07220850884914398, + -0.0009123267373070121, + -0.02786707505583763, + -0.025111757218837738, + 0.033243875950574875, + 0.03572067618370056, + 
-0.0019114067545160651, + -0.030592206865549088, + -0.021260922774672508, + 0.06805034726858139, + 0.00013317271077539772, + 0.006557960994541645, + -0.0329759456217289, + 0.006780629511922598, + 0.00866342056542635, + -0.01449753437191248, + 0.028198137879371643, + 0.019570309668779373, + -0.036116212606430054, + 0.058676715940237045, + 0.021564209833741188, + -0.026323653757572174, + -0.02302497997879982, + 0.02744974195957184, + 0.05593085661530495, + 0.02073318511247635, + -0.074001245200634, + -0.0184424240142107, + 0.031868427991867065, + 0.018198778852820396, + -0.0450170636177063, + 0.030543111264705658, + -0.06377965956926346, + 0.04590768367052078, + -0.005907150451093912, + -0.01748581975698471, + 0.006950956769287586, + 0.05506323277950287, + 0.04594920575618744, + -0.015593858435750008, + -0.017087753862142563, + 0.029356854036450386, + -0.06531023979187012, + 0.005519233178347349 + ], + "content_embedding": [ + 0.019554156810045242, + 0.04886673390865326, + 0.00547438021749258, + -0.02931770123541355, + 0.024625789374113083, + 0.008072949014604092, + 0.03052533231675625, + -0.006179450079798698, + -0.0333777479827404, + -0.021061548963189125, + -0.05983448028564453, + 0.025203319266438484, + 0.032834798097610474, + 0.013029776513576508, + -0.011046950705349445, + -0.031452618539333344, + -8.028985030250624e-05, + 0.0007977305795066059, + -0.009920830838382244, + -0.0018231356516480446, + -0.006695937365293503, + -0.014926698990166187, + -0.009614776819944382, + -0.05784115567803383, + 0.02651236765086651, + -0.011027690954506397, + -0.009276495315134525, + 0.04284600168466568, + -0.05643690750002861, + 0.04219788312911987, + 0.05048283189535141, + 0.004557965788990259, + -0.01679980754852295, + -0.040684137493371964, + -0.044776126742362976, + -0.018855446949601173, + 0.06871335208415985, + -0.014481916092336178, + -0.024082450196146965, + 0.04807353392243385, + 0.04378245398402214, + -0.0010759941069409251, + 0.014099782332777977, + -0.037625934928655624, + 0.05659622326493263, + 0.01868855021893978, + 0.025368744507431984, + -0.03537469357252121, + 0.014213587157428265, + -0.0705343633890152, + -0.016253503039479256, + -0.005150542128831148, + -0.017522070556879044, + -0.03862348571419716, + 0.00953003577888012, + 0.016891248524188995, + -0.007589798420667648, + 0.006262748036533594, + 0.01169696543365717, + -0.05435675010085106, + 0.03128333017230034, + 0.07449059188365936, + -0.043514277786016464, + 0.022657765075564384, + 0.01074683852493763, + -0.051405169069767, + 0.00816179346293211, + -0.014555123634636402, + -0.01839461177587509, + -0.00691940588876605, + 0.009614893235266209, + -0.0071630412712693214, + 0.02593475580215454, + 0.07137756794691086, + -0.049324654042720795, + -0.04460940882563591, + -0.004007663112133741, + -0.018305329605937004, + -0.029537511989474297, + 0.03624692186713219, + 0.04646339640021324, + -0.053126320242881775, + -0.008358806371688843, + -0.001911070430651307, + 0.05718495324254036, + -0.02093559131026268, + -0.028956729918718338, + 0.005082732532173395, + -0.028617851436138153, + 0.1116873174905777, + 0.006924519315361977, + 0.05388922244310379, + 0.04239538311958313, + 0.03629518672823906, + -0.07756507396697998, + 0.08572934567928314, + -0.034708015620708466, + -0.052686456590890884, + -0.03376411274075508, + -0.011243571527302265, + -0.028565097600221634, + 0.03532436862587929, + 0.025328388437628746, + -0.009712263941764832, + 0.0416463203728199, + 0.02975877933204174, + 0.04073808714747429, + 0.045180853456258774, + 
0.01522457879036665, + 0.0021614122670143843, + -0.0602865032851696, + -0.024015003815293312, + -0.032889459282159805, + 0.020978014916181564, + -0.04493942856788635, + 0.012820002622902393, + 0.0644337609410286, + -0.019941547885537148, + -0.004083186853677034, + 0.018924430012702942, + -0.017330998554825783, + 0.0008878704975359142, + 0.02702835015952587, + -0.026265576481819153, + 0.04109559580683708, + -0.009575187228620052, + -0.020085688680410385, + -5.433974365587346e-05, + 0.018562229350209236, + -0.02393198385834694, + -0.030048735439777374, + 0.01821220852434635, + -0.029501279816031456, + -0.01944204978644848, + -0.01614498719573021, + 0.03840102627873421, + -0.04210539907217026, + -0.050642915070056915, + 0.026918280869722366, + -0.008575397543609142, + 0.011357792653143406, + 0.015665695071220398, + 0.01620817743241787, + -0.0165872685611248, + 0.028748027980327606, + -0.10036404430866241, + -0.04525408893823624, + 0.010306733660399914, + -0.04815903678536415, + 0.025287121534347534, + -0.010369544848799706, + 0.023846469819545746, + 0.016379185020923615, + 0.019874077290296555, + 0.004489564802497625, + -0.009458004496991634, + -0.032719556242227554, + -0.0319439135491848, + 0.06740261614322662, + 0.005906871519982815, + 0.024023521691560745, + 0.04657802730798721, + -0.08182766288518906, + 0.062149085104465485, + -0.0061394101940095425, + 0.014895983040332794, + 0.029691752046346664, + 0.0031379619613289833, + 0.04791149124503136, + 0.06459061801433563, + -0.012314707040786743, + 0.016569096595048904, + -0.01054114755243063, + -0.027581606060266495, + 0.02896907925605774, + 0.0048512346111238, + 0.04427911341190338, + -0.026665540412068367, + 0.07613496482372284, + -0.023571502417325974, + -0.004409146960824728, + -0.036679890006780624, + 0.0016537840710952878, + 0.024527747184038162, + -0.02983722649514675, + 0.0021592022385448217, + 0.005430649966001511, + -0.08198896050453186, + -0.017340485006570816, + -0.014370240271091461, + 0.012875599786639214, + -0.008910057134926319, + -0.013228043913841248, + -0.0023926664143800735, + -0.015292157419025898, + -0.03927253186702728, + 0.07715654373168945, + -0.012554320506751537, + 0.03217530623078346, + -0.042463112622499466, + -0.03743144869804382, + 0.008893481455743313, + -0.0666876956820488, + 0.02818153239786625, + -0.009054000489413738, + 0.03337392210960388, + 0.03328379616141319, + 0.009759150445461273, + 0.01269217487424612, + -0.02173757553100586, + 0.07456912100315094, + 0.006997218355536461, + 0.007162875030189753, + 0.040701836347579956, + -0.04538433253765106, + -0.0032951829489320517, + 0.002400761004537344, + -0.04169681295752525, + -0.05523253232240677, + 0.06444490700960159, + 0.03810356929898262, + -0.035072412341833115, + 0.04415622353553772, + -0.02322838269174099, + -0.009917438961565495, + 0.008140898309648037, + -0.02388846129179001, + 0.0034457307774573565, + -0.0054973880760371685, + 0.024084730073809624, + -0.007739664521068335, + -0.06571771949529648, + 0.07359053194522858, + -0.011194998398423195, + 0.0211270023137331, + 0.017875710502266884, + -0.01821357198059559, + 0.03745369240641594, + 0.023267023265361786, + 0.07556653767824173, + 0.042081139981746674, + 0.01543054636567831, + 0.038234904408454895, + -0.019599558785557747, + -0.08676281571388245, + -0.013498742133378983, + -0.011986842378973961, + -0.014071784913539886, + -0.03772296756505966, + 0.009192361496388912, + -0.020469751209020615, + -0.004528891295194626, + 0.003913218155503273, + 0.013755199499428272, + 0.009065939113497734, + 
-0.049990858882665634, + 0.030110390856862068, + 0.05044790729880333, + 0.020461000502109528, + -0.036168310791254044, + 0.04875757917761803, + -0.023571951314806938, + 0.04313709959387779, + 0.02649555914103985, + 0.0055029122158885, + 0.0138795655220747, + -0.0009625149541534483, + -0.019915221258997917, + -0.0024100886657834053, + -0.00426845159381628, + 0.0008668623049743474, + 0.014360995963215828, + 0.01953921653330326, + 0.05843087658286095, + 0.010600714012980461, + -0.01941727101802826, + 0.016723858192563057, + -0.027911949902772903, + 0.011009825393557549, + -0.0070054735988378525, + -0.02280472218990326, + -0.0902462899684906, + -0.029843643307685852, + -0.005726281087845564, + -0.007387514691799879, + 0.05724000930786133, + -0.016834275797009468, + 0.015216480940580368, + 0.02209043875336647, + 0.004264513496309519, + 0.019342858344316483, + 0.018849363550543785, + -0.04794900864362717, + 0.02397482842206955, + -0.007828679867088795, + 0.020298736169934273, + 0.09897984564304352, + -0.04056645929813385, + -0.011319068260490894, + 0.0034366592299193144, + -0.0073711141012609005, + -0.005176732316613197, + 0.022680383175611496, + -0.01522906869649887, + 0.002589346142485738, + -0.016521241515874863, + 0.021019242703914642, + -0.004480182658880949, + 0.009419859386980534, + 0.013903859071433544, + -0.05053260177373886, + 0.0012486587511375546, + 0.017442021518945694, + -0.016688739880919456, + -0.034772295504808426, + 0.03298048675060272, + 0.04604269936680794, + -0.008270222693681717, + 0.024096740409731865, + 0.022777941077947617, + 0.03941553831100464, + -0.030161242932081223, + -0.01781023107469082, + -0.03533001244068146, + 0.02088671736419201, + 0.0896298736333847, + 0.0061553712002933025, + 0.054883576929569244, + -0.04094908386468887, + 0.04382561892271042, + -0.003113290062174201, + 0.015607516281306744, + -0.04469290003180504, + -0.013229981996119022, + -0.0026423041708767414, + -0.017652839422225952, + 0.005086386110633612, + 0.035375673323869705, + 9.725322161102667e-05, + -0.026332346722483635, + 0.01958891935646534, + 0.041558533906936646, + -0.02570311166346073, + -0.04806942865252495, + -0.035452235490083694, + -0.015692519024014473, + -0.0027702273800969124, + 0.022831231355667114, + 0.05247980356216431, + 0.0010726081673055887, + -0.025438379496335983, + 0.006880710367113352, + 0.009276037104427814, + -0.021758491173386574, + -0.008571256883442402, + 0.003312689019367099, + 0.032590776681900024, + 0.02640446089208126, + 0.0034450399689376354, + 0.028627941384911537, + 0.07457757741212845, + -0.012303084135055542, + 0.045139290392398834, + -3.2679530704626814e-05, + 0.024825602769851685, + 0.04402294382452965, + 0.025363540276885033, + 0.03840187191963196, + 0.019677763804793358, + -0.03521053493022919, + 0.036523785442113876, + -0.06001857668161392, + 0.006586587987840176, + 0.022668125107884407, + 0.06063239648938179, + 0.015586800873279572, + -0.08306828886270523, + -0.04289577156305313, + -0.0050475094467401505, + -0.0309798214584589, + 0.05810924991965294, + -0.017818300053477287, + -0.01088999304920435, + -0.017884155735373497, + 0.04356890171766281, + -0.023094575852155685, + -0.04477296397089958, + -0.0007801170577295125, + 0.018146106973290443, + -0.05633535981178284, + 0.006709645502269268, + -0.037334758788347244, + -0.055724598467350006, + 0.00994165986776352, + -0.009849119931459427, + -0.027259083464741707, + -0.02565668150782585, + 0.005235382355749607, + 0.016267497092485428, + 0.00393668282777071, + -0.05648971349000931, + -0.05955129489302635, + 
0.026542942970991135, + 0.040565431118011475, + -0.02225298061966896, + 0.0017030639573931694, + -0.02689032256603241, + -0.029025251045823097, + 0.030817490071058273, + -0.007936912588775158, + 0.05566547438502312, + 0.0174697358161211, + -0.014709461480379105, + -0.07380940765142441, + 0.07026955485343933, + -0.024563433602452278, + 0.05333513021469116, + 0.020963717252016068, + -0.015575055032968521, + -0.04304461553692818, + 0.00822180975228548, + -0.013204170390963554, + -0.0028262599371373653, + 0.015431943349540234, + -0.025627007707953453, + 0.0006762628327123821, + -0.02078782208263874, + 0.009704814292490482, + -0.006950112525373697, + -0.020425891503691673, + 0.044901806861162186, + 0.020927794277668, + 0.009534145705401897, + 0.004958992823958397, + -0.037563592195510864, + 0.03806327283382416, + 0.0783824622631073, + 0.011150919832289219, + -0.024385575205087662, + 0.03461897745728493, + 0.02127663977444172, + -0.012272517196834087, + -0.01546854991465807, + -0.06705902516841888, + -0.01649612747132778, + 0.06068763509392738, + 0.07799869775772095, + 0.0014717800077050924, + -0.04836009815335274, + -0.026833070442080498, + 0.013509837910532951, + 0.04280327260494232, + 0.009658309631049633, + -0.007854060269892216, + 0.09166036546230316, + -0.00760420598089695, + 0.024465130642056465, + -0.041591379791498184, + 0.007116211112588644, + 0.05567977577447891, + 0.01807284727692604, + 0.028304288163781166, + -0.014866949990391731, + 0.026369474828243256, + 0.008983064442873001, + -0.02317068539559841, + 0.03937782347202301, + -0.01901034638285637, + -0.0021325594279915094, + 0.030924763530492783, + -0.020755570381879807, + 0.030001072213053703, + 0.03809978440403938, + 0.0334426648914814, + -0.042968180030584335, + -0.012311535887420177, + 0.03697645664215088, + -0.041293930262327194, + 0.01696925237774849, + 0.03560850769281387, + 0.03847989812493324, + -0.037730954587459564, + -0.05352506786584854, + -0.02746652066707611, + -0.05294184759259224, + -0.017248092219233513, + -0.005418767221271992, + 0.01951681822538376, + -0.017932193353772163, + -0.007422131486237049, + 0.03827866166830063, + 0.05701953545212746, + -0.02210610918700695, + 0.010034722276031971, + -0.07954911887645721, + -0.0485968291759491, + -0.028629625216126442, + 0.009054362773895264, + 0.02398092858493328, + -0.009973667562007904, + 0.0011409823782742023, + -0.011182617396116257, + 0.0020028105936944485, + 0.006942914333194494, + -0.039862822741270065, + -0.0066703930497169495, + -0.004236259963363409, + -0.1050848662853241, + 0.025538505986332893, + 0.029989799484610558, + 0.01211432833224535, + 0.04559238627552986, + 0.050545401871204376, + 0.05476491525769234, + 0.01163802482187748, + -0.054837070405483246, + 0.07410066574811935, + 0.0024028397165238857, + -0.10432479530572891, + 0.07078853249549866, + -0.012614017352461815, + 0.06030529364943504, + 0.054839566349983215, + 0.014005501754581928, + -0.1118561178445816, + 0.009275965392589569, + -0.023663705214858055, + -0.028527697548270226, + 0.0584726445376873, + -0.045622922480106354, + 0.03320262208580971, + 0.11574956774711609, + 0.007655338849872351, + -0.07191741466522217, + 0.03859880194067955, + -0.06297247856855392, + -0.018405890092253685, + -0.028816718608140945, + -0.0126464469358325, + 0.0202946774661541, + -0.03483844920992851, + 0.024250855669379234, + -0.006263037212193012, + 0.02315174601972103, + -0.0037849699147045612, + -0.0359908789396286, + -0.037620242685079575, + 0.05056930333375931, + -0.006831671576946974, + -0.10494183748960495, + 
0.07645728439092636, + -0.028021620586514473, + 0.009692930616438389, + -0.10232461243867874, + 0.05202733352780342, + -0.05352945625782013, + 0.02073156274855137, + -0.017188169062137604, + 0.0020886484999209642, + -0.04133611172437668, + -0.044309552758932114, + 0.018564041703939438, + 0.026704275980591774, + -0.055197130888700485, + 0.007287430576980114, + -0.1245347261428833, + -0.037194203585386276, + 0.06665091216564178, + 0.061706364154815674, + 0.01804385520517826, + -0.013962237164378166, + -0.024994580075144768, + -0.011294208467006683, + -0.009245212189853191, + 0.029879143461585045, + -0.046995047479867935, + 0.02158532105386257, + -0.002750945743173361, + -0.08373189717531204, + 0.006320012733340263, + -0.029947226867079735, + -0.09511808305978775, + 0.03186006098985672, + 0.0028628590516746044, + -0.02251911163330078, + 0.03152289241552353, + -0.07237773388624191, + 0.013269931077957153, + 0.09650041908025742, + -0.045900944620370865, + -0.006541605107486248, + 0.006844623479992151, + 0.03238837793469429, + -0.020313216373324394, + -0.013520441018044949, + 0.000403873244067654, + -0.024447409436106682, + -0.014962681569159031, + 0.006884200032800436, + -0.04582136869430542, + -0.02838127687573433, + 0.02936256304383278, + 0.04077419266104698, + 0.02266734093427658, + 0.013416043482720852, + 0.0345231331884861, + 0.052821315824985504, + 0.028492338955402374, + -0.01292923279106617, + -0.03745116665959358, + 0.0028044944629073143, + 0.006625712383538485, + 0.017212992534041405, + -0.00239459122531116, + -0.002529381774365902, + -0.029911531135439873, + 0.029142336919903755, + -0.07905209809541702, + 0.01719854585826397, + -0.03854485601186752, + -0.0116807846352458, + -0.046873632818460464, + 0.05275706946849823, + -0.04152297228574753, + -0.03727864846587181, + 0.048614900559186935, + -0.010790652595460415, + -0.0024877521209418774, + 0.012286793440580368, + 0.08292551338672638, + 0.03208302706480026, + 0.02867130935192108, + -0.039545897394418716, + -0.02800118923187256, + -0.07673710584640503, + 0.005896291229873896, + -0.030191265046596527, + -0.0187264122068882, + -0.03647911176085472, + 0.0371234230697155, + 0.05115009844303131, + -0.04013253375887871, + -0.03304912894964218, + 0.05173036456108093, + 0.04860515147447586, + -0.00717319268733263, + 0.045631736516952515, + 0.04671349748969078, + 0.004948618821799755, + 0.009837846271693707, + 0.04381090775132179, + 0.0918705016374588, + -0.005869758781045675, + 0.02249985560774803, + 0.010875782929360867, + 0.02956228516995907, + 0.0036174776032567024, + 0.011353887617588043, + -0.05186513066291809, + 0.0173268411308527, + 0.011808891780674458, + -0.00798418466001749, + -0.00929324608296156, + -0.004763087723404169, + 0.010464239865541458, + -0.001494695316068828, + -0.024331238120794296, + -0.042497288435697556, + -0.049635156989097595, + 0.006581253372132778, + -0.05040008947253227, + -0.01882144808769226, + -0.02419630065560341, + -0.06420613825321198, + 0.020046783611178398, + 0.024303266778588295, + -0.009806456044316292, + -0.003666533390060067, + 0.0018573087872937322, + -0.02440156601369381, + 0.0059090061113238335, + -0.03926969692111015, + 0.011777290143072605, + -0.032393939793109894, + -0.0015500712906941772, + 0.019816264510154724, + 0.0037668957374989986, + -0.0033931683283299208, + -0.01164526678621769, + -0.07331964373588562, + -0.008798380382359028, + -0.016916625201702118, + -0.034663647413253784, + -0.05818398669362068, + 0.015174336731433868, + -0.010697754099965096, + -0.04179975762963295, + 
0.0064012641087174416, + 0.01987038180232048, + 0.019733907654881477, + -0.0013441460905596614, + 0.05315450206398964, + -0.021319502964615822, + 0.05351290851831436, + 0.052106473594903946, + -0.010152475908398628, + -0.017445174977183342, + 0.030323222279548645, + 0.02796917036175728, + 0.0026626174803823233, + -0.03001641295850277, + 0.014342408627271652, + 0.03061019256711006, + -0.00294340867549181, + -0.018154315650463104, + 0.02443081885576248, + 0.003663803683593869, + -0.023388244211673737, + -0.0018819351680576801, + 0.0010939103085547686, + 0.03557095676660538, + -0.053037356585264206, + -0.06498610228300095, + 0.04878298565745354, + -0.03413922339677811, + -0.02763182483613491, + 0.0009715812630020082, + 0.00486012501642108, + 0.03292006626725197, + 0.023539533838629723, + 0.011763699352741241, + -0.026951594278216362, + -0.03602864220738411, + 0.04788520559668541, + -0.04133124649524689, + -0.013166938908398151, + -0.037222955375909805, + 0.014670289121568203, + -0.056680355221033096, + -0.008314837701618671, + 0.04227377846837044, + -0.04986898973584175, + -0.03443481773138046, + -0.00174588686786592, + 0.0027100981678813696, + -0.001079584937542677, + -0.06199319660663605, + 0.03100454993546009, + 0.06752201914787292, + 0.028809374198317528, + -0.06289442628622055, + -0.005715612787753344, + -0.052428603172302246, + -0.01548265665769577, + -0.048164043575525284, + 0.04651368409395218, + 0.00703784916549921, + 0.0493292361497879, + 0.024252086877822876, + -0.03811171278357506, + 0.030049748718738556, + 0.02749229036271572, + -0.0005660666502080858, + 0.009821311570703983, + 0.01306783128529787, + 0.01775788515806198, + -0.051085181534290314, + 0.028779184445738792 + ], + "chunk_ind": 0 + }, + { + "url": "https://docs.danswer.dev/more/use_cases/sales", + "title": "Sales", + "content": "Keep your team up to date on every conversation and update so they can close.\n\nRecall Every Detail\nBeing able to instantly revisit every detail of any call without reading transcripts is helping Sales teams provide more tailored pitches, build stronger relationships, and close more deals. Instead of searching and reading through hours of transcripts in preparation for a call, your team can now ask Danswer \"What specific features was ACME interested in seeing for the demo\". Since your team doesn't have time to read every transcript prior to a call, Danswer provides a more thorough summary because it can instantly parse hundreds of pages and distill out the relevant information. Even for fast lookups it becomes much more convenient - for example to brush up on connection building topics by asking \"What rapport building topic did we chat about in the last call with ACME\".\n\nKnow Every Product Update\nIt is impossible for Sales teams to keep up with every product update. Because of this, when a prospect has a question that the Sales team does not know, they have no choice but to rely on the Product and Engineering orgs to get an authoritative answer. Not only is this distracting to the other teams, it also slows down the time to respond to the prospect (and as we know, time is the biggest killer of deals). With Danswer, it is even possible to get answers live on call because of how fast accessing information becomes. 
A question like \"Have we shipped the Microsoft AD integration yet?\" can now be answered in seconds meaning that prospects can get answers while on the call instead of asynchronously and sales cycles are reduced as a result.", + "title_embedding": [ + 0.008453648537397385, + 0.049128592014312744, + 0.0009390072082169354, + -0.011420674622058868, + 0.009472657926380634, + 0.05824451148509979, + -0.04129518195986748, + -0.018892904743552208, + -0.007598293945193291, + -0.03804052621126175, + -0.003747896058484912, + 0.005537204910069704, + 0.026590371504426003, + -0.02672540210187435, + 0.02651272714138031, + -0.033856429159641266, + 0.03809495270252228, + -0.013283955864608288, + -0.00013869917893316597, + 0.020309027284383774, + -0.011650309897959232, + -0.02103874459862709, + 0.01066586747765541, + -0.057243604212999344, + 0.031903959810733795, + -0.012924387119710445, + -0.012852400541305542, + 0.01951044611632824, + 0.018149282783269882, + -0.01314238179475069, + 0.01411629281938076, + -0.009169652126729488, + 0.017607972025871277, + -0.0685962364077568, + -0.03812728449702263, + -0.06783904880285263, + 0.009403989650309086, + 0.0073580555617809296, + 0.024488259106874466, + 0.0005618860013782978, + 0.0418679341673851, + 0.01953919045627117, + -0.05312833935022354, + -0.024998614564538002, + 0.017944931983947754, + -0.004367160610854626, + 0.023977765813469887, + 0.012059491127729416, + 0.049591515213251114, + -0.05067070946097374, + 0.001997043378651142, + 0.0024496091064065695, + -0.029425427317619324, + -0.01783224567770958, + 0.0016368982614949346, + 0.006662128958851099, + 0.0406189002096653, + 0.011542750522494316, + 0.023075561970472336, + -0.012605642899870872, + 0.010134617798030376, + 0.054372530430555344, + -0.008662662468850613, + 0.016197390854358673, + 0.015719175338745117, + -0.024816671386361122, + -0.006610441952943802, + 0.015418685972690582, + -0.011234346777200699, + -0.003028685925528407, + 0.09540237486362457, + 0.026426734402775764, + 0.03904794156551361, + 0.025213684886693954, + -0.01716603711247444, + -0.03701924905180931, + 0.0097318384796381, + 0.003443458816036582, + -0.022713838145136833, + 0.029555244371294975, + -0.0012461059959605336, + -0.02693314291536808, + 0.027585584670305252, + 0.00919498410075903, + 0.0784342810511589, + -0.004147294908761978, + 0.0058822669088840485, + -0.03257093206048012, + -0.0194808728992939, + -0.015468079596757889, + -0.007020206656306982, + 0.017711102962493896, + 0.09179206192493439, + 0.07245390862226486, + -0.08327075839042664, + -0.01777547225356102, + -0.0036551833618432283, + 0.005220785271376371, + -0.013074155896902084, + -0.05137576162815094, + -0.03984086588025093, + -0.001254269853234291, + 0.0388508178293705, + -0.026559771969914436, + 0.0941508412361145, + 0.028184799477458, + 0.030329154804348946, + -0.015901995822787285, + -0.05033569037914276, + -0.040444258600473404, + -0.08671051263809204, + 0.016047460958361626, + -0.015745067968964577, + 0.036006249487400055, + -0.019317714497447014, + 0.0028998597990721464, + 0.08916892111301422, + -0.06901372224092484, + 0.05473657324910164, + 0.06781236082315445, + 0.016306273639202118, + 0.0011640831362456083, + 0.0008445090497843921, + 0.06246241182088852, + -0.020153285935521126, + -0.011525464244186878, + -0.043366242200136185, + 0.036528658121824265, + 0.012839822098612785, + -0.0585474967956543, + 0.04394524171948433, + 0.017970293760299683, + 0.0273651871830225, + -0.006580607499927282, + 0.04960521310567856, + 0.04129025712609291, + -0.039038222283124924, + 
-0.007922167889773846, + 0.06417153030633926, + 0.00870921928435564, + 0.04419026896357536, + 0.03394830971956253, + 0.04194091632962227, + -0.009943729266524315, + -0.026762165129184723, + -0.06321611255407333, + 0.018493760377168655, + 0.02112971432507038, + -0.008362037129700184, + 0.0030741533264517784, + 0.02977512590587139, + 0.007839385420084, + -0.030427763238549232, + -0.005435082130134106, + -0.023782288655638695, + -0.06198855862021446, + -0.006325197406113148, + 0.03481286019086838, + 0.01096314936876297, + -0.008224114775657654, + 0.016886647790670395, + 0.018134206533432007, + -0.02693709172308445, + 0.08969569951295853, + -0.03854775056242943, + -0.036120977252721786, + 0.003924157004803419, + 0.05902013182640076, + 0.02604551427066326, + 0.04082872346043587, + -0.006722352001816034, + -0.011970511637628078, + -0.014307579025626183, + 0.0019104834645986557, + 0.05876787751913071, + -0.002502535469830036, + 0.027572669088840485, + 0.027537895366549492, + -0.026239819824695587, + -0.02237943559885025, + 0.026839105412364006, + -0.04806261509656906, + 0.022188611328601837, + 0.05301826074719429, + -0.017422696575522423, + -0.04489205405116081, + 0.04667934030294418, + -0.03202678635716438, + -0.022968970239162445, + 0.019313516095280647, + -0.06724600493907928, + 0.023997649550437927, + 0.02574523165822029, + -0.021134337410330772, + -0.018166225403547287, + -0.02959403023123741, + 0.038194961845874786, + 0.009773771278560162, + 0.026523802429437637, + -0.014497771859169006, + 0.009200031869113445, + 0.01488631684333086, + -0.10185936093330383, + 0.018728939816355705, + -0.01575741171836853, + 0.02251303941011429, + 0.02899281494319439, + -0.04970388114452362, + 0.007716581225395203, + 0.06781720370054245, + 0.07015375792980194, + -0.02934109978377819, + -0.006221897434443235, + 0.012976646423339844, + -0.00737345777451992, + -0.02422930672764778, + -0.03612032160162926, + 0.003943525720387697, + -0.02053997851908207, + 0.01488402672111988, + -0.016384800896048546, + -0.02631048858165741, + 0.0029128696769475937, + 0.0012878051493316889, + 0.029553934931755066, + 0.06647666543722153, + -0.014463554136455059, + 0.04724975302815437, + 0.01416710950434208, + 0.0259545985609293, + -0.019878843799233437, + -0.04123354330658913, + -0.003678504843264818, + -0.015237071551382542, + -0.037017468363046646, + -0.014126508496701717, + 0.037044424563646317, + 0.041153766214847565, + 0.034035731106996536, + 0.0518031120300293, + -0.004720885772258043, + -0.01163511723279953, + 0.07213476300239563, + -0.06036211922764778, + -0.03499453514814377, + -0.006943386513739824, + -0.06392820924520493, + -0.013479998335242271, + 0.01668090932071209, + 0.030153054744005203, + 0.04787909612059593, + 0.042566943913698196, + 0.01869821362197399, + 0.05060578137636185, + -0.005738548934459686, + 0.0004935020115226507, + -0.04920157790184021, + 0.05485580489039421, + -0.02717270515859127, + -0.026000261306762695, + 0.017413917928934097, + 0.00194182014092803, + 0.06368009001016617, + -0.021680809557437897, + 0.011896755546331406, + 0.008441813290119171, + -0.009322874248027802, + 0.004058067686855793, + 0.003404452698305249, + -0.0070596663281321526, + -0.01350175030529499, + 0.027827585116028786, + -0.017853371798992157, + 0.05100760981440544, + -0.01331804133951664, + -0.021219315007328987, + 0.012195413932204247, + -0.04513333737850189, + 0.022477995604276657, + 0.004410626832395792, + 0.033304695039987564, + 0.023220136761665344, + 0.00041832958231680095, + 0.007724999450147152, + 0.0359807088971138, + 
0.010411631315946579, + 0.0007441110792569816, + -0.018354782834649086, + -0.030612032860517502, + 0.04444800317287445, + -0.004541076719760895, + -0.012099254876375198, + 0.03223736584186554, + -0.017639242112636566, + 0.012390367686748505, + 0.055463794618844986, + 0.09133724123239517, + -0.028237899765372276, + -0.026783155277371407, + -0.029024433344602585, + 0.014482105150818825, + 0.05629871413111687, + -0.03724139928817749, + 0.008170249871909618, + 0.06597549468278885, + 0.051776643842458725, + 0.042193781584501266, + -0.01338224858045578, + 0.03543481230735779, + 0.0065676262602210045, + -0.04679378867149353, + 0.048750247806310654, + -0.01348006259649992, + 0.06560897082090378, + -0.10058096796274185, + 0.06226775795221329, + 0.06525543332099915, + -0.020321687683463097, + 0.05926727131009102, + -0.023439910262823105, + 0.00998155027627945, + -0.04136430844664574, + 0.04513855278491974, + -0.07410337775945663, + -0.0032536713406443596, + 0.022534336894750595, + -0.0035887588746845722, + 0.018703486770391464, + -0.023037323728203773, + -0.03570957109332085, + -0.03149940446019173, + 0.01058092713356018, + -0.08196881413459778, + -0.012937279418110847, + 0.02611234411597252, + 0.03242015466094017, + 0.00964296329766512, + -0.03003847971558571, + -0.02878165803849697, + 0.005552087444812059, + 0.11100566387176514, + 0.006707212887704372, + 0.007847320288419724, + -0.04757271707057953, + -0.010918735526502132, + 0.007332456298172474, + -0.04022597521543503, + -0.03945135325193405, + 0.08289318531751633, + -0.049061018973588943, + -0.04947024583816528, + -0.030500037595629692, + -0.03648613020777702, + 0.007221090141683817, + -0.023051844909787178, + 0.06497090309858322, + 0.024345578625798225, + -0.0074218385852873325, + -0.04062318801879883, + -0.020212918519973755, + -0.009461181238293648, + -0.04491201415657997, + 0.05126942694187164, + -0.005242756102234125, + 0.024492694064974785, + -0.02291315235197544, + 0.06517285853624344, + -0.006112807895988226, + 0.004548671655356884, + 0.009358521550893784, + 0.0066603804007172585, + -0.005717182531952858, + 0.046729590743780136, + 0.04008319228887558, + 0.09077014029026031, + 0.03511488437652588, + -0.05282759666442871, + 0.020438214763998985, + -0.01378707680851221, + -0.005117158405482769, + 0.07433145493268967, + 0.0034097072202712297, + -0.011192821897566319, + 0.0009265196276828647, + -0.03159809112548828, + -0.033578045666217804, + -0.012528836727142334, + -0.006292750593274832, + 0.0452519953250885, + -0.025647340342402458, + -0.026395585387945175, + -0.044332105666399, + -0.0012870433274656534, + 0.016866305842995644, + -0.00726186903193593, + -0.014325585216283798, + 0.02150380238890648, + 0.05446008965373039, + 0.01817481219768524, + 0.017272990196943283, + 7.192481280071661e-05, + -0.021787019446492195, + 0.03518282249569893, + 0.04129958152770996, + 0.005599076859652996, + 0.07016170769929886, + 0.0068466924130916595, + -0.038150086998939514, + 0.022464951500296593, + -0.007263584528118372, + 0.04023060202598572, + -0.006662019528448582, + -0.03398700803518295, + -0.027063554152846336, + -0.014334858395159245, + 0.00031888100784271955, + 0.03320762887597084, + -0.0263507217168808, + -0.01863865926861763, + 0.018559059128165245, + 0.06845609098672867, + -0.0037615702021867037, + 0.023087816312909126, + -0.019276361912488937, + -0.03351914510130882, + 0.021971892565488815, + 0.041575655341148376, + 0.05621027201414108, + -0.003078967332839966, + -0.06297048181295395, + -0.05009821802377701, + -0.026463210582733154, + 
0.0035874273162335157, + 0.021911393851041794, + -0.074904665350914, + -0.0012849566992372274, + 0.06580246239900589, + -0.0096419183537364, + -0.01183160487562418, + 0.002244731178507209, + -0.02129410021007061, + -0.004490557126700878, + 0.006305266637355089, + 0.020787451416254044, + -0.028946323320269585, + 0.09907153993844986, + 0.06419308483600616, + -0.018514622002840042, + -0.03974919393658638, + -0.055583421140909195, + -0.04144161939620972, + -0.01479779276996851, + -0.015063298866152763, + -0.05278000980615616, + 0.056262142956256866, + 0.0039010541513562202, + 0.025815758854150772, + -0.01457720622420311, + 0.017469312995672226, + 0.032789044082164764, + 0.010338534601032734, + 0.009348046034574509, + -0.002339842962101102, + 0.023598607629537582, + 0.01676766760647297, + -0.03503762558102608, + 0.033228978514671326, + -0.03216487169265747, + -0.027531251311302185, + 0.05846886709332466, + -0.00979926623404026, + 0.011551604606211185, + 0.026247017085552216, + 0.00776244793087244, + -0.042052820324897766, + 0.003744697431102395, + -0.013622709549963474, + -0.021054048091173172, + -0.022621311247348785, + 0.03077824041247368, + 0.019676415249705315, + -0.02765408344566822, + -0.013561422936618328, + -0.015634974464774132, + -0.03614448755979538, + 0.014710169285535812, + 0.00825627613812685, + 0.014769040048122406, + 0.006793464533984661, + -0.010395821183919907, + -0.0022474846336990595, + 0.033902380615472794, + 0.003390782279893756, + -0.04533020406961441, + -0.09353062510490417, + -0.027594659477472305, + -0.026893651112914085, + 0.00822615996003151, + 0.03448451682925224, + -0.013120760209858418, + 0.03301888331770897, + -0.03717275336384773, + -0.017613839358091354, + 0.03131122142076492, + 0.01355862244963646, + -0.016891464591026306, + -0.005221182014793158, + -0.09999044984579086, + -0.0072242445312440395, + 0.023685455322265625, + -0.006333169527351856, + 0.05389386788010597, + 0.0006377844838425517, + 0.06066382676362991, + -0.010544034652411938, + -0.03069271147251129, + 0.046539101749658585, + 0.04315992072224617, + -0.04527072235941887, + 0.08369841426610947, + -0.05231470242142677, + -0.02663319930434227, + -0.017284002155065536, + 0.05720992013812065, + -0.02959314174950123, + -0.053442198783159256, + -0.055300384759902954, + -0.0034046657383441925, + 0.058179739862680435, + -0.0067557781003415585, + 0.048700254410505295, + 0.06980213522911072, + 0.0022220234386622906, + -0.02891203574836254, + -0.03402455151081085, + -0.07090416550636292, + -0.02885468490421772, + -0.033339668065309525, + 0.057542525231838226, + 0.035563718527555466, + -0.0376402772963047, + 0.01505962759256363, + 0.025728864595294, + 0.001696597901172936, + 0.04947248846292496, + -0.0798964574933052, + -0.02692596986889839, + -0.012759744189679623, + -0.02195296436548233, + -0.014371627941727638, + 0.02381875552237034, + -0.01423177681863308, + -0.014195146039128304, + -0.09804418683052063, + -0.0008884363924153149, + -0.0418919213116169, + 0.015419455245137215, + -0.0015723679680377245, + -0.008167393505573273, + -0.027652231976389885, + -0.01364823617041111, + 0.042369287461042404, + -0.020887810736894608, + -0.01855718344449997, + -0.030453767627477646, + -0.08889546990394592, + -0.042489275336265564, + -0.003145430004224181, + -0.0007042307406663895, + 0.016261309385299683, + -0.04196145758032799, + -0.02786160260438919, + 0.00031149861752055585, + 0.0020716730505228043, + -0.018168985843658447, + -0.035522907972335815, + 0.06329862773418427, + -0.06277810037136078, + -0.04981480538845062, 
+ 0.05988100543618202, + 0.0031491960398852825, + -0.03463412821292877, + -0.010109111666679382, + -0.013390148058533669, + -0.08232187479734421, + 0.018557677045464516, + -0.023832205682992935, + -0.021515224128961563, + 0.03051081858575344, + -0.021489854902029037, + 0.009503633715212345, + 0.025148555636405945, + -0.023579541593790054, + -0.035016197711229324, + -0.022730164229869843, + 0.04465099051594734, + -0.04341805726289749, + 0.011980813927948475, + 0.024123655632138252, + -0.026239709928631783, + -0.017752202227711678, + 0.027042675763368607, + 0.023839112371206284, + 0.01306204218417406, + 0.039557792246341705, + 0.07731491327285767, + 0.02771804668009281, + 0.07320678234100342, + -0.008236434310674667, + -0.025150621309876442, + 0.0035144551657140255, + -0.045307569205760956, + -0.004989498760551214, + 0.006890833377838135, + 0.013798183761537075, + -0.04717986658215523, + 0.00230599008500576, + -0.06974467635154724, + 0.013648996129631996, + -0.05875125527381897, + -0.0020281318575143814, + -0.07060255855321884, + 0.04474693909287453, + -0.010507912375032902, + 0.01920556277036667, + 0.037952445447444916, + -0.04831290617585182, + -0.030323892831802368, + 0.017083611339330673, + 0.01788332499563694, + -0.019379939883947372, + 0.0296696275472641, + -0.0202578566968441, + -0.05725785344839096, + -0.07489712536334991, + 0.023742130026221275, + -0.07134415209293365, + -0.011462788097560406, + 0.0046195476315915585, + 0.04435937851667404, + 0.01344655267894268, + -0.003911314997822046, + -0.03020038641989231, + -0.0032732610125094652, + 0.03007005713880062, + 0.006368617527186871, + -0.03210403770208359, + 0.00835089199244976, + 0.05988067761063576, + -0.03537531942129135, + 0.05247778445482254, + 0.03723180294036865, + -0.008693824522197247, + 0.04847349226474762, + 0.016112500801682472, + 0.011540782637894154, + -0.0065521071664988995, + -0.03243750333786011, + -0.011966057121753693, + 0.017163656651973724, + -0.0029253605753183365, + -0.053153038024902344, + 0.0018134346464648843, + 0.01733018085360527, + 0.029417017474770546, + 0.030433885753154755, + 0.0021621473133563995, + -0.027712296694517136, + -0.05925380811095238, + -0.022185055539011955, + -0.0350322499871254, + -0.02007930353283882, + 0.010399214923381805, + -0.055177975445985794, + 0.0007819311576895416, + 0.024769598618149757, + 0.03780986741185188, + 0.03521614894270897, + -0.01817735843360424, + -0.08278614282608032, + -0.021156983450055122, + 0.03359638899564743, + -0.023659229278564453, + -0.007879458367824554, + 0.0292595736682415, + -0.035273004323244095, + 0.032482825219631195, + 0.02688293345272541, + 0.023407144472002983, + -0.047480449080467224, + 0.0006359686376526952, + -0.04895651713013649, + 0.011627614498138428, + -0.07718108594417572, + 0.010565578006207943, + -0.01866811513900757, + 0.01029923651367426, + -0.023772811517119408, + -0.032370492815971375, + 0.05088132247328758, + 0.011682837270200253, + 0.03289812430739403, + 0.017765464261174202, + 0.0604407899081707, + -0.03875206410884857, + -0.05453289672732353, + -0.05849386751651764, + -0.008108421228826046, + -0.036137521266937256, + -0.003845603670924902, + -0.010756440460681915, + 0.01515593845397234, + 0.09156721830368042, + 0.02919408679008484, + 0.024247899651527405, + -0.020837178453803062, + 0.04395196586847305, + -0.10095755755901337, + -0.07707840204238892, + -0.04705304652452469, + 0.007648217957466841, + -0.015342561528086662, + -0.02814168483018875, + 0.039529476314783096, + -0.06335531175136566, + -0.03782089054584503, + 
-0.032349780201911926, + -0.011073637753725052, + 0.03126451373100281, + 0.01734590344130993, + -0.0038809722755104303, + 0.013081631623208523, + -0.03124905936419964, + 0.0832752138376236, + -0.007435368373990059, + 0.00989855732768774, + 0.004071374889463186, + -0.021534224972128868, + -0.022376252338290215, + 0.0033940861467272043, + 0.01537957414984703, + -0.01530678290873766, + -0.006626737304031849, + 0.0722239539027214, + 0.023105483502149582, + -0.048958491533994675, + -0.031564872711896896, + 0.020468993112444878, + 0.02797403372824192, + 0.02208004891872406, + -0.06905028969049454, + -0.0069040716625750065, + 0.05185015872120857, + 0.020353762432932854, + -0.059334978461265564, + 0.019217371940612793, + -0.06793943047523499, + 0.052697695791721344, + 0.039136022329330444, + -0.03286914899945259, + 0.0017800497589632869, + 0.031667277216911316, + 0.0455632358789444, + -0.028096599504351616, + 0.007715262472629547, + 0.005349942483007908, + -0.051207322627305984, + -0.04513049125671387 + ], + "content_embedding": [ + 0.018702084198594093, + 0.03602918982505798, + -0.0030462138820439577, + -0.044632311910390854, + -0.00032779801404103637, + 0.013867323286831379, + 0.028261501342058182, + -0.0397375151515007, + -0.04266185685992241, + -0.01854686811566353, + -0.00980929471552372, + 0.005383333191275597, + 0.025814494118094444, + 0.020457584410905838, + -0.01165740005671978, + -0.02068958804011345, + 0.024463720619678497, + -0.029769178479909897, + 0.00032462665694765747, + 0.020778311416506767, + -0.009875921532511711, + -0.03926593065261841, + -0.007392906118184328, + -0.02128470689058304, + 0.020180456340312958, + -0.007395976223051548, + -0.01573384366929531, + 0.01813557744026184, + -0.04103250429034233, + 0.025176111608743668, + 0.07708748430013657, + -0.03301733359694481, + -0.00411647092550993, + -0.03946784511208534, + -0.06054544821381569, + -0.040751539170742035, + 0.01922212913632393, + -0.027164554223418236, + -0.07051867246627808, + 0.03071393072605133, + 0.07258772104978561, + -0.026042146608233452, + 0.00869719311594963, + -0.028085211291909218, + 0.0623227022588253, + -0.018381644040346146, + 0.07613946497440338, + -0.037470318377017975, + 0.022420862689614296, + -0.05576684698462486, + 0.008394862525165081, + -0.032719686627388, + -0.01822705753147602, + -0.0010964440880343318, + 0.01320287398993969, + 0.03199688717722893, + 0.02982492372393608, + 0.013676099479198456, + 0.04192396625876427, + -0.007906809449195862, + 0.009779189713299274, + 0.014214487746357918, + -0.0091244550421834, + 0.0358707420527935, + 0.003965431824326515, + -0.10266417264938354, + 0.007566090207546949, + 0.001176235033199191, + -0.027059122920036316, + -0.011146960780024529, + 0.010613090358674526, + 0.0269278846681118, + 0.00038031316944397986, + 0.024177612736821175, + -0.004191671498119831, + 0.005235857795923948, + 0.018077049404382706, + -0.04018911346793175, + -0.05125276744365692, + 0.01798614114522934, + 0.022944867610931396, + -0.04374289512634277, + 0.003367446828633547, + 0.026103869080543518, + 0.03640212118625641, + -0.006004476919770241, + 0.011275619268417358, + 0.016332507133483887, + 0.0004818506713490933, + 0.04315895587205887, + 0.022076765075325966, + -0.004544341471046209, + 0.03210305795073509, + 0.0906452164053917, + -0.07215604186058044, + 0.08494149893522263, + 0.006179131101816893, + -0.004107883665710688, + -0.012479269877076149, + -0.034221433103084564, + -0.017944667488336563, + 0.04593302682042122, + 0.019292891025543213, + -0.0031508891843259335, + 
0.06966886669397354, + 0.062235988676548004, + 0.01879720948636532, + -6.745052814949304e-05, + 0.013703186996281147, + -0.003644032636657357, + -0.04458186402916908, + -0.0017342075007036328, + -0.033848460763692856, + 0.019645417109131813, + -0.02943187765777111, + 0.001105084316805005, + 0.11609244346618652, + -0.05156521871685982, + 0.009876714088022709, + 0.005161592271178961, + -0.032977450639009476, + -0.04834079369902611, + 0.021944768726825714, + -0.012638481333851814, + 0.04569210857152939, + 0.015415391884744167, + -0.03170562908053398, + -0.0031294531654566526, + 0.015164556913077831, + -0.034650497138500214, + 0.006696060299873352, + 0.006991597358137369, + -0.05405446141958237, + 0.002478727139532566, + 0.03736428543925285, + 0.006255546119064093, + -0.023612871766090393, + -0.04719111695885658, + 0.019092371687293053, + -0.007389509119093418, + 0.005412441678345203, + 0.0032002630177885294, + 0.014097358100116253, + 0.0011166664771735668, + -0.0012068386422470212, + -0.0596468411386013, + 0.0030182863119989634, + 0.018532730638980865, + 0.020043276250362396, + -0.0014203430619090796, + 0.03718654438853264, + -0.04137871786952019, + 0.003067273646593094, + 0.060497768223285675, + -0.002445906400680542, + -0.05149608850479126, + -0.011358898133039474, + -0.0057965232990682125, + 0.04786103963851929, + 0.05521485581994057, + 0.03300704434514046, + 0.01832137256860733, + -0.03220272436738014, + 0.05887257307767868, + -0.04280361905694008, + 0.030713768675923347, + 0.031198250129818916, + 0.018273506313562393, + 0.021060051396489143, + 0.018141141161322594, + -0.01044323481619358, + 0.012220986187458038, + 0.011005178093910217, + -0.008985857479274273, + 0.05407913029193878, + -0.010373812168836594, + 0.015498371794819832, + -0.04114103317260742, + 0.02436467818915844, + -0.033720944076776505, + 0.04162474721670151, + -0.03209234029054642, + 0.007773025427013636, + 0.03150646388530731, + -0.042852289974689484, + -0.0062582893297076225, + 0.04668346792459488, + -0.06638985872268677, + -0.005832660011947155, + -0.022002393379807472, + -0.015554124489426613, + -0.017163358628749847, + -0.04198216274380684, + -0.01709570921957493, + 0.026661567389965057, + -0.049418482929468155, + 0.06740570068359375, + 0.0159238763153553, + 0.0023050543386489153, + -0.031238939613103867, + -0.03321292996406555, + -0.004760078154504299, + -0.07937376946210861, + 0.03254229202866554, + -0.023319443687796593, + 0.04906806722283363, + 0.05458753556013107, + -0.013899387791752815, + 0.03574313595890999, + 0.011882249265909195, + 0.11678190529346466, + 0.0007563747349195182, + 0.025212422013282776, + 0.023760458454489708, + -0.021716047078371048, + -0.017915191128849983, + -0.02478560246527195, + -0.028406206518411636, + -0.06178540363907814, + 0.044959306716918945, + -0.003325885394588113, + -0.02079332433640957, + -0.010518986731767654, + -0.00242405547760427, + -0.0030814141500741243, + 0.0010505993850529194, + 0.0034556719474494457, + 0.005322635173797607, + -0.02364535629749298, + -0.0034255431964993477, + -0.04123266786336899, + -0.09191295504570007, + 0.03907715901732445, + 0.017849568277597427, + 0.003202608088031411, + -0.009892004542052746, + 0.00447180075570941, + 0.02633223496377468, + 0.010955878533422947, + 0.08653970807790756, + 0.020712584257125854, + 0.0020712309051305056, + 0.02159838192164898, + -0.03148637339472771, + -0.1106131300330162, + -0.0034493962302803993, + 0.008495570160448551, + 0.025898300111293793, + -0.01585080474615097, + 0.047963947057724, + 0.03191608935594559, + 
0.05672791600227356, + 0.017725899815559387, + -0.011000119149684906, + 0.05266193300485611, + -0.02026527188718319, + -0.0076444013975560665, + 0.04474569857120514, + 0.0030594514682888985, + -0.04981522262096405, + -0.0012618869077414274, + -0.028191188350319862, + 0.06203592196106911, + -0.04548441618680954, + -0.01024117786437273, + 0.012013573199510574, + -0.03531227633357048, + -0.0303136445581913, + 0.01046642567962408, + -0.029064299538731575, + -0.015392802655696869, + 0.02021191082894802, + 0.015328207053244114, + 0.07215247303247452, + -0.024603676050901413, + -0.0021844934672117233, + 0.01121720764786005, + -0.03952696546912193, + 0.057082369923591614, + -0.007885781116783619, + -0.05230427160859108, + -0.0490812249481678, + -0.031762074679136276, + -0.009603463113307953, + -0.008093117736279964, + 0.030981115996837616, + -0.0013626269064843655, + -0.019603300839662552, + 0.025847315788269043, + 0.012290321290493011, + 0.007945788092911243, + 0.011359087191522121, + 0.01893901824951172, + 0.03544235974550247, + 0.01802144944667816, + 0.07854204624891281, + 0.07568025588989258, + -0.05122705176472664, + 0.008560816757380962, + -0.03897644579410553, + 0.024606050923466682, + 0.03792334720492363, + 0.01617903634905815, + -0.04735874757170677, + 0.003156541381031275, + -0.011881450191140175, + -0.026287615299224854, + 0.030718199908733368, + 0.04659629613161087, + -0.006789658684283495, + -0.03779527172446251, + -0.0030539771541953087, + -0.05919982120394707, + -0.03511202707886696, + -0.079665407538414, + 0.0580061711370945, + 0.07525473088026047, + 0.02381243370473385, + 0.04075026884675026, + 0.011406723409891129, + 0.020479097962379456, + -0.004844597075134516, + -0.012301536276936531, + -0.07029860466718674, + 0.0063995844684541225, + 0.03451430797576904, + 0.023998068645596504, + 0.03119623102247715, + -0.0481903962790966, + 0.03359334170818329, + 0.02563287690281868, + 0.04894624277949333, + -0.05896732583642006, + -0.020169634371995926, + 0.01319917943328619, + 0.00783664919435978, + 0.0051937587559223175, + 0.0014421058585867286, + -0.026831623166799545, + -0.0031287523452192545, + 0.024796785786747932, + -0.0008400659426115453, + -0.03314085304737091, + -0.038561608642339706, + -0.013552311807870865, + -0.012201554141938686, + -0.010810038074851036, + -0.025452986359596252, + 0.058491192758083344, + -0.017493925988674164, + -0.04302553832530975, + 0.00978845451027155, + 0.0001076174812624231, + -0.042208705097436905, + -0.011103725992143154, + 0.019692320376634598, + 0.035941820591688156, + 0.02046988718211651, + -0.013415461406111717, + 0.013622494414448738, + 0.03867186978459358, + -0.019314907491207123, + 0.018686568364501, + -0.026298167183995247, + 0.052582357078790665, + 0.027494613081216812, + 0.046435534954071045, + 0.03811647742986679, + 0.0486551970243454, + 0.0019721186254173517, + -0.017199190333485603, + -0.006901210639625788, + 0.025136850774288177, + 0.0804467722773552, + 0.061168037354946136, + 0.017717817798256874, + -0.06603220850229263, + -0.05064086616039276, + 0.039705704897642136, + -0.024581512436270714, + 0.09781734645366669, + -0.030609596520662308, + 0.006824797950685024, + -0.004317844286561012, + 0.0027715987525880337, + 0.000943489489145577, + -0.013181749731302261, + -0.025556521490216255, + -0.028432460501790047, + -0.03751988708972931, + -0.019560452550649643, + -0.0136410528793931, + -0.024382753297686577, + 0.02857314422726631, + -0.011391760781407356, + -0.0005634031840600073, + 0.03159620240330696, + -0.005198314320296049, + 
-0.006265239790081978, + 0.025611309334635735, + -0.058613672852516174, + -0.04532675817608833, + -0.021873218938708305, + 0.02903951145708561, + -0.0121288001537323, + 0.04538734629750252, + -0.027941465377807617, + -0.07353822141885757, + 0.03773269057273865, + -0.00512319291010499, + 0.028662901371717453, + 0.04792957380414009, + -0.01053295936435461, + -0.035430196672677994, + 0.009273026138544083, + 0.004318219143897295, + 0.08100441098213196, + 0.014380274340510368, + -0.03378414362668991, + -0.01987980492413044, + 0.021573858335614204, + 0.02855539321899414, + -0.007542841136455536, + 0.004633236676454544, + 0.008639613166451454, + 0.006394797004759312, + 0.019281607121229172, + -0.015256315469741821, + -0.00148143304977566, + -0.02985287643969059, + 0.025533605366945267, + -0.011111553758382797, + 0.01765139028429985, + 0.05442074313759804, + -0.09088895469903946, + 0.032542143017053604, + 0.0574481226503849, + 0.027539772912859917, + 0.028780700638890266, + 0.04139337316155434, + -0.014601831324398518, + -0.04883953556418419, + -0.015341846272349358, + -0.03965975344181061, + -0.000157194648636505, + 0.0576823353767395, + 0.0886307954788208, + 0.008708767592906952, + -0.03148962929844856, + 0.00118768191896379, + -0.009285139851272106, + 0.0349595844745636, + 0.00010961518273688853, + -0.007748626638203859, + 0.08073285222053528, + -0.026761949062347412, + 0.008831962943077087, + -0.04131530225276947, + -0.00733856251463294, + 0.06809361279010773, + 0.03708426281809807, + 0.06835067272186279, + 0.006261076312512159, + 0.015920374542474747, + -0.004048558417707682, + -0.02677253447473049, + 0.07650309801101685, + -0.023148853331804276, + -0.057494066655635834, + 0.014478741213679314, + -0.007499868981540203, + 0.02524508722126484, + 0.04229635000228882, + 0.017856169492006302, + -0.048948079347610474, + -0.014559978619217873, + 0.05829133093357086, + -0.007394919637590647, + 0.008901085704565048, + 0.03540206328034401, + 0.011715879663825035, + 0.03447958827018738, + -0.05490283668041229, + -0.0033728398848325014, + -0.029942180961370468, + -0.0025663028936833143, + 0.02124219387769699, + 0.02587033249437809, + -0.03495795652270317, + -0.01766275428235531, + 0.05129474401473999, + 0.050688110291957855, + -0.04483504965901375, + -0.01242926623672247, + -0.10075337439775467, + -0.039148375391960144, + -0.01708081364631653, + 0.022980742156505585, + 0.0012766321888193488, + -0.05624091997742653, + 0.02920171432197094, + -0.0004628047754522413, + -0.012556084431707859, + -0.004911895841360092, + -0.006203844211995602, + 0.011994684115052223, + -0.005147894844412804, + -0.0653131902217865, + 0.021533435210585594, + 0.027332814410328865, + 0.010824107564985752, + 0.06882979720830917, + 0.002402055310085416, + 0.06736285239458084, + 0.007376556750386953, + -0.09907388687133789, + 0.0738959014415741, + 0.011251496151089668, + -0.089520663022995, + 0.07383604347705841, + -0.02708776667714119, + 0.039623651653528214, + 0.001983445603400469, + 0.034104056656360626, + -0.10747380554676056, + -0.01417585276067257, + -0.04512251541018486, + 0.001210794085636735, + 0.05437818914651871, + -0.004397509153932333, + 0.03336326405405998, + 0.06369500607252121, + 0.014758906327188015, + -0.009938295930624008, + 0.027304060757160187, + -0.025614865124225616, + -0.019298158586025238, + -0.03774742782115936, + -0.021663375198841095, + 0.004094315692782402, + -0.05966729298233986, + 0.025774789974093437, + 0.004207789432257414, + 0.03211497142910957, + 0.0222456231713295, + -0.07142850011587143, + 
-0.02816791646182537, + 0.008917749859392643, + -0.03328888863325119, + -0.04815923050045967, + 0.055692847818136215, + 0.005043115001171827, + 0.007406118791550398, + -0.10431766510009766, + 0.003935595508664846, + -0.07654271274805069, + 0.018002459779381752, + -0.026796353980898857, + -0.01480060163885355, + -0.003071046667173505, + -0.031164491549134254, + -0.004783581010997295, + 0.028996651992201805, + -0.00432590302079916, + 0.03827083855867386, + -0.10670920461416245, + -0.009844367392361164, + 0.05061553791165352, + 0.044921379536390305, + -0.021305931732058525, + -0.010570063255727291, + -0.037161700427532196, + 0.03244778513908386, + -0.026579100638628006, + -0.021481862291693687, + -0.0803975760936737, + 0.0602957159280777, + -0.030482472851872444, + -0.016915978863835335, + -0.03414126858115196, + -0.07902093976736069, + -0.05861777812242508, + 0.02578902617096901, + -0.006971773691475391, + 0.021272379904985428, + 0.004250700585544109, + -0.04617677628993988, + -0.025576869025826454, + 0.09362083673477173, + -0.02747775986790657, + -0.010427952744066715, + -0.005847673863172531, + 0.03786874935030937, + -0.023494398221373558, + -0.03599749505519867, + 0.06915943324565887, + -0.0005259242025204003, + -0.020210636779665947, + 0.012060794048011303, + -0.03892034292221069, + -0.03120974451303482, + 0.03798247501254082, + 0.08222217112779617, + -0.03596770018339157, + -0.001334474771283567, + 0.06480739265680313, + 0.016418466344475746, + 0.017459729686379433, + -0.05252225697040558, + -0.05017365887761116, + -0.013104243203997612, + -0.023724595084786415, + 0.006762322038412094, + -0.005946440156549215, + 0.006083739455789328, + -0.014293180778622627, + 0.041052985936403275, + -0.02417348138988018, + 0.03206375241279602, + -0.05389661714434624, + 0.0052835363894701, + -0.038879118859767914, + 0.0735245794057846, + -0.004067298024892807, + -0.007775747217237949, + 0.03442619740962982, + -0.013299554586410522, + -0.01858234964311123, + 0.015830783173441887, + 0.050536882132291794, + 0.039121512323617935, + 0.025488585233688354, + -0.083254374563694, + -0.0376444049179554, + -0.03673558309674263, + 0.02858356013894081, + -0.0017373028676956892, + -0.029856612905859947, + -0.020456591621041298, + 0.014330082572996616, + 0.026495426893234253, + -0.029473086819052696, + -0.05005846545100212, + 0.036553170531988144, + 0.03604103624820709, + -0.014557436108589172, + 0.0075491974130272865, + 0.02816123701632023, + 0.022223982959985733, + -0.010599660687148571, + 0.0738152265548706, + 0.09043020755052567, + -0.0195071529597044, + 0.03981706127524376, + 0.04508437216281891, + 0.03942303732037544, + 0.016333166509866714, + -0.007340291049331427, + -0.041841596364974976, + -0.014305119402706623, + -0.005970897153019905, + -0.04999639838933945, + -0.00753607414662838, + -0.026936067268252373, + 0.02390979416668415, + -0.02593693509697914, + -0.015126893296837807, + -0.035575494170188904, + -0.01576480083167553, + 0.01482314057648182, + 0.01960604451596737, + 0.012122674845159054, + 0.018187053501605988, + -0.04843643680214882, + -0.032472606748342514, + -0.002006485592573881, + -0.003907614853233099, + 0.012371492572128773, + -0.03494970500469208, + -0.04294227808713913, + 0.004812099505215883, + -0.02547234669327736, + 0.028849929571151733, + -0.021960295736789703, + -0.0013683459255844355, + 0.030571121722459793, + -0.04714812710881233, + -0.0034763696603477, + -0.03908781707286835, + -0.04101671278476715, + 0.013097001239657402, + 0.004385901615023613, + -0.033829864114522934, + 
-0.04254792630672455, + 0.015273491851985455, + -0.040665704756975174, + -0.00920754112303257, + -0.041413065046072006, + -0.013588172383606434, + -0.0017439010553061962, + -0.030785854905843735, + 0.07103034108877182, + -0.01929519884288311, + 0.025452272966504097, + -0.022381870076060295, + -0.028560174629092216, + -0.011340905912220478, + -0.008002392016351223, + -0.013147140853106976, + 0.021906575188040733, + -0.04703265056014061, + 0.00766343716531992, + 0.06170996278524399, + -0.004122643731534481, + -0.01931242272257805, + 0.03596718981862068, + 0.0322248749434948, + -0.026486199349164963, + -0.03052559122443199, + -0.03129229322075844, + 0.024310404434800148, + -0.029317326843738556, + -0.07851212471723557, + 0.04514205455780029, + -0.020682433620095253, + -0.04681077226996422, + 0.008951415307819843, + 0.04007868468761444, + 0.028472354635596275, + 0.009118284098803997, + -0.013431325554847717, + -0.020488806068897247, + -0.027376288548111916, + 0.035840798169374466, + -0.03870074450969696, + -0.002007996430620551, + -0.017398731783032417, + 0.031902845948934555, + -0.07956399768590927, + -0.04125808924436569, + 0.01867605932056904, + -0.08004764467477798, + -0.005094117484986782, + 0.03083234466612339, + -0.01335862372070551, + -0.04482260346412659, + -0.033836718648672104, + 0.02282416820526123, + 0.06287918239831924, + 0.010162614285945892, + -0.05158773064613342, + 0.0008301119669340551, + -0.03881796821951866, + 0.002004651352763176, + -0.022358564659953117, + 0.022438282147049904, + 0.04948568344116211, + 0.03556858375668526, + 0.04151606187224388, + -0.03065376915037632, + 0.0035080660600215197, + -0.004210236947983503, + -0.02430005557835102, + 0.02775806188583374, + 0.0647825375199318, + 0.003446039743721485, + -0.015345090068876743, + -0.004865385591983795 + ], + "chunk_ind": 0 + }, + { + "url": "https://docs.danswer.dev/more/use_cases/operations", + "title": "Operations", + "content": "Double the productivity of your Ops teams like IT, HR, etc.\n\nAutomatically Resolve Tickets\nModern teams are leveraging AI to auto-resolve up to 50% of tickets. Whether it is an employee asking about benefits details or how to set up the VPN for remote work, Danswer can help your team help themselves. This frees up your team to do the real impactful work of landing star candidates or improving your internal processes.\n\nAI Aided Onboarding\nOne of the periods where your team needs the most help is when they're just ramping up. Instead of feeling lost in dozens of new tools, Danswer gives them a single place where they can ask about anything in natural language. Whether it's how to set up their work environment or what their onboarding goals are, Danswer can walk them through every step with the help of Generative AI. 
This lets your team feel more empowered and gives time back to the more seasoned members of your team to focus on moving the needle.", + "title_embedding": [ + 0.010730741545557976, + 0.018373621627688408, + -0.0013679212424904108, + 0.0001091610174626112, + 0.034599218517541885, + 0.038814906030893326, + -0.03269535303115845, + -0.015120825730264187, + -0.011999477632343769, + -0.023377608507871628, + -0.003536652075126767, + 0.019120972603559494, + 0.032052282243967056, + -0.03316797688603401, + 0.002971385605633259, + -0.06660863012075424, + 0.015637297183275223, + -0.004499480128288269, + -0.01167437992990017, + 0.024017684161663055, + 0.011193061247467995, + 0.02649473212659359, + 0.04822992533445358, + -0.022897351533174515, + 0.04229900613427162, + 0.004849326331168413, + -0.0013035786105319858, + -0.022214103490114212, + -0.03940191864967346, + -0.010781657882034779, + 0.049329955130815506, + -0.010857120156288147, + 0.04519270732998848, + -0.025993647053837776, + -0.03483803570270538, + -0.08306694775819778, + 0.023532472550868988, + 0.01766788400709629, + 0.07221196591854095, + -0.001823332509957254, + 0.003104567062109709, + 0.05640452727675438, + -0.04483344778418541, + -0.04334229975938797, + 0.019754929468035698, + -0.011862428858876228, + 0.03863349184393883, + 0.01501200906932354, + 0.03863223269581795, + -0.05570034310221672, + -0.0267505943775177, + 0.03793827444314957, + -0.015061624348163605, + -0.025455573573708534, + 0.015559123829007149, + -0.01849287934601307, + 0.04038143530488014, + 0.026783155277371407, + 0.0023170525673776865, + -0.005963196512311697, + 0.04137548804283142, + 0.01764686405658722, + -0.02246052585542202, + -0.012901525013148785, + -0.017714571207761765, + -0.01027537789195776, + -0.016164785251021385, + -0.007979510352015495, + 0.021984701976180077, + -0.009949913248419762, + 0.08931540697813034, + 0.019962448626756668, + 0.03341870754957199, + 0.028858954086899757, + 0.0011826930567622185, + -0.00010822620242834091, + 0.021924806758761406, + -0.009182920679450035, + -0.018256863579154015, + 0.04718794673681259, + 0.020425673574209213, + 0.0002538118860684335, + 0.05454721301794052, + 0.019339540973305702, + 0.09129136055707932, + -0.010062575340270996, + 0.011977903544902802, + -0.022689297795295715, + -0.02022380940616131, + 0.013067576102912426, + 0.005217134952545166, + 0.037848640233278275, + 0.09128513187170029, + 0.052562836557626724, + -0.08711232244968414, + -0.0015501821180805564, + 0.00441542686894536, + -0.025076182559132576, + -0.014110158197581768, + -0.06030100956559181, + -0.0010345132322981954, + 0.018056612461805344, + 0.03524528071284294, + -0.01293126679956913, + 0.04700767621397972, + 0.06564126163721085, + 0.010113431140780449, + 0.018003467470407486, + -0.05646204203367233, + -0.02047823928296566, + -0.048007529228925705, + 0.05380301922559738, + -0.01607179455459118, + 0.0008504731231369078, + -0.015249709598720074, + -0.005599239841103554, + 0.054663073271512985, + -0.036346085369586945, + 0.024314023554325104, + 0.0768347755074501, + 0.03234424442052841, + 0.008295328356325626, + -0.0015371616464108229, + 0.043760448694229126, + -0.0018124273046851158, + -0.017262862995266914, + -0.042646538466215134, + 0.025395702570676804, + 0.03709862381219864, + -0.03921937197446823, + 0.050630342215299606, + 0.024703586474061012, + 0.022064829245209694, + -0.008263661526143551, + 0.03165263310074806, + 0.017121389508247375, + -0.06672775000333786, + -0.06984685361385345, + 0.11498068273067474, + -0.02806312032043934, + 
0.025353405624628067, + 0.006591377779841423, + -0.011244958266615868, + -0.014589745551347733, + 0.031125560402870178, + -0.06834094226360321, + -0.015911657363176346, + 0.0176913570612669, + -0.022801805287599564, + 0.01242455281317234, + 0.001349485362879932, + 0.05671858787536621, + -0.024975799024105072, + -0.019439268857240677, + -0.0038488772697746754, + -0.057438429445028305, + -0.004414469934999943, + 0.018468070775270462, + 0.006111294496804476, + 0.018633801490068436, + 0.04350016638636589, + 0.04317209869623184, + -0.058885347098112106, + 0.06605420261621475, + -0.060788847506046295, + -0.059009939432144165, + 0.003782198065891862, + 0.04133265092968941, + 0.019440937787294388, + 0.0046022264286875725, + 0.010991339571774006, + -0.0554840974509716, + 0.04083068668842316, + 0.007237149402499199, + 0.04218628630042076, + 0.015044232830405235, + 0.0565854087471962, + 0.03397437930107117, + 0.017036354169249535, + -0.02862199954688549, + -0.0019827275536954403, + -0.04851892590522766, + 0.020672423765063286, + 0.029321348294615746, + -0.04698231443762779, + -0.06201909855008125, + 0.01672869734466076, + -0.06657315790653229, + -0.061089128255844116, + -0.006803641561418772, + -0.041214216500520706, + 0.025210469961166382, + 0.03659403696656227, + -0.056864507496356964, + -0.017966432496905327, + -0.04572960361838341, + 0.03334927558898926, + 0.0010855993023142219, + 0.009142755530774593, + -0.045417286455631256, + 0.0508459098637104, + -0.0020350103732198477, + -0.08134196698665619, + -0.014234581962227821, + -0.018346119672060013, + 0.027286984026432037, + 0.01077864971011877, + 0.025067729875445366, + -0.0010993028990924358, + 0.055933188647031784, + 0.08128975331783295, + -0.00964485201984644, + 0.02629624865949154, + 0.02482009492814541, + -0.004299841821193695, + -0.02121540531516075, + -0.012340475805103779, + -0.010211183689534664, + -0.033044300973415375, + 0.01627231389284134, + 0.0025289515033364296, + -0.024867739528417587, + -0.02439618855714798, + 0.031935419887304306, + 0.015503033064305782, + 0.058931007981300354, + 0.0018315770430490375, + 0.05652814358472824, + -0.01652582921087742, + -0.009255263954401016, + -0.03914093226194382, + -0.015466556884348392, + 0.04188372567296028, + -0.027017222717404366, + -0.022300412878394127, + 0.016426892951130867, + 0.017171800136566162, + 0.07701553404331207, + 0.043874118477106094, + 0.07433117181062698, + 0.026014234870672226, + 0.013816924765706062, + 0.014636811800301075, + -0.036465343087911606, + -0.03994021564722061, + 0.003269175998866558, + -0.03902558237314224, + -0.0039056213572621346, + 0.01340826041996479, + -0.012667474336922169, + 0.013740241527557373, + 0.014225244522094727, + -0.01617649756371975, + 0.045204829424619675, + 0.0021076719276607037, + -0.03156042471528053, + -0.051285773515701294, + 0.050252512097358704, + -0.03781714290380478, + 0.017089596018195152, + 0.028835022822022438, + 0.005575904157012701, + 0.05654308199882507, + -0.004466162994503975, + 0.05050184950232506, + 0.011318539269268513, + -0.049033407121896744, + 0.022067567333579063, + 0.011711984872817993, + 0.005116294138133526, + -0.02025405503809452, + -0.010977067984640598, + -0.030916348099708557, + 0.05374428257346153, + 0.00808737613260746, + -0.044140078127384186, + 0.030092595145106316, + -0.01702306419610977, + -0.020538438111543655, + 0.00427399855107069, + 0.002780117094516754, + -0.005801517982035875, + -0.04212724789977074, + 0.024279015138745308, + -6.510222738143057e-05, + -0.020652174949645996, + -0.006957880686968565, + 
0.0016846335493028164, + -0.0663430467247963, + -0.0335371308028698, + 2.869481068046298e-05, + -0.037348829209804535, + 0.039201609790325165, + -0.000545984017662704, + 0.02819826453924179, + 0.04861782118678093, + 0.08528425544500351, + 0.012239249423146248, + 0.014359706081449986, + -0.019882500171661377, + -0.03053932823240757, + 0.04383694753050804, + -0.05820803344249725, + -0.0014366158284246922, + 0.06986244767904282, + 0.06611118465662003, + 0.024454524740576744, + -0.02059408277273178, + -0.016752762719988823, + -0.01459463406354189, + -0.02889612317085266, + 0.04970743879675865, + -0.028303874656558037, + 0.03978912904858589, + -0.07312510907649994, + 0.05429210141301155, + 0.00571118388324976, + -0.02056923136115074, + 0.053339049220085144, + -0.018249599263072014, + -0.021281961351633072, + -0.05355891212821007, + 0.0478244312107563, + -0.06067035347223282, + -0.0008842060924507678, + 0.04420148581266403, + 0.000588231545407325, + 0.056371595710515976, + 0.014279269613325596, + -0.05001707002520561, + -0.037117116153240204, + 0.030044561251997948, + -0.05617158114910126, + 0.02152038738131523, + 0.017715860158205032, + 0.02034214325249195, + 0.021688375622034073, + -0.024712584912776947, + -0.03572659194469452, + -0.03006441332399845, + 0.11338905245065689, + 0.029146766290068626, + -0.01698526367545128, + -0.05355465039610863, + -0.0036588346119970083, + -0.0032888834830373526, + -0.022690337151288986, + -0.05653419718146324, + 0.053955987095832825, + -0.010936236009001732, + -0.05121518298983574, + -0.04233774170279503, + -0.0650610476732254, + -0.009094701148569584, + -0.00337587739340961, + 0.05269891023635864, + 0.05860234051942825, + 0.015521558932960033, + -0.038103096187114716, + -0.029688136652112007, + -0.011934547685086727, + -0.05070113763213158, + 0.025371648371219635, + 0.03601797670125961, + 0.021862944588065147, + -0.020703352987766266, + 0.05289195850491524, + -0.004169228952378035, + -0.03900706395506859, + -0.0029100535903126, + 0.003359412308782339, + -0.03696342185139656, + 0.007729679811745882, + 0.005611894652247429, + 0.04680318757891655, + 0.002203753450885415, + -0.057359859347343445, + 0.0586426742374897, + 0.027003217488527298, + -0.030223455280065536, + 0.06817735731601715, + 0.03476058319211006, + -0.011411039158701897, + -5.9986756241414696e-05, + -0.03633617237210274, + -0.016408616676926613, + 0.00833839550614357, + 0.0022074412554502487, + 0.048158638179302216, + -0.031786687672138214, + -0.028205247595906258, + -0.056731242686510086, + -0.023744143545627594, + -0.00782334990799427, + 0.03123593144118786, + -0.016950296238064766, + 0.01794753596186638, + 0.04291892051696777, + 0.0099559361115098, + -0.0012914348626509309, + -0.004629518836736679, + -0.05153423175215721, + 0.058259084820747375, + 0.056788213551044464, + -0.0333746112883091, + 0.030548732727766037, + 0.01471715047955513, + -0.002818142296746373, + -0.013207555748522282, + 0.022568998858332634, + 0.025705108419060707, + -0.014197800308465958, + -0.02527414821088314, + 0.0009442049195058644, + 0.005861984565854073, + 0.00919109396636486, + 0.012627449817955494, + 0.017443764954805374, + -0.0162491612136364, + 0.05474800989031792, + 0.02047792077064514, + 0.0003552198759280145, + 0.0005291366251185536, + 0.003957713954150677, + -0.073692187666893, + 0.044466596096754074, + 0.02759671024978161, + 0.03048691712319851, + 0.024890316650271416, + -0.04379572719335556, + -0.004758962895721197, + -0.012311465106904507, + 0.003943922929465771, + 0.035470347851514816, + 
-0.06701556593179703, + -0.0010001214686781168, + 0.06592956185340881, + 0.008649672381579876, + 0.005259859841316938, + -0.00453479727730155, + -0.026313234120607376, + 0.0128184137865901, + -0.04696577414870262, + 0.0357656292617321, + -0.007445288822054863, + 0.10806939005851746, + 0.0789022147655487, + -0.01642726920545101, + -0.042041581124067307, + -0.015044954605400562, + -0.020660532638430595, + -0.01043805480003357, + -0.011654903180897236, + -0.05754747614264488, + 0.06964936852455139, + 0.03990045189857483, + -0.0017825361574068666, + -0.0382373072206974, + 0.005986799951642752, + 0.022415796294808388, + -0.011907496489584446, + -0.015296644531190395, + 0.005779241677373648, + 0.051009099930524826, + -0.013554011471569538, + -0.036914244294166565, + -0.018791811540722847, + -0.03514847159385681, + -0.050969723612070084, + 0.07429437339305878, + -0.014117387123405933, + 0.01858820766210556, + 0.029953083023428917, + 0.013503451831638813, + 0.024872425943613052, + -0.01447504572570324, + -0.03305073082447052, + -0.03757826238870621, + 0.009820172563195229, + 0.004212466534227133, + 0.009773447178304195, + -0.03176327049732208, + -0.00981978140771389, + -0.018437808379530907, + -0.016843365505337715, + 0.04063236713409424, + 0.03664008155465126, + -0.017759712412953377, + 0.017491042613983154, + 0.017961829900741577, + -0.007718573324382305, + 0.08410634845495224, + 0.04713455215096474, + -0.030763784423470497, + -0.07868245244026184, + -0.04293506219983101, + -0.02663402259349823, + 0.06502995640039444, + 0.04134368151426315, + -0.0075789024122059345, + 0.037559330463409424, + -0.022960234433412552, + -0.03559660166501999, + 0.02433147467672825, + 0.004160139709711075, + -0.006933990400284529, + -0.018853498622775078, + -0.07674053311347961, + -0.015214351005852222, + -0.0031465317588299513, + -0.0032662826124578714, + 0.020626401528716087, + -0.023296812549233437, + 0.01533068809658289, + -0.018831759691238403, + -0.013347934931516647, + 0.05832105129957199, + 0.016617508605122566, + -0.06054726615548134, + 0.020946228876709938, + -0.05081603676080704, + 0.0005762121290899813, + -0.013293956406414509, + 0.05424816533923149, + -0.028400346636772156, + -0.008744322694838047, + -0.027219830080866814, + -0.009028765372931957, + 0.08349941670894623, + -0.02220912277698517, + -0.0035634897649288177, + 0.039803411811590195, + -0.009209544397890568, + -0.05272921174764633, + -0.03965644910931587, + -0.05518830195069313, + -0.0076417475938797, + 0.011989743448793888, + 0.07631900161504745, + 0.03853122144937515, + -0.03482687100768089, + 0.010916730388998985, + -0.012828757055103779, + 0.04919871687889099, + 0.05019025877118111, + -0.04743821546435356, + -0.024848056957125664, + 0.03214584290981293, + -0.03514641523361206, + -0.030496522784233093, + -0.005360030569136143, + -0.03538995608687401, + -0.016368992626667023, + -0.07339470833539963, + 0.022023534402251244, + -0.012556820176541805, + 0.018365638330578804, + 0.025001555681228638, + -0.029118210077285767, + -0.046666670590639114, + 0.008231466636061668, + 0.015543444082140923, + -0.018029138445854187, + 0.007146908901631832, + -0.009827123954892159, + -0.09648268669843674, + -0.03867226839065552, + -0.0066386335529387, + -0.03292228281497955, + 0.022209322080016136, + -0.048995133489370346, + -0.03118632733821869, + -0.017475144937634468, + 0.005314778070896864, + -0.010616753250360489, + -0.04724809527397156, + 0.027169331908226013, + 0.003503959160298109, + -0.06886278837919235, + 0.022409209981560707, + 0.008452179841697216, + 
-0.024392019957304, + -0.020817982032895088, + -0.004606961738318205, + -0.08660633116960526, + 0.031076667830348015, + 0.020726440474390984, + -0.011223231442272663, + 0.05015091598033905, + 0.021451715379953384, + 0.049609262496232986, + 0.05560477077960968, + -0.05192512646317482, + -0.016848105937242508, + 0.009753277525305748, + 0.03756638243794441, + -0.03334583342075348, + 0.040159404277801514, + 0.01897178590297699, + -0.056337252259254456, + 0.018127072602510452, + 8.343596709892154e-05, + 0.02721443772315979, + -0.037742555141448975, + 0.01802193559706211, + 0.09791397303342819, + -0.025166938081383705, + 0.07462649047374725, + -0.027766922488808632, + -0.06332777440547943, + -0.005818391218781471, + -0.018811773508787155, + -0.03425326570868492, + 0.003983109258115292, + -0.034356050193309784, + -0.04613350331783295, + 0.006219684612005949, + -0.02750561013817787, + 0.03812394291162491, + -0.029214290902018547, + -0.008362852968275547, + -0.046344004571437836, + 0.015400147996842861, + -0.027044160291552544, + 0.019668098539114, + 0.01860121265053749, + -0.02408520132303238, + -0.07096672058105469, + -0.0003313044144306332, + 0.024360060691833496, + -0.02565479651093483, + 0.033272501081228256, + -0.009740769863128662, + -0.07754906266927719, + -0.005022569093853235, + 0.01834244467318058, + -0.0670025572180748, + 0.0031950040720403194, + 0.016223441809415817, + 0.05652153119444847, + 0.05896124988794327, + -0.05402825400233269, + -0.007856715470552444, + 0.010221654549241066, + -0.005885730497539043, + 0.01112558413296938, + -0.03978840634226799, + -0.008038174360990524, + 0.048503343015909195, + -0.04076062887907028, + 0.05717281252145767, + 0.02310008741915226, + -0.015716947615146637, + 0.00578808831050992, + 0.005534487310796976, + 0.01627301797270775, + 0.012666025198996067, + -0.037932366132736206, + -0.02889685146510601, + 0.00509311119094491, + -0.015608384273946285, + -0.059495046734809875, + -0.02494220808148384, + 0.024446364492177963, + 0.03732331097126007, + -0.006206210236996412, + 0.03822293132543564, + -0.030338769778609276, + -0.10873781144618988, + -0.021372010931372643, + -0.04941859468817711, + -0.0004265901807229966, + -0.011848388239741325, + -0.040232446044683456, + 0.02904931642115116, + -0.0006163326324895024, + 0.04501201957464218, + -0.0009105035569518805, + -0.034277040511369705, + -0.028672119602560997, + 0.0012552812695503235, + 0.04327942058444023, + 0.0058600720949471, + 0.007959491573274136, + -0.0052106245420873165, + -0.01613856852054596, + 0.029123952612280846, + 0.007203509099781513, + 0.006210403982549906, + -0.053780049085617065, + -0.003420298220589757, + -0.03657878562808037, + 0.019055450335144997, + -0.07376986742019653, + -0.019189076498150826, + -0.007890082895755768, + -0.022230826318264008, + 0.006225301884114742, + 0.01237472239881754, + 0.06595351547002792, + 0.04492981359362602, + 0.01869170181453228, + 0.012387770228087902, + 0.05166162550449371, + -0.06800039112567902, + -0.03918451815843582, + -0.056165844202041626, + 0.02326592244207859, + -0.015541363507509232, + 0.033293239772319794, + -0.007216084748506546, + -0.012664951384067535, + 0.05342225730419159, + 0.009652439504861832, + 0.027964351698756218, + -0.016909338533878326, + 0.03330600634217262, + -0.060126710683107376, + -0.07517267763614655, + -0.025813739746809006, + -0.024271255359053612, + 0.011216769926249981, + -0.020943764597177505, + 0.01686793565750122, + -0.06828945130109787, + -0.018023250624537468, + -0.004829781129956245, + -0.022270847111940384, + 
0.030936313793063164, + -0.013894669711589813, + 0.0368037149310112, + -0.05242981016635895, + -0.05051284283399582, + 0.06604990363121033, + 0.009301775135099888, + -0.014957845211029053, + -0.04281012713909149, + -0.0006833449588157237, + -0.005769087467342615, + -0.010068363510072231, + 0.058508969843387604, + 0.01898365654051304, + -0.018636951223015785, + 0.04689044877886772, + 0.056091975420713425, + -0.03881967067718506, + -0.019658103585243225, + -0.0003990831028204411, + 0.07142409682273865, + 0.012017408385872841, + -0.04087359085679054, + 0.03531723469495773, + 0.030207011848688126, + 0.005139552056789398, + -0.07084347307682037, + 0.009759706445038319, + -0.074960857629776, + 0.05481172725558281, + 0.028797954320907593, + -0.016217226162552834, + 0.03127933293581009, + 0.06848310679197311, + 0.0331764742732048, + -0.007261150050908327, + -0.023036431521177292, + -0.015215406194329262, + -0.056041885167360306, + -0.05146646127104759 + ], + "content_embedding": [ + -0.035467296838760376, + 0.05443365499377251, + 0.004722591955214739, + -0.022551164031028748, + 0.013528825715184212, + -0.01865273527801037, + 0.02473974972963333, + -0.0093984454870224, + -0.04820428788661957, + -0.011309967376291752, + -0.041853927075862885, + 0.02753269486129284, + 0.023483864963054657, + 0.0086299953982234, + -0.004760670010000467, + -0.011422916315495968, + 0.02231433242559433, + -0.018113020807504654, + 0.027575815096497536, + 0.008628769777715206, + 0.031078867614269257, + -0.022500742226839066, + 0.011097921058535576, + -0.020555853843688965, + 0.041947007179260254, + 0.007781036198139191, + -0.03356451168656349, + 0.05067972093820572, + -0.05009249970316887, + 0.013387891463935375, + 0.04737459868192673, + -0.02956528402864933, + -0.012283756397664547, + -0.02492530830204487, + -0.051903702318668365, + -0.020648062229156494, + 0.07008657604455948, + -0.05525917932391167, + -0.005968133453279734, + 0.042117420583963394, + 0.04600752145051956, + -0.023857053369283676, + 0.024955181404948235, + -0.019478371366858482, + 0.027009692043066025, + -0.0035888778511434793, + 0.050264179706573486, + -0.026319395750761032, + 0.02974606677889824, + -0.03574950620532036, + -0.0011404261458665133, + 0.00908538419753313, + -0.026853032410144806, + -0.01625720039010048, + -0.011155915446579456, + 0.012969470582902431, + -0.03395452722907066, + 0.004224491771310568, + 0.027397319674491882, + -0.02826162986457348, + 0.042576149106025696, + 0.04274202510714531, + -0.023343440145254135, + 0.031393349170684814, + 0.003865004051476717, + -0.068922258913517, + 0.021687647327780724, + -0.006593589670956135, + 0.014760294929146767, + -0.007734894752502441, + -0.0031233499757945538, + -0.014138679951429367, + 0.01479700393974781, + 0.05785622447729111, + -0.04781193286180496, + -0.02295715920627117, + -0.02882302924990654, + -0.018427176401019096, + -0.018964029848575592, + 0.06096252053976059, + 0.05383418872952461, + -0.0870966985821724, + 0.0031813366804271936, + 0.01873805560171604, + 0.046315208077430725, + -0.016668183729052544, + 0.007771935313940048, + -0.008389935828745365, + -0.03101789578795433, + 0.12752030789852142, + 0.03457779064774513, + 0.03240315988659859, + 0.048814207315444946, + 0.05700814723968506, + -0.06329526007175446, + 0.057182129472494125, + -0.027157682925462723, + -0.008035550825297832, + -0.02922128140926361, + -0.033899255096912384, + -0.013119292445480824, + 0.05552409961819649, + 0.04266372323036194, + -0.003065067809075117, + 0.03790399059653282, + 0.01804651878774166, + 
0.018934324383735657, + 0.04061003401875496, + 0.03233874961733818, + 0.017353640869259834, + -0.045026157051324844, + 0.0002423059631837532, + 0.00812580157071352, + 0.017658300697803497, + -0.007964730262756348, + 0.01015512179583311, + 0.04046032205224037, + -0.06913582980632782, + 0.004168798215687275, + 0.03317571058869362, + 0.012463781051337719, + -0.020389260724186897, + -0.022882815450429916, + -0.015693804249167442, + 0.0500093474984169, + 0.05442529916763306, + -0.05275300145149231, + -0.0008568991324864328, + -0.009641895070672035, + -0.003380047157406807, + -0.019793419167399406, + 0.0063031697645783424, + -0.03327865153551102, + 0.016263391822576523, + 0.03218098729848862, + 0.022281551733613014, + -0.06236790120601654, + -0.06712637841701508, + 0.013925489969551563, + 0.01688770391047001, + 0.01467123068869114, + 0.029035737738013268, + -0.013510127551853657, + -0.0371820330619812, + 0.01489016879349947, + -0.05226032808423042, + -0.021125929430127144, + -0.002014611614868045, + -0.05400311201810837, + 0.019783688709139824, + -0.0014786357060074806, + -0.01895768567919731, + 0.01083068735897541, + -0.005890357308089733, + 0.003382777562364936, + -0.04273455590009689, + -0.0478648841381073, + 0.006365248002111912, + 0.04027433693408966, + 0.022969869896769524, + 0.06722807139158249, + 0.02113204449415207, + -0.03740633279085159, + 0.0656556561589241, + -0.014887429773807526, + 0.022357333451509476, + 0.036207813769578934, + 0.009992213919758797, + 0.03665810078382492, + 0.07260408997535706, + -0.005702183116227388, + -0.00880548357963562, + 0.033025965094566345, + -0.017093362286686897, + 0.039024271070957184, + 0.04158668965101242, + 0.008214588277041912, + -0.017436640337109566, + 0.05074054002761841, + -0.021646566689014435, + 0.0577220693230629, + -0.06182146817445755, + 0.03490613400936127, + 0.009574614465236664, + -0.05135552957653999, + -0.016593866050243378, + 0.001374077401123941, + -0.0582745335996151, + 0.009521838277578354, + -0.04114346206188202, + 0.05998831242322922, + -0.017294712364673615, + -0.017998410388827324, + -0.036417942494153976, + -0.014111478812992573, + -0.035168007016181946, + 0.04580182209610939, + 0.006420421414077282, + 0.012049577198922634, + -0.03691839054226875, + -0.041549112647771835, + -0.02593182772397995, + -0.07608001679182053, + -0.004325924441218376, + -0.029293090105056763, + 0.05871257558465004, + 0.04561365023255348, + -0.018353786319494247, + 0.018815817311406136, + -0.02768997848033905, + 0.10190171003341675, + 0.05626858025789261, + -0.006779504008591175, + 0.005354198161512613, + -0.053908295929431915, + -0.03453999012708664, + 0.02781379222869873, + -0.04989396408200264, + -0.03950505331158638, + 0.02282608300447464, + 0.006389955058693886, + -0.007375086657702923, + -0.016243990510702133, + 0.028544900938868523, + -0.020701216533780098, + 0.012176213786005974, + -0.019373498857021332, + 0.023940887302160263, + -0.03986368328332901, + -0.0040043736808001995, + -0.02535220980644226, + -0.08186554163694382, + 0.07860240340232849, + 0.004978376906365156, + 0.06400016695261002, + -0.02991490252315998, + -0.03288387134671211, + 0.03286135569214821, + 0.0247455183416605, + 0.08134172111749649, + 0.062203265726566315, + 0.017080431804060936, + -0.003969072364270687, + -0.024894852191209793, + -0.07480036467313766, + 0.03882874175906181, + -0.0074541885405778885, + -0.011322571896016598, + -0.06285038590431213, + 0.004618136677891016, + -0.019962741062045097, + 0.01853892207145691, + 0.002305575180798769, + 0.023541608825325966, + 
0.017216432839632034, + -0.044929757714271545, + -0.022532327100634575, + 0.05122198164463043, + 0.0008766956743784249, + -0.025474006310105324, + 0.04030180349946022, + -0.013362268917262554, + 0.049265045672655106, + 0.001589711057022214, + 0.014848759397864342, + 0.0022126054391264915, + -0.028914116322994232, + -0.009881545789539814, + -0.010318109765648842, + 0.006385906133800745, + 0.010556558147072792, + 0.007666149642318487, + 0.016665013507008553, + 0.09090837836265564, + 0.008256189525127411, + -0.006583006586879492, + 0.0044986652210354805, + -0.0336960032582283, + 0.047732625156641006, + -0.007069372106343508, + -0.044969744980335236, + -0.0907006487250328, + -0.04223865643143654, + 0.007469010539352894, + 0.006147805601358414, + 0.04827409237623215, + 0.03519561514258385, + 0.032267000526189804, + 0.05173507332801819, + -0.016001909971237183, + 0.034578241407871246, + -0.02854917198419571, + -0.01587686315178871, + 0.03134807571768761, + -0.010930678807199001, + 0.044245973229408264, + 0.1186295673251152, + -0.031642355024814606, + 0.01669827103614807, + -0.026775898411870003, + -0.002936996053904295, + -0.013815718702971935, + -0.009453569538891315, + -0.035879991948604584, + -0.02232815884053707, + -0.009286822751164436, + -0.01117252279073, + 0.014098073355853558, + 0.023366371169686317, + 0.022420832887291908, + -0.029833031818270683, + 0.0013570807641372085, + -0.0211170744150877, + -0.027633074671030045, + -0.02915397845208645, + 0.023663034662604332, + 0.04199281334877014, + -0.0311698317527771, + 0.025238486006855965, + -0.00992826372385025, + 0.005087476689368486, + -0.050041183829307556, + -0.018602682277560234, + -0.06774407625198364, + -0.019117988646030426, + 0.08245334029197693, + 0.030311768874526024, + 0.020432988181710243, + -0.03738946095108986, + 0.04251522198319435, + 0.002886879490688443, + 0.0938342958688736, + -0.05836429446935654, + -0.0232597254216671, + 0.0074686286970973015, + -0.0020157117396593094, + -0.013439277186989784, + 0.02590363286435604, + 0.0034541902132332325, + 0.002955070696771145, + -0.0020802158396691084, + -0.011827156879007816, + -0.06622112542390823, + -0.05226997658610344, + -0.031827233731746674, + 0.0012936017010360956, + 0.01702217012643814, + -0.016136569902300835, + 0.04939497634768486, + 0.006943605840206146, + -0.05098084360361099, + 0.03143058344721794, + -0.004094736184924841, + -0.011557313613593578, + 0.000757173984311521, + -0.01120754610747099, + 0.036905039101839066, + 0.02395678497850895, + 0.009881307370960712, + 0.05312298238277435, + 0.05778184533119202, + 0.02520277164876461, + 0.020175758749246597, + -0.025740133598446846, + 0.04891965910792351, + 0.05155428871512413, + 0.04089348018169403, + 0.06249197572469711, + 0.02368168905377388, + -0.03247880935668945, + 0.019493652507662773, + -0.06181112304329872, + 0.019057979807257652, + 0.037210095673799515, + 0.02910085767507553, + 0.02495957538485527, + -0.08179862797260284, + -0.007498551160097122, + 0.036072876304388046, + -0.016061626374721527, + 0.0725645050406456, + 0.005645937751978636, + 0.0032921379897743464, + -0.029446475207805634, + 0.020205944776535034, + 0.002388844033703208, + -0.029442399740219116, + -0.031475961208343506, + 0.024486446753144264, + -0.038434699177742004, + -0.05131153389811516, + 0.00129043054766953, + -0.01692604087293148, + -0.007422945462167263, + -0.02433120459318161, + 0.004650108516216278, + -0.04251663386821747, + -0.009143602102994919, + 0.017507102340459824, + 0.042100246995687485, + -0.06103592365980148, + 
-0.0508011095225811, + -0.000937039265409112, + 0.025160834193229675, + -0.048878248780965805, + 0.010422220453619957, + -0.014773974195122719, + -0.06574267894029617, + 0.0027621325571089983, + -0.0019821953028440475, + 0.006184928119182587, + 0.024707674980163574, + -0.022308405488729477, + -0.06509386748075485, + 0.04186487942934036, + 0.0023416660260409117, + 0.0650840550661087, + 0.03807358071208, + -0.024585191160440445, + -0.017596496269106865, + 0.005341595038771629, + 0.03675152733922005, + 0.06293662637472153, + 0.010854244232177734, + -0.050199203193187714, + -0.037359531968832016, + 0.017929432913661003, + 0.023822667077183723, + 0.019726712256669998, + -0.00759292533621192, + 0.043509598821401596, + -0.0014670701930299401, + -0.0006681938539259136, + -0.0055070724338293076, + -0.07182206958532333, + 0.037307076156139374, + 0.06350742280483246, + 0.049223095178604126, + 0.017340589314699173, + 0.05529596656560898, + 0.023639194667339325, + -0.02478986792266369, + -0.02248029224574566, + -0.042737238109111786, + -0.0018032155930995941, + 0.05576873943209648, + 0.12722158432006836, + 0.004959811456501484, + -0.033451229333877563, + -0.007337609305977821, + 0.018852578476071358, + 0.031502317637205124, + 0.013375848531723022, + -0.0066598327830433846, + 0.07771285623311996, + -0.017693838104605675, + -0.030503049492836, + -0.04393269121646881, + 0.013323146849870682, + 0.010107941925525665, + 0.02004137821495533, + 0.0377974770963192, + 0.013478322885930538, + 0.024949608370661736, + -0.01634461060166359, + -0.015226340852677822, + 0.013924108818173409, + 0.0038409747648984194, + -0.00358059024438262, + -0.005057516973465681, + -0.008272752165794373, + 0.04373026266694069, + -0.0014998909318819642, + 0.009924792684614658, + -0.040317751467227936, + -0.04541180655360222, + 0.06625904142856598, + -0.028080880641937256, + 0.04732294753193855, + 0.0047000702470541, + 0.02857903018593788, + 0.004553706850856543, + -0.04188435524702072, + 0.023083623498678207, + -0.060619525611400604, + 0.01963491179049015, + -0.008608276024460793, + 0.0034778753761202097, + -0.016133952885866165, + 0.009059683419764042, + -0.0009118590969592333, + 0.08675801753997803, + 0.004535067826509476, + -0.021998068317770958, + -0.0789642184972763, + -0.033289894461631775, + -0.04486677423119545, + 0.014945252798497677, + 0.04513613134622574, + -0.05311649665236473, + 0.009399711154401302, + 0.004345519933849573, + -0.004021052736788988, + 0.01709410734474659, + -0.020986782386898994, + -0.011685016565024853, + -0.02048366330564022, + -0.08524532616138458, + -0.004114300478249788, + 0.040531281381845474, + -0.0005771859432570636, + 0.02984555996954441, + -0.0002479814866092056, + 0.04756562039256096, + -0.013039377517998219, + -0.09513615071773529, + 0.07444311678409576, + 0.0044719018042087555, + -0.09768522530794144, + 0.04403488337993622, + 0.013910059817135334, + 0.06657753884792328, + 0.026994489133358, + 0.03657658398151398, + -0.11561834812164307, + 0.02878704108297825, + -0.012593223713338375, + -0.01532658003270626, + 0.06045927479863167, + -0.04569881781935692, + -0.0029045850969851017, + 0.06762480735778809, + 0.012874988839030266, + -0.011422640644013882, + 0.025211291387677193, + -0.07570745050907135, + -0.018061399459838867, + -0.033531878143548965, + -0.010049374774098396, + 0.02582205832004547, + -0.015443898737430573, + 0.029427431523799896, + -0.02071801945567131, + 0.02054932527244091, + 0.017246615141630173, + -0.07276910543441772, + -0.053859222680330276, + 0.060189153999090195, + 
-0.04336293414235115, + -0.06396458297967911, + 0.08002400398254395, + -0.041166432201862335, + 0.000430541840614751, + -0.10547704994678497, + -0.014112395234405994, + -0.06070064380764961, + 0.01796649396419525, + -0.045275188982486725, + -0.0018861661665141582, + -0.0022482818458229303, + 0.004191190470010042, + 6.745498831151053e-05, + 0.07350871711969376, + -0.01797996647655964, + 0.03183342143893242, + -0.10409794747829437, + -0.0291685052216053, + 0.02071727253496647, + 0.021267961710691452, + -0.021560702472925186, + -0.05026571452617645, + -0.013422243297100067, + -0.0011607048800215125, + 0.016948888078331947, + -0.01588856242597103, + -0.05063013359904289, + 0.05952488258481026, + -0.05575632303953171, + -0.06906414031982422, + -0.004353572614490986, + -0.02773641049861908, + -0.043097492307424545, + 0.03103402815759182, + 0.02164989709854126, + 0.0013185666175559163, + 0.02606332302093506, + -0.059723641723394394, + -0.008657965809106827, + 0.06580374389886856, + -0.020771000534296036, + -0.022305399179458618, + 0.008068420924246311, + -0.004975682124495506, + -0.033461254090070724, + -0.040884874761104584, + 0.052932899445295334, + -0.0054899416863918304, + -0.03155453875660896, + 0.002439886098727584, + -0.0361575223505497, + -0.03652369976043701, + -0.010043974034488201, + 0.01681465655565262, + 3.9381829992635176e-05, + 0.011527255177497864, + 0.06904088705778122, + -0.005501871462911367, + 0.0259085800498724, + -0.021282166242599487, + -0.03796657174825668, + -0.002881726250052452, + -0.018672630190849304, + -0.003463461296632886, + -0.008101037703454494, + -0.019035371020436287, + -0.0025111068971455097, + 0.03926572576165199, + -0.0513470396399498, + 0.04829537495970726, + -0.001188569120131433, + -0.0121685229241848, + -0.059901442378759384, + 0.07364466786384583, + 0.006562754046171904, + 0.00707247294485569, + 0.028408123180270195, + -0.02494397945702076, + -0.04187498614192009, + 0.0066386316902935505, + 0.06244710460305214, + 0.02900586649775505, + 0.04932861402630806, + -0.04402685537934303, + -0.006739918142557144, + -0.0502609983086586, + -0.0015801729168742895, + -0.026301531121134758, + -0.024203499779105186, + -0.019028285518288612, + 0.055076178163290024, + 0.009030332788825035, + -0.04907704144716263, + -0.019399652257561684, + 0.009713590145111084, + 0.05042042210698128, + -0.00020382895309012383, + -0.010405965149402618, + 0.01872927136719227, + 0.002546734409406781, + 0.020958390086889267, + 0.0634453296661377, + 0.04931068792939186, + -0.014592095278203487, + 0.0075549716129899025, + 0.02017839439213276, + 0.03344761207699776, + -0.005005223676562309, + 0.01818416453897953, + -0.05528895929455757, + 0.03879536688327789, + 0.018610917031764984, + -0.029319677501916885, + -0.00493574095889926, + 0.01762193627655506, + 0.008898349478840828, + -0.017192110419273376, + -0.03400791808962822, + -0.026591692119836807, + -0.03768239915370941, + 0.007602880708873272, + -0.020310858264565468, + -0.0036565649788826704, + -0.00616755336523056, + -0.057577136904001236, + 0.008387535810470581, + 0.021555650979280472, + -0.01923108845949173, + -0.019822189584374428, + -0.03861076384782791, + -0.04258895292878151, + 0.0005390863516367972, + -0.009946192614734173, + 0.04911184310913086, + -0.05009220167994499, + 0.00297548552043736, + 0.019344164058566093, + 0.005506082437932491, + 0.018321573734283447, + -0.027131471782922745, + -0.052699681371450424, + -0.02292790077626705, + 0.0568309910595417, + -0.008538461290299892, + -0.05772045999765396, + 0.018903164193034172, + 
-0.03690820932388306, + -0.034110669046640396, + -0.008381159976124763, + 0.03926640748977661, + 0.04370100051164627, + -0.007475440856069326, + 0.06952399015426636, + -0.0031064660288393497, + 0.040785301476716995, + -0.008955440483987331, + -0.016698094084858894, + -0.007412049453705549, + 0.022290483117103577, + 0.006028760224580765, + -0.019992463290691376, + -0.04154061898589134, + -0.023284243419766426, + 0.04972238838672638, + 0.0008079080143943429, + -0.0057194954715669155, + 0.037808094173669815, + -0.00983867421746254, + -0.030191699042916298, + 0.014910571277141571, + 0.0004001195775344968, + 0.08586109429597855, + -0.014850648120045662, + -0.07815773040056229, + 0.05393945425748825, + -0.019953783601522446, + 0.0016224493738263845, + 0.018219690769910812, + 0.014311570674180984, + 0.05567210912704468, + 0.004363455809652805, + 0.01872050203382969, + -0.05933142080903053, + -0.04872509092092514, + 0.02652469463646412, + -0.04346488043665886, + -0.027931908145546913, + -0.03355146571993828, + 0.004589339718222618, + -0.05587214604020119, + -0.015419036149978638, + 0.04789341986179352, + -0.059666525572538376, + 0.00552733987569809, + 0.012681040912866592, + 0.007240649312734604, + -0.0045613935217261314, + -0.060716547071933746, + 0.03223521634936333, + 0.024270178750157356, + -0.025784391909837723, + -0.01736401580274105, + -0.0014227400533854961, + -0.011367680504918098, + -0.035415612161159515, + -0.01793254353106022, + 0.033805977553129196, + -0.0080083217471838, + 0.021929381415247917, + 0.012236963026225567, + 0.002641203347593546, + 0.0067292568273842335, + -0.007680798415094614, + -0.02231515571475029, + 0.023524953052401543, + 0.010132606141269207, + 0.0030864113941788673, + -0.03816894069314003, + -0.0007171767647378147 + ], + "chunk_ind": 0 + } +] \ No newline at end of file diff --git a/backend/danswer/seeding/load_docs.py b/backend/danswer/seeding/load_docs.py new file mode 100644 index 00000000000..2e9c13b10ba --- /dev/null +++ b/backend/danswer/seeding/load_docs.py @@ -0,0 +1,214 @@ +import datetime +import json +import os +from typing import cast + +from sqlalchemy.orm import Session + +from danswer.access.models import default_public_access +from danswer.configs.constants import DEFAULT_BOOST +from danswer.configs.constants import DocumentSource +from danswer.configs.constants import KV_DOCUMENTS_SEEDED_KEY +from danswer.configs.model_configs import DEFAULT_DOCUMENT_ENCODER_MODEL +from danswer.connectors.models import Document +from danswer.connectors.models import IndexAttemptMetadata +from danswer.connectors.models import InputType +from danswer.connectors.models import Section +from danswer.db.connector import check_connectors_exist +from danswer.db.connector import create_connector +from danswer.db.connector_credential_pair import add_credential_to_connector +from danswer.db.credentials import PUBLIC_CREDENTIAL_ID +from danswer.db.document import check_docs_exist +from danswer.db.enums import AccessType +from danswer.db.enums import ConnectorCredentialPairStatus +from danswer.db.index_attempt import mock_successful_index_attempt +from danswer.db.search_settings import get_current_search_settings +from danswer.document_index.factory import get_default_document_index +from danswer.indexing.indexing_pipeline import index_doc_batch_prepare +from danswer.indexing.models import ChunkEmbedding +from danswer.indexing.models import DocMetadataAwareIndexChunk +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import 
KvKeyNotFoundError +from danswer.server.documents.models import ConnectorBase +from danswer.utils.logger import setup_logger +from danswer.utils.retry_wrapper import retry_builder + + +logger = setup_logger() + + +def _create_indexable_chunks( + preprocessed_docs: list[dict], + tenant_id: str | None, +) -> tuple[list[Document], list[DocMetadataAwareIndexChunk]]: + ids_to_documents = {} + chunks = [] + for preprocessed_doc in preprocessed_docs: + document = Document( + id=preprocessed_doc["url"], # For Web connector, the URL is the ID + # The section is not really used past this point since we have already done the other processing + # for the chunking and embedding. + sections=[ + Section(text=preprocessed_doc["content"], link=preprocessed_doc["url"]) + ], + source=DocumentSource.WEB, + semantic_identifier=preprocessed_doc["title"], + metadata={}, + doc_updated_at=None, + primary_owners=[], + secondary_owners=[], + ) + if preprocessed_doc["chunk_ind"] == 0: + ids_to_documents[document.id] = document + + chunk = DocMetadataAwareIndexChunk( + chunk_id=preprocessed_doc["chunk_ind"], + blurb=preprocessed_doc["content"] + .split(".", 1)[0] + .split("!", 1)[0] + .split("?", 1)[0], + content=preprocessed_doc["content"], + source_links={0: preprocessed_doc["url"]}, + section_continuation=False, + source_document=document, + title_prefix=preprocessed_doc["title"], + metadata_suffix_semantic="", + metadata_suffix_keyword="", + mini_chunk_texts=None, + large_chunk_reference_ids=[], + embeddings=ChunkEmbedding( + full_embedding=preprocessed_doc["content_embedding"], + mini_chunk_embeddings=[], + ), + title_embedding=preprocessed_doc["title_embedding"], + tenant_id=tenant_id, + access=default_public_access, + document_sets=set(), + boost=DEFAULT_BOOST, + ) + chunks.append(chunk) + + return list(ids_to_documents.values()), chunks + + +def seed_initial_documents(db_session: Session, tenant_id: str | None) -> None: + """ + Seed initial documents so users don't have an empty index to start + + Documents are only loaded if: + - This is the first setup (if the user deletes the docs, we don't load them again) + - The index is empty, there are no docs and no (non-default) connectors + - The user has not updated the embedding models + - If they do, then we have to actually index the website + - If the embedding model is already updated on server startup, they're not a new user + + Note that regardless of any search settings, the default documents are always loaded with + the predetermined chunk sizes and single pass embedding. 
+ + Steps are as follows: + - Check if this needs to run + - Create the connector representing the seeded documents + - Create the cc-pair (attaching the public credential) and mock values like the last successful index time + - Index the documents into Postgres + - Index the documents into Vespa + - Create a fake index attempt with fake times + """ + logger.info("Seeding initial documents") + + kv_store = get_kv_store() + try: + kv_store.load(KV_DOCUMENTS_SEEDED_KEY) + logger.info("Documents already seeded, skipping") + return + except KvKeyNotFoundError: + pass + + if check_docs_exist(db_session): + logger.info("Documents already exist, skipping") + return + + if check_connectors_exist(db_session): + logger.info("Connectors already exist, skipping") + return + + search_settings = get_current_search_settings(db_session) + if search_settings.model_name != DEFAULT_DOCUMENT_ENCODER_MODEL: + logger.info("Embedding model has been updated, skipping") + return + + document_index = get_default_document_index( + primary_index_name=search_settings.index_name, secondary_index_name=None + ) + + # Create a connector so the user can delete it if they want + # or reindex it with a new search model if they want + connector_data = ConnectorBase( + name="Sample Use Cases", + source=DocumentSource.WEB, + input_type=InputType.LOAD_STATE, + connector_specific_config={ + "base_url": "https://docs.danswer.dev/more/use_cases", + "web_connector_type": "recursive", + }, + refresh_freq=None, # Never refresh by default + prune_freq=None, + indexing_start=None, + ) + + connector = create_connector(db_session, connector_data) + connector_id = cast(int, connector.id) + + last_index_time = datetime.datetime.now(datetime.timezone.utc) + + result = add_credential_to_connector( + db_session=db_session, + user=None, + connector_id=connector_id, + credential_id=PUBLIC_CREDENTIAL_ID, + access_type=AccessType.PUBLIC, + cc_pair_name=connector_data.name, + groups=None, + initial_status=ConnectorCredentialPairStatus.PAUSED, + last_successful_index_time=last_index_time, + ) + cc_pair_id = cast(int, result.data) + + initial_docs_path = os.path.join( + os.getcwd(), "danswer", "seeding", "initial_docs.json" + ) + processed_docs = json.load(open(initial_docs_path)) + + docs, chunks = _create_indexable_chunks(processed_docs, tenant_id) + + index_doc_batch_prepare( + document_batch=docs, + index_attempt_metadata=IndexAttemptMetadata( + connector_id=connector_id, + credential_id=PUBLIC_CREDENTIAL_ID, + ), + db_session=db_session, + ignore_time_skip=True, # Doesn't actually matter here + ) + + # In this case since there are no other connectors running in the background + # and this is a fresh deployment, there is no need to grab any locks + logger.info( + "Indexing seed documents into Vespa " + "(Vespa may take a few seconds to become ready after receiving the schema)" + ) + + # Retries here because the index may take a few seconds to become ready + # as we just sent over the Vespa schema and there is a slight delay + + index_with_retries = retry_builder()(document_index.index) + index_with_retries(chunks=chunks) + + # Mock a run for the UI even though it did not actually call out to anything + mock_successful_index_attempt( + connector_credential_pair_id=cc_pair_id, + search_settings_id=search_settings.id, + docs_indexed=len(docs), + db_session=db_session, + ) + + kv_store.store(KV_DOCUMENTS_SEEDED_KEY, True) diff --git a/backend/danswer/server/auth_check.py b/backend/danswer/server/auth_check.py index 8a35a560a24..4300bc464cb 100644 --- 
a/backend/danswer/server/auth_check.py +++ b/backend/danswer/server/auth_check.py @@ -10,6 +10,8 @@ from danswer.auth.users import current_user_with_expired_token from danswer.configs.app_configs import APP_API_PREFIX from danswer.server.danswer_api.ingestion import api_key_dep +from ee.danswer.auth.users import current_cloud_superuser +from ee.danswer.server.tenants.access import control_plane_dep PUBLIC_ENDPOINT_SPECS = [ @@ -98,6 +100,8 @@ def check_router_auth( or depends_fn == current_curator_or_admin_user or depends_fn == api_key_dep or depends_fn == current_user_with_expired_token + or depends_fn == control_plane_dep + or depends_fn == current_cloud_superuser ): found_auth = True break diff --git a/backend/danswer/server/danswer_api/ingestion.py b/backend/danswer/server/danswer_api/ingestion.py index cea3ec86575..bae316535c7 100644 --- a/backend/danswer/server/danswer_api/ingestion.py +++ b/backend/danswer/server/danswer_api/ingestion.py @@ -9,6 +9,7 @@ from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.document import get_documents_by_cc_pair from danswer.db.document import get_ingestion_documents +from danswer.db.engine import get_current_tenant_id from danswer.db.engine import get_session from danswer.db.models import User from danswer.db.search_settings import get_current_search_settings @@ -67,6 +68,7 @@ def upsert_ingestion_doc( doc_info: IngestionDocument, _: User | None = Depends(api_key_dep), db_session: Session = Depends(get_session), + tenant_id: str = Depends(get_current_tenant_id), ) -> IngestionResult: doc_info.document.from_ingestion_api = True @@ -101,6 +103,7 @@ def upsert_ingestion_doc( document_index=curr_doc_index, ignore_time_skip=True, db_session=db_session, + tenant_id=tenant_id, ) new_doc, __chunk_count = indexing_pipeline( @@ -134,6 +137,7 @@ def upsert_ingestion_doc( document_index=sec_doc_index, ignore_time_skip=True, db_session=db_session, + tenant_id=tenant_id, ) sec_ind_pipeline( diff --git a/backend/danswer/server/documents/cc_pair.py b/backend/danswer/server/documents/cc_pair.py index 428666751a4..68b48b85b0f 100644 --- a/backend/danswer/server/documents/cc_pair.py +++ b/backend/danswer/server/documents/cc_pair.py @@ -1,4 +1,5 @@ import math +from datetime import datetime from http import HTTPStatus from fastapi import APIRouter @@ -11,8 +12,10 @@ from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.background.celery.celery_utils import get_deletion_attempt_snapshot -from danswer.background.celery.celery_utils import skip_cc_pair_pruning_by_task -from danswer.background.task_utils import name_cc_prune_task +from danswer.background.celery.tasks.pruning.tasks import ( + try_creating_prune_generator_task, +) +from danswer.background.celery.versioned_apps.primary import app as primary_app from danswer.db.connector_credential_pair import add_credential_to_connector from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.connector_credential_pair import remove_credential_from_connector @@ -20,6 +23,8 @@ update_connector_credential_pair_from_id, ) from danswer.db.document import get_document_counts_for_cc_pairs +from danswer.db.engine import CURRENT_TENANT_ID_CONTEXTVAR +from danswer.db.engine import get_current_tenant_id from danswer.db.engine import get_session from danswer.db.enums import AccessType from danswer.db.enums import ConnectorCredentialPairStatus @@ -29,17 +34,25 @@ from 
danswer.db.index_attempt import get_latest_index_attempt_for_cc_pair_id from danswer.db.index_attempt import get_paginated_index_attempts_for_cc_pair_id from danswer.db.models import User +from danswer.db.search_settings import get_current_search_settings +from danswer.db.tasks import check_task_is_live_and_not_timed_out from danswer.db.tasks import get_latest_task +from danswer.redis.redis_connector import RedisConnector +from danswer.redis.redis_pool import get_redis_client from danswer.server.documents.models import CCPairFullInfo -from danswer.server.documents.models import CCPairPruningTask from danswer.server.documents.models import CCStatusUpdateRequest +from danswer.server.documents.models import CeleryTaskStatus from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.documents.models import ConnectorCredentialPairMetadata from danswer.server.documents.models import PaginatedIndexAttempts from danswer.server.models import StatusResponse from danswer.utils.logger import setup_logger +from ee.danswer.background.task_name_builders import ( + name_sync_external_doc_permissions_task, +) from ee.danswer.db.user_group import validate_user_creation_permissions + logger = setup_logger() router = APIRouter(prefix="/manage") @@ -81,6 +94,7 @@ def get_cc_pair_full_info( cc_pair_id: int, user: User | None = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), + tenant_id: str | None = Depends(get_current_tenant_id), ) -> CCPairFullInfo: cc_pair = get_connector_credential_pair_from_id( cc_pair_id, db_session, user, get_editable=False @@ -111,11 +125,16 @@ def get_cc_pair_full_info( latest_attempt = get_latest_index_attempt_for_cc_pair_id( db_session=db_session, - connector_credential_pair_id=cc_pair.id, + connector_credential_pair_id=cc_pair_id, secondary_index=False, only_finished=False, ) + search_settings = get_current_search_settings(db_session) + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + redis_connector_index = redis_connector.new_index(search_settings.id) + return CCPairFullInfo.from_models( cc_pair_model=cc_pair, number_of_index_attempts=count_index_attempts_for_connector( @@ -127,9 +146,11 @@ def get_cc_pair_full_info( connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id, db_session=db_session, + tenant_id=tenant_id, ), num_docs_indexed=documents_indexed, is_editable_for_current_user=is_editable_for_current_user, + indexing=redis_connector_index.fenced, ) @@ -146,6 +167,7 @@ def update_cc_pair_status( user=user, get_editable=True, ) + if not cc_pair: raise HTTPException( status_code=400, @@ -155,7 +177,6 @@ def update_cc_pair_status( if status_update_request.status == ConnectorCredentialPairStatus.PAUSED: cancel_indexing_attempts_for_ccpair(cc_pair_id, db_session) - # Just for good measure cancel_indexing_attempts_past_model(db_session) update_connector_credential_pair_from_id( @@ -164,6 +185,8 @@ def update_cc_pair_status( status=status_update_request.status, ) + db_session.commit() + @router.put("/admin/cc-pair/{cc_pair_id}/name") def update_cc_pair_name( @@ -194,12 +217,36 @@ def update_cc_pair_name( raise HTTPException(status_code=400, detail="Name must be unique") -@router.get("/admin/cc-pair/{cc_pair_id}/prune") -def get_cc_pair_latest_prune( +@router.get("/admin/cc-pair/{cc_pair_id}/last_pruned") +def get_cc_pair_last_pruned( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> datetime | None: + 
cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, + ) + if not cc_pair: + raise HTTPException( + status_code=400, + detail="cc_pair not found for current user's permissions", + ) + + return cc_pair.last_pruned + + +@router.post("/admin/cc-pair/{cc_pair_id}/prune") +def prune_cc_pair( cc_pair_id: int, user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), -) -> CCPairPruningTask: + tenant_id: str | None = Depends(get_current_tenant_id), +) -> StatusResponse[list[int]]: + """Triggers pruning on a particular cc_pair immediately""" + cc_pair = get_connector_credential_pair_from_id( cc_pair_id=cc_pair_id, db_session=db_session, @@ -212,34 +259,82 @@ def get_cc_pair_latest_prune( detail="Connection not found for current user's permissions", ) - # look up the last prune task for this connector (if it exists) - pruning_task_name = name_cc_prune_task( - connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id + r = get_redis_client(tenant_id=tenant_id) + + redis_connector = RedisConnector(tenant_id, cc_pair_id) + if redis_connector.prune.fenced: + raise HTTPException( + status_code=HTTPStatus.CONFLICT, + detail="Pruning task already in progress.", + ) + + logger.info( + f"Pruning cc_pair: cc_pair_id={cc_pair_id} " + f"connector_id={cc_pair.connector_id} " + f"credential_id={cc_pair.credential_id} " + f"{cc_pair.connector.name} connector." + ) + tasks_created = try_creating_prune_generator_task( + primary_app, cc_pair, db_session, r, CURRENT_TENANT_ID_CONTEXTVAR.get() + ) + if not tasks_created: + raise HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail="Pruning task creation failed.", + ) + + return StatusResponse( + success=True, + message="Successfully created the pruning task.", + ) + + +@router.get("/admin/cc-pair/{cc_pair_id}/sync") +def get_cc_pair_latest_sync( + cc_pair_id: int, + user: User = Depends(current_curator_or_admin_user), + db_session: Session = Depends(get_session), +) -> CeleryTaskStatus: + cc_pair = get_connector_credential_pair_from_id( + cc_pair_id=cc_pair_id, + db_session=db_session, + user=user, + get_editable=False, ) - last_pruning_task = get_latest_task(pruning_task_name, db_session) - if not last_pruning_task: + if not cc_pair: + raise HTTPException( + status_code=400, + detail="Connection not found for current user's permissions", + ) + + # look up the last sync task for this connector (if it exists) + sync_task_name = name_sync_external_doc_permissions_task(cc_pair_id=cc_pair_id) + last_sync_task = get_latest_task(sync_task_name, db_session) + if not last_sync_task: raise HTTPException( status_code=HTTPStatus.NOT_FOUND, - detail="No pruning task found.", + detail="No sync task found.", ) - return CCPairPruningTask( - id=last_pruning_task.task_id, - name=last_pruning_task.task_name, - status=last_pruning_task.status, - start_time=last_pruning_task.start_time, - register_time=last_pruning_task.register_time, + return CeleryTaskStatus( + id=last_sync_task.task_id, + name=last_sync_task.task_name, + status=last_sync_task.status, + start_time=last_sync_task.start_time, + register_time=last_sync_task.register_time, ) -@router.post("/admin/cc-pair/{cc_pair_id}/prune") -def prune_cc_pair( +@router.post("/admin/cc-pair/{cc_pair_id}/sync") +def sync_cc_pair( cc_pair_id: int, user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), ) -> StatusResponse[list[int]]: # avoiding 
circular refs - from danswer.background.celery.tasks.pruning.tasks import prune_documents_task + from ee.danswer.background.celery.apps.primary import ( + sync_external_doc_permissions_task, + ) cc_pair = get_connector_credential_pair_from_id( cc_pair_id=cc_pair_id, @@ -253,30 +348,27 @@ def prune_cc_pair( detail="Connection not found for current user's permissions", ) - pruning_task_name = name_cc_prune_task( - connector_id=cc_pair.connector_id, credential_id=cc_pair.credential_id - ) - last_pruning_task = get_latest_task(pruning_task_name, db_session) - if skip_cc_pair_pruning_by_task( - last_pruning_task, - db_session=db_session, + sync_task_name = name_sync_external_doc_permissions_task(cc_pair_id=cc_pair_id) + last_sync_task = get_latest_task(sync_task_name, db_session) + + if last_sync_task and check_task_is_live_and_not_timed_out( + last_sync_task, db_session ): raise HTTPException( status_code=HTTPStatus.CONFLICT, - detail="Pruning task already in progress.", + detail="Sync task already in progress.", ) - logger.info(f"Pruning the {cc_pair.connector.name} connector.") - prune_documents_task.apply_async( + logger.info(f"Syncing the {cc_pair.connector.name} connector.") + sync_external_doc_permissions_task.apply_async( kwargs=dict( - connector_id=cc_pair.connector.id, - credential_id=cc_pair.credential.id, - ) + cc_pair_id=cc_pair_id, tenant_id=CURRENT_TENANT_ID_CONTEXTVAR.get() + ), ) return StatusResponse( success=True, - message="Successfully created the pruning task.", + message="Successfully created the sync task.", ) diff --git a/backend/danswer/server/documents/connector.py b/backend/danswer/server/documents/connector.py index 58dcf7e7691..0e2c00d67bc 100644 --- a/backend/danswer/server/documents/connector.py +++ b/backend/danswer/server/documents/connector.py @@ -9,6 +9,7 @@ from fastapi import Request from fastapi import Response from fastapi import UploadFile +from google.oauth2.credentials import Credentials # type: ignore from pydantic import BaseModel from sqlalchemy.orm import Session @@ -16,37 +17,43 @@ from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.background.celery.celery_utils import get_deletion_attempt_snapshot +from danswer.background.celery.tasks.indexing.tasks import try_creating_indexing_task +from danswer.background.celery.versioned_apps.primary import app as primary_app from danswer.configs.app_configs import ENABLED_CONNECTOR_TYPES from danswer.configs.constants import DocumentSource from danswer.configs.constants import FileOrigin -from danswer.connectors.gmail.connector_auth import delete_gmail_service_account_key -from danswer.connectors.gmail.connector_auth import delete_google_app_gmail_cred -from danswer.connectors.gmail.connector_auth import get_gmail_auth_url -from danswer.connectors.gmail.connector_auth import get_gmail_service_account_key -from danswer.connectors.gmail.connector_auth import get_google_app_gmail_cred -from danswer.connectors.gmail.connector_auth import ( - update_gmail_credential_access_tokens, +from danswer.connectors.google_utils.google_auth import ( + get_google_oauth_creds, ) -from danswer.connectors.gmail.connector_auth import ( - upsert_gmail_service_account_key, +from danswer.connectors.google_utils.google_kv import ( + build_service_account_creds, ) -from danswer.connectors.gmail.connector_auth import upsert_google_app_gmail_cred -from danswer.connectors.google_drive.connector_auth import build_service_account_creds -from 
danswer.connectors.google_drive.connector_auth import delete_google_app_cred -from danswer.connectors.google_drive.connector_auth import delete_service_account_key -from danswer.connectors.google_drive.connector_auth import get_auth_url -from danswer.connectors.google_drive.connector_auth import get_google_app_cred -from danswer.connectors.google_drive.connector_auth import ( - get_google_drive_creds_for_authorized_user, +from danswer.connectors.google_utils.google_kv import ( + delete_google_app_cred, ) -from danswer.connectors.google_drive.connector_auth import get_service_account_key -from danswer.connectors.google_drive.connector_auth import ( +from danswer.connectors.google_utils.google_kv import ( + delete_service_account_key, +) +from danswer.connectors.google_utils.google_kv import get_auth_url +from danswer.connectors.google_utils.google_kv import ( + get_google_app_cred, +) +from danswer.connectors.google_utils.google_kv import ( + get_service_account_key, +) +from danswer.connectors.google_utils.google_kv import ( update_credential_access_tokens, ) -from danswer.connectors.google_drive.connector_auth import upsert_google_app_cred -from danswer.connectors.google_drive.connector_auth import upsert_service_account_key -from danswer.connectors.google_drive.connector_auth import verify_csrf -from danswer.connectors.google_drive.constants import DB_CREDENTIALS_DICT_TOKEN_KEY +from danswer.connectors.google_utils.google_kv import ( + upsert_google_app_cred, +) +from danswer.connectors.google_utils.google_kv import ( + upsert_service_account_key, +) +from danswer.connectors.google_utils.google_kv import verify_csrf +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_TOKEN_KEY, +) from danswer.db.connector import create_connector from danswer.db.connector import delete_connector from danswer.db.connector import fetch_connector_by_id @@ -57,25 +64,31 @@ from danswer.db.connector_credential_pair import get_cc_pair_groups_for_ids from danswer.db.connector_credential_pair import get_connector_credential_pair from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.credentials import cleanup_gmail_credentials +from danswer.db.credentials import cleanup_google_drive_credentials from danswer.db.credentials import create_credential from danswer.db.credentials import delete_gmail_service_account_credentials from danswer.db.credentials import delete_google_drive_service_account_credentials from danswer.db.credentials import fetch_credential_by_id from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed from danswer.db.document import get_document_counts_for_cc_pairs +from danswer.db.engine import get_current_tenant_id from danswer.db.engine import get_session from danswer.db.enums import AccessType -from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import get_index_attempts_for_cc_pair from danswer.db.index_attempt import get_latest_index_attempt_for_cc_pair_id from danswer.db.index_attempt import get_latest_index_attempts from danswer.db.index_attempt import get_latest_index_attempts_by_status from danswer.db.models import IndexingStatus +from danswer.db.models import SearchSettings from danswer.db.models import User from danswer.db.models import UserRole from danswer.db.search_settings import get_current_search_settings -from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.db.search_settings import get_secondary_search_settings from 
danswer.file_store.file_store import get_default_file_store +from danswer.key_value_store.interface import KvKeyNotFoundError +from danswer.redis.redis_connector import RedisConnector +from danswer.redis.redis_pool import get_redis_client from danswer.server.documents.models import AuthStatus from danswer.server.documents.models import AuthUrl from danswer.server.documents.models import ConnectorCredentialPairIdentifier @@ -115,8 +128,8 @@ def check_google_app_gmail_credentials_exist( _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: - return {"client_id": get_google_app_gmail_cred().web.client_id} - except ConfigNotFoundError: + return {"client_id": get_google_app_cred(DocumentSource.GMAIL).web.client_id} + except KvKeyNotFoundError: raise HTTPException(status_code=404, detail="Google App Credentials not found") @@ -125,7 +138,7 @@ def upsert_google_app_gmail_credentials( app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user) ) -> StatusResponse: try: - upsert_google_app_gmail_cred(app_credentials) + upsert_google_app_cred(app_credentials, DocumentSource.GMAIL) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -137,10 +150,12 @@ def upsert_google_app_gmail_credentials( @router.delete("/admin/connector/gmail/app-credential") def delete_google_app_gmail_credentials( _: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> StatusResponse: try: - delete_google_app_gmail_cred() - except ConfigNotFoundError as e: + delete_google_app_cred(DocumentSource.GMAIL) + cleanup_gmail_credentials(db_session=db_session) + except KvKeyNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) return StatusResponse( @@ -153,8 +168,10 @@ def check_google_app_credentials_exist( _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: - return {"client_id": get_google_app_cred().web.client_id} - except ConfigNotFoundError: + return { + "client_id": get_google_app_cred(DocumentSource.GOOGLE_DRIVE).web.client_id + } + except KvKeyNotFoundError: raise HTTPException(status_code=404, detail="Google App Credentials not found") @@ -163,7 +180,7 @@ def upsert_google_app_credentials( app_credentials: GoogleAppCredentials, _: User = Depends(current_admin_user) ) -> StatusResponse: try: - upsert_google_app_cred(app_credentials) + upsert_google_app_cred(app_credentials, DocumentSource.GOOGLE_DRIVE) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -175,10 +192,12 @@ def upsert_google_app_credentials( @router.delete("/admin/connector/google-drive/app-credential") def delete_google_app_credentials( _: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> StatusResponse: try: - delete_google_app_cred() - except ConfigNotFoundError as e: + delete_google_app_cred(DocumentSource.GOOGLE_DRIVE) + cleanup_google_drive_credentials(db_session=db_session) + except KvKeyNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) return StatusResponse( @@ -191,8 +210,12 @@ def check_google_service_gmail_account_key_exist( _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: - return {"service_account_email": get_gmail_service_account_key().client_email} - except ConfigNotFoundError: + return { + "service_account_email": get_service_account_key( + DocumentSource.GMAIL + ).client_email + } + except KvKeyNotFoundError: raise HTTPException( status_code=404, detail="Google Service Account Key not found" ) 
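For readers scanning this part of the diff: the Google connector credential endpoints above were consolidated so that a single KV-backed helper serves both Gmail and Google Drive, selected by a DocumentSource argument, and KvKeyNotFoundError replaces ConfigNotFoundError when nothing is stored. A minimal sketch of that pattern follows (the route path and handler name are illustrative and not part of this PR; only names already present in this diff are reused):

# Illustrative sketch, not part of this PR; it mirrors the source-parameterized
# pattern used by the check/upsert/delete endpoints above.
@router.get("/admin/connector/{source}/service-account-key/status")
def example_service_account_key_status(
    source: DocumentSource,
    _: User = Depends(current_curator_or_admin_user),
) -> dict[str, str]:
    try:
        # One helper now covers both Gmail and Google Drive
        return {
            "service_account_email": get_service_account_key(source).client_email
        }
    except KvKeyNotFoundError:
        raise HTTPException(
            status_code=404, detail="Google Service Account Key not found"
        )
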
@@ -203,7 +226,7 @@ def upsert_google_service_gmail_account_key( service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user) ) -> StatusResponse: try: - upsert_gmail_service_account_key(service_account_key) + upsert_service_account_key(service_account_key, DocumentSource.GMAIL) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -215,10 +238,12 @@ def upsert_google_service_gmail_account_key( @router.delete("/admin/connector/gmail/service-account-key") def delete_google_service_gmail_account_key( _: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> StatusResponse: try: - delete_gmail_service_account_key() - except ConfigNotFoundError as e: + delete_service_account_key(DocumentSource.GMAIL) + cleanup_gmail_credentials(db_session=db_session) + except KvKeyNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) return StatusResponse( @@ -231,8 +256,12 @@ def check_google_service_account_key_exist( _: User = Depends(current_curator_or_admin_user), ) -> dict[str, str]: try: - return {"service_account_email": get_service_account_key().client_email} - except ConfigNotFoundError: + return { + "service_account_email": get_service_account_key( + DocumentSource.GOOGLE_DRIVE + ).client_email + } + except KvKeyNotFoundError: raise HTTPException( status_code=404, detail="Google Service Account Key not found" ) @@ -243,7 +272,7 @@ def upsert_google_service_account_key( service_account_key: GoogleServiceAccountKey, _: User = Depends(current_admin_user) ) -> StatusResponse: try: - upsert_service_account_key(service_account_key) + upsert_service_account_key(service_account_key, DocumentSource.GOOGLE_DRIVE) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) @@ -255,10 +284,12 @@ def upsert_google_service_account_key( @router.delete("/admin/connector/google-drive/service-account-key") def delete_google_service_account_key( _: User = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> StatusResponse: try: - delete_service_account_key() - except ConfigNotFoundError as e: + delete_service_account_key(DocumentSource.GOOGLE_DRIVE) + cleanup_google_drive_credentials(db_session=db_session) + except KvKeyNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) return StatusResponse( @@ -278,9 +309,9 @@ def upsert_service_account_credential( try: credential_base = build_service_account_creds( DocumentSource.GOOGLE_DRIVE, - delegated_user_email=service_account_credential_request.google_drive_delegated_user, + primary_admin_email=service_account_credential_request.google_drive_primary_admin, ) - except ConfigNotFoundError as e: + except KvKeyNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) # first delete all existing service account credentials @@ -304,9 +335,9 @@ def upsert_gmail_service_account_credential( try: credential_base = build_service_account_creds( DocumentSource.GMAIL, - delegated_user_email=service_account_credential_request.gmail_delegated_user, + primary_admin_email=service_account_credential_request.gmail_primary_admin, ) - except ConfigNotFoundError as e: + except KvKeyNotFoundError as e: raise HTTPException(status_code=400, detail=str(e)) # first delete all existing service account credentials @@ -331,28 +362,15 @@ def check_drive_tokens( ): return AuthStatus(authenticated=False) token_json_str = str(db_credentials.credential_json[DB_CREDENTIALS_DICT_TOKEN_KEY]) - google_drive_creds = 
get_google_drive_creds_for_authorized_user( - token_json_str=token_json_str + google_drive_creds = get_google_oauth_creds( + token_json_str=token_json_str, + source=DocumentSource.GOOGLE_DRIVE, ) if google_drive_creds is None: return AuthStatus(authenticated=False) return AuthStatus(authenticated=True) -@router.get("/admin/connector/google-drive/authorize/{credential_id}") -def admin_google_drive_auth( - response: Response, credential_id: str, _: User = Depends(current_admin_user) -) -> AuthUrl: - # set a cookie that we can read in the callback (used for `verify_csrf`) - response.set_cookie( - key=_GOOGLE_DRIVE_CREDENTIAL_ID_COOKIE_NAME, - value=credential_id, - httponly=True, - max_age=600, - ) - return AuthUrl(auth_url=get_auth_url(credential_id=int(credential_id))) - - @router.post("/admin/connector/file/upload") def upload_files( files: list[UploadFile], @@ -477,13 +495,14 @@ def get_connector_indexing_status( get_editable: bool = Query( False, description="If true, return editable document sets" ), + tenant_id: str | None = Depends(get_current_tenant_id), ) -> list[ConnectorIndexingStatus]: indexing_statuses: list[ConnectorIndexingStatus] = [] # NOTE: If the connector is deleting behind the scenes, # accessing cc_pairs can be inconsistent and members like # connector or credential may be None. - # Additional checks are done to make sure the connector and credential still exists. + # Additional checks are done to make sure the connector and credential still exist. # TODO: make this one query ... possibly eager load or wrap in a read transaction # to avoid the complexity of trying to error check throughout the function cc_pairs = get_connector_credential_pairs( @@ -531,6 +550,12 @@ def get_connector_indexing_status( relationship.user_group_id ) + search_settings: SearchSettings | None = None + if not secondary_index: + search_settings = get_current_search_settings(db_session) + else: + search_settings = get_secondary_search_settings(db_session) + for cc_pair in cc_pairs: # TODO remove this to enable ingestion API if cc_pair.name == "DefaultCCPair": @@ -542,6 +567,13 @@ def get_connector_indexing_status( # This may happen if background deletion is happening continue + in_progress = False + if search_settings: + redis_connector = RedisConnector(tenant_id, cc_pair.id) + redis_connector_index = redis_connector.new_index(search_settings.id) + if redis_connector_index.fenced: + in_progress = True + latest_index_attempt = cc_pair_to_latest_index_attempt.get( (connector.id, credential.id) ) @@ -587,6 +619,7 @@ def get_connector_indexing_status( connector_id=connector.id, credential_id=credential.id, db_session=db_session, + tenant_id=tenant_id, ), is_deletable=check_deletion_attempt_is_allowed( connector_credential_pair=cc_pair, @@ -595,6 +628,7 @@ def get_connector_indexing_status( allow_scheduled=True, ) is None, + in_progress=in_progress, ) ) @@ -663,15 +697,18 @@ def create_connector_with_mock_credential( connector_response = create_connector( db_session=db_session, connector_data=connector_data ) + mock_credential = CredentialBase( credential_json={}, admin_public=True, source=connector_data.source ) credential = create_credential( mock_credential, user=user, db_session=db_session ) + access_type = ( AccessType.PUBLIC if connector_data.is_public else AccessType.PRIVATE ) + response = add_credential_to_connector( db_session=db_session, user=user, @@ -750,7 +787,13 @@ def connector_run_once( run_info: RunConnectorRequest, _: User = Depends(current_curator_or_admin_user), db_session: Session 
= Depends(get_session), + tenant_id: str = Depends(get_current_tenant_id), ) -> StatusResponse[list[int]]: + """Used to trigger indexing on a set of cc_pairs associated with a + single connector.""" + + r = get_redis_client(tenant_id=tenant_id) + connector_id = run_info.connector_id specified_credential_ids = run_info.credential_ids @@ -781,6 +824,7 @@ def connector_run_once( detail="Connector has no valid credentials, cannot create index attempts.", ) + # Prevents index attempts for cc pairs that already have an index attempt currently running skipped_credentials = [ credential_id for credential_id in credential_ids @@ -790,29 +834,38 @@ def connector_run_once( credential_id=credential_id, ), only_current=True, - disinclude_finished=True, db_session=db_session, + disinclude_finished=True, ) ] search_settings = get_current_search_settings(db_session) connector_credential_pairs = [ - get_connector_credential_pair(run_info.connector_id, credential_id, db_session) + get_connector_credential_pair(connector_id, credential_id, db_session) for credential_id in credential_ids if credential_id not in skipped_credentials ] - index_attempt_ids = [ - create_index_attempt( - connector_credential_pair_id=connector_credential_pair.id, - search_settings_id=search_settings.id, - from_beginning=run_info.from_beginning, - db_session=db_session, - ) - for connector_credential_pair in connector_credential_pairs - if connector_credential_pair is not None - ] + index_attempt_ids = [] + for cc_pair in connector_credential_pairs: + if cc_pair is not None: + attempt_id = try_creating_indexing_task( + primary_app, + cc_pair, + search_settings, + run_info.from_beginning, + db_session, + r, + tenant_id, + ) + if attempt_id: + logger.info( + f"try_creating_indexing_task succeeded: cc_pair={cc_pair.id} attempt_id={attempt_id}" + ) + index_attempt_ids.append(attempt_id) + else: + logger.info(f"try_creating_indexing_task failed: cc_pair={cc_pair.id}") if not index_attempt_ids: raise HTTPException( @@ -841,7 +894,7 @@ def gmail_auth( httponly=True, max_age=600, ) - return AuthUrl(auth_url=get_gmail_auth_url(int(credential_id))) + return AuthUrl(auth_url=get_auth_url(int(credential_id))) @router.get("/connector/google-drive/authorize/{credential_id}") @@ -873,8 +926,8 @@ def gmail_callback( credential_id = int(credential_id_cookie) verify_csrf(credential_id, callback.state) if ( - update_gmail_credential_access_tokens( - callback.code, credential_id, user, db_session + update_credential_access_tokens( + callback.code, credential_id, user, db_session, DocumentSource.GMAIL ) is None ): @@ -899,10 +952,11 @@ def google_drive_callback( ) credential_id = int(credential_id_cookie) verify_csrf(credential_id, callback.state) - if ( - update_credential_access_tokens(callback.code, credential_id, user, db_session) - is None - ): + + credentials: Credentials | None = update_credential_access_tokens( + callback.code, credential_id, user, db_session, DocumentSource.GOOGLE_DRIVE + ) + if credentials is None: raise HTTPException( status_code=500, detail="Unable to fetch Google Drive access tokens" ) diff --git a/backend/danswer/server/documents/credential.py b/backend/danswer/server/documents/credential.py index 3d965481bf5..42c72b8f34f 100644 --- a/backend/danswer/server/documents/credential.py +++ b/backend/danswer/server/documents/credential.py @@ -8,6 +8,8 @@ from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.db.credentials import alter_credential +from 
danswer.db.credentials import cleanup_gmail_credentials +from danswer.db.credentials import cleanup_google_drive_credentials from danswer.db.credentials import create_credential from danswer.db.credentials import CREDENTIAL_PERMISSIONS_TO_IGNORE from danswer.db.credentials import delete_credential @@ -79,18 +81,6 @@ def get_cc_source_full_info( ] -@router.get("/credential/{id}") -def list_credentials_by_id( - user: User | None = Depends(current_user), - db_session: Session = Depends(get_session), -) -> list[CredentialSnapshot]: - credentials = fetch_credentials(db_session=db_session, user=user) - return [ - CredentialSnapshot.from_credential_db_model(credential) - for credential in credentials - ] - - @router.delete("/admin/credential/{credential_id}") def delete_credential_by_id_admin( credential_id: int, @@ -138,6 +128,12 @@ def create_credential_from_model( object_is_public=credential_info.curator_public, ) + # Temporary fix for empty Google App credentials + if credential_info.source == DocumentSource.GMAIL: + cleanup_gmail_credentials(db_session=db_session) + if credential_info.source == DocumentSource.GOOGLE_DRIVE: + cleanup_google_drive_credentials(db_session=db_session) + credential = create_credential(credential_info, user, db_session) return ObjectCreationIdResponse( id=credential.id, diff --git a/backend/danswer/server/documents/models.py b/backend/danswer/server/documents/models.py index ee266eca8b8..d885641e398 100644 --- a/backend/danswer/server/documents/models.py +++ b/backend/danswer/server/documents/models.py @@ -222,6 +222,7 @@ class CCPairFullInfo(BaseModel): access_type: AccessType is_editable_for_current_user: bool deletion_failure_message: str | None + indexing: bool @classmethod def from_models( @@ -232,6 +233,7 @@ def from_models( last_index_attempt: IndexAttempt | None, num_docs_indexed: int, # not ideal, but this must be computed separately is_editable_for_current_user: bool, + indexing: bool, ) -> "CCPairFullInfo": # figure out if we need to artificially deflate the number of docs indexed. 
# This is required since the total number of docs indexed by a CC Pair is @@ -265,10 +267,11 @@ def from_models( access_type=cc_pair_model.access_type, is_editable_for_current_user=is_editable_for_current_user, deletion_failure_message=cc_pair_model.deletion_failure_message, + indexing=indexing, ) -class CCPairPruningTask(BaseModel): +class CeleryTaskStatus(BaseModel): id: str name: str status: TaskStatus @@ -307,6 +310,10 @@ class ConnectorIndexingStatus(BaseModel): deletion_attempt: DeletionAttemptSnapshot | None is_deletable: bool + # index attempt in db can be marked successful while celery/redis + # is still running/cleaning up + in_progress: bool + class ConnectorCredentialPairIdentifier(BaseModel): connector_id: int @@ -370,16 +377,16 @@ class GoogleServiceAccountKey(BaseModel): class GoogleServiceAccountCredentialRequest(BaseModel): - google_drive_delegated_user: str | None = None # email of user to impersonate - gmail_delegated_user: str | None = None # email of user to impersonate + google_drive_primary_admin: str | None = None # email of user to impersonate + gmail_primary_admin: str | None = None # email of user to impersonate @model_validator(mode="after") def check_user_delegation(self) -> "GoogleServiceAccountCredentialRequest": - if (self.google_drive_delegated_user is None) == ( - self.gmail_delegated_user is None + if (self.google_drive_primary_admin is None) == ( + self.gmail_primary_admin is None ): raise ValueError( - "Exactly one of google_drive_delegated_user or gmail_delegated_user must be set" + "Exactly one of google_drive_primary_admin or gmail_primary_admin must be set" ) return self diff --git a/backend/danswer/server/features/folder/models.py b/backend/danswer/server/features/folder/models.py index d7b161414a3..3f7e1304cbc 100644 --- a/backend/danswer/server/features/folder/models.py +++ b/backend/danswer/server/features/folder/models.py @@ -1,3 +1,5 @@ +from uuid import UUID + from pydantic import BaseModel from danswer.server.query_and_chat.models import ChatSessionDetails @@ -23,7 +25,7 @@ class FolderUpdateRequest(BaseModel): class FolderChatSessionRequest(BaseModel): - chat_session_id: int + chat_session_id: UUID class DeleteFolderOptions(BaseModel): diff --git a/backend/danswer/server/features/notifications/api.py b/backend/danswer/server/features/notifications/api.py new file mode 100644 index 00000000000..a4f5415a6a1 --- /dev/null +++ b/backend/danswer/server/features/notifications/api.py @@ -0,0 +1,47 @@ +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from sqlalchemy.orm import Session + +from danswer.auth.users import current_user +from danswer.db.engine import get_session +from danswer.db.models import User +from danswer.db.notification import dismiss_notification +from danswer.db.notification import get_notification_by_id +from danswer.db.notification import get_notifications +from danswer.server.settings.models import Notification as NotificationModel +from danswer.utils.logger import setup_logger + +logger = setup_logger() + +router = APIRouter(prefix="/notifications") + + +@router.get("") +def get_notifications_api( + user: User = Depends(current_user), + db_session: Session = Depends(get_session), +) -> list[NotificationModel]: + notifications = [ + NotificationModel.from_model(notif) + for notif in get_notifications(user, db_session, include_dismissed=False) + ] + return notifications + + +@router.post("/{notification_id}/dismiss") +def dismiss_notification_endpoint( + notification_id: int, + 
user: User | None = Depends(current_user), + db_session: Session = Depends(get_session), +) -> None: + try: + notification = get_notification_by_id(notification_id, user, db_session) + except PermissionError: + raise HTTPException( + status_code=403, detail="Not authorized to dismiss this notification" + ) + except ValueError: + raise HTTPException(status_code=404, detail="Notification not found") + + dismiss_notification(notification, db_session) diff --git a/backend/danswer/server/features/persona/api.py b/backend/danswer/server/features/persona/api.py index bcc4800b860..1e0e84a5896 100644 --- a/backend/danswer/server/features/persona/api.py +++ b/backend/danswer/server/features/persona/api.py @@ -13,8 +13,10 @@ from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user from danswer.configs.constants import FileOrigin +from danswer.configs.constants import NotificationType from danswer.db.engine import get_session from danswer.db.models import User +from danswer.db.notification import create_notification from danswer.db.persona import create_update_persona from danswer.db.persona import get_persona_by_id from danswer.db.persona import get_personas @@ -28,12 +30,14 @@ from danswer.file_store.models import ChatFileType from danswer.llm.answering.prompts.utils import build_dummy_prompt from danswer.server.features.persona.models import CreatePersonaRequest +from danswer.server.features.persona.models import ImageGenerationToolStatus +from danswer.server.features.persona.models import PersonaSharedNotificationData from danswer.server.features.persona.models import PersonaSnapshot from danswer.server.features.persona.models import PromptTemplateResponse from danswer.server.models import DisplayPriorityRequest +from danswer.tools.utils import is_image_generation_available from danswer.utils.logger import setup_logger - logger = setup_logger() @@ -183,11 +187,12 @@ class PersonaShareRequest(BaseModel): user_ids: list[UUID] +# We notify each user when a user is shared with them @basic_router.patch("/{persona_id}/share") def share_persona( persona_id: int, persona_share_request: PersonaShareRequest, - user: User | None = Depends(current_user), + user: User = Depends(current_user), db_session: Session = Depends(get_session), ) -> None: update_persona_shared_users( @@ -197,6 +202,18 @@ def share_persona( db_session=db_session, ) + for user_id in persona_share_request.user_ids: + # Don't notify the user that they have access to their own persona + if user_id != user.id: + create_notification( + user_id=user_id, + notif_type=NotificationType.PERSONA_SHARED, + db_session=db_session, + additional_data=PersonaSharedNotificationData( + persona_id=persona_id, + ).model_dump(), + ) + @basic_router.delete("/{persona_id}") def delete_persona( @@ -211,23 +228,46 @@ def delete_persona( ) +@basic_router.get("/image-generation-tool") +def get_image_generation_tool( + _: User + | None = Depends(current_user), # User param not used but kept for consistency + db_session: Session = Depends(get_session), +) -> ImageGenerationToolStatus: # Use bool instead of str for boolean values + is_available = is_image_generation_available(db_session=db_session) + return ImageGenerationToolStatus(is_available=is_available) + + @basic_router.get("") def list_personas( user: User | None = Depends(current_user), db_session: Session = Depends(get_session), include_deleted: bool = False, + persona_ids: list[int] = Query(None), ) -> list[PersonaSnapshot]: - return [ - 
PersonaSnapshot.from_model(persona) - for persona in get_personas( - user=user, - include_deleted=include_deleted, - db_session=db_session, - get_editable=False, - joinedload_all=True, + personas = get_personas( + user=user, + include_deleted=include_deleted, + db_session=db_session, + get_editable=False, + joinedload_all=True, + ) + + if persona_ids: + personas = [p for p in personas if p.id in persona_ids] + + # Filter out personas with unavailable tools + personas = [ + p + for p in personas + if not ( + any(tool.in_code_tool_id == "ImageGenerationTool" for tool in p.tools) + and not is_image_generation_available(db_session=db_session) ) ] + return [PersonaSnapshot.from_model(p) for p in personas] + @basic_router.get("/{persona_id}") def get_persona( diff --git a/backend/danswer/server/features/persona/models.py b/backend/danswer/server/features/persona/models.py index 016defda369..5fa99952b1f 100644 --- a/backend/danswer/server/features/persona/models.py +++ b/backend/danswer/server/features/persona/models.py @@ -9,7 +9,7 @@ from danswer.search.enums import RecencyBiasSetting from danswer.server.features.document_set.models import DocumentSet from danswer.server.features.prompt.models import PromptSnapshot -from danswer.server.features.tool.api import ToolSnapshot +from danswer.server.features.tool.models import ToolSnapshot from danswer.server.models import MinimalUserSnapshot from danswer.utils.logger import setup_logger @@ -120,3 +120,11 @@ def from_model( class PromptTemplateResponse(BaseModel): final_prompt_template: str + + +class PersonaSharedNotificationData(BaseModel): + persona_id: int + + +class ImageGenerationToolStatus(BaseModel): + is_available: bool diff --git a/backend/danswer/server/features/tool/api.py b/backend/danswer/server/features/tool/api.py index 1d441593784..48f857780ba 100644 --- a/backend/danswer/server/features/tool/api.py +++ b/backend/danswer/server/features/tool/api.py @@ -18,9 +18,17 @@ from danswer.server.features.tool.models import CustomToolCreate from danswer.server.features.tool.models import CustomToolUpdate from danswer.server.features.tool.models import ToolSnapshot -from danswer.tools.custom.openapi_parsing import MethodSpec -from danswer.tools.custom.openapi_parsing import openapi_to_method_specs -from danswer.tools.custom.openapi_parsing import validate_openapi_schema +from danswer.tools.tool_implementations.custom.openapi_parsing import MethodSpec +from danswer.tools.tool_implementations.custom.openapi_parsing import ( + openapi_to_method_specs, +) +from danswer.tools.tool_implementations.custom.openapi_parsing import ( + validate_openapi_schema, +) +from danswer.tools.tool_implementations.images.image_generation_tool import ( + ImageGenerationTool, +) +from danswer.tools.utils import is_image_generation_available router = APIRouter(prefix="/tool") admin_router = APIRouter(prefix="/admin/tool") @@ -127,4 +135,9 @@ def list_tools( _: User | None = Depends(current_user), ) -> list[ToolSnapshot]: tools = get_tools(db_session) - return [ToolSnapshot.from_model(tool) for tool in tools] + return [ + ToolSnapshot.from_model(tool) + for tool in tools + if tool.in_code_tool_id != ImageGenerationTool.name + or is_image_generation_available(db_session=db_session) + ] diff --git a/backend/danswer/server/manage/administrative.py b/backend/danswer/server/manage/administrative.py index 1ebe5bd0691..1ceeb776abc 100644 --- a/backend/danswer/server/manage/administrative.py +++ b/backend/danswer/server/manage/administrative.py @@ -10,7 +10,7 @@ from 
danswer.auth.users import current_admin_user from danswer.auth.users import current_curator_or_admin_user -from danswer.background.celery.celery_app import celery_app +from danswer.background.celery.versioned_apps.primary import app as primary_app from danswer.configs.app_configs import GENERATIVE_MODEL_ACCESS_CHECK_FREQ from danswer.configs.constants import DanswerCeleryPriority from danswer.configs.constants import DocumentSource @@ -19,7 +19,7 @@ from danswer.db.connector_credential_pair import ( update_connector_credential_pair_from_id, ) -from danswer.db.deletion_attempt import check_deletion_attempt_is_allowed +from danswer.db.engine import get_current_tenant_id from danswer.db.engine import get_session from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.feedback import fetch_docs_ranked_by_boost @@ -29,9 +29,9 @@ from danswer.db.models import User from danswer.document_index.document_index_utils import get_both_index_names from danswer.document_index.factory import get_default_document_index -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.file_store.file_store import get_default_file_store +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError from danswer.llm.factory import get_default_llms from danswer.llm.utils import test_llm from danswer.server.documents.models import ConnectorCredentialPairIdentifier @@ -114,7 +114,7 @@ def validate_existing_genai_api_key( _: User = Depends(current_admin_user), ) -> None: # Only validate every so often - kv_store = get_dynamic_config_store() + kv_store = get_kv_store() curr_time = datetime.now(tz=timezone.utc) try: last_check = datetime.fromtimestamp( @@ -123,7 +123,7 @@ def validate_existing_genai_api_key( check_freq_sec = timedelta(seconds=GENERATIVE_MODEL_ACCESS_CHECK_FREQ) if curr_time - last_check < check_freq_sec: return - except ConfigNotFoundError: + except KvKeyNotFoundError: # First time checking the key, nothing unusual pass @@ -146,6 +146,7 @@ def create_deletion_attempt_for_connector_id( connector_credential_pair_identifier: ConnectorCredentialPairIdentifier, user: User = Depends(current_curator_or_admin_user), db_session: Session = Depends(get_session), + tenant_id: str = Depends(get_current_tenant_id), ) -> None: connector_id = connector_credential_pair_identifier.connector_id credential_id = connector_credential_pair_identifier.credential_id @@ -173,15 +174,19 @@ def create_deletion_attempt_for_connector_id( cc_pair_id=cc_pair.id, db_session=db_session, include_secondary_index=True ) + # TODO(rkuo): 2024-10-24 - check_deletion_attempt_is_allowed shouldn't be necessary + # any more due to background locking improvements. + # Remove the below permanently if everything is behaving for 30 days. 
+ # Check if the deletion attempt should be allowed - deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed( - connector_credential_pair=cc_pair, db_session=db_session - ) - if deletion_attempt_disallowed_reason: - raise HTTPException( - status_code=400, - detail=deletion_attempt_disallowed_reason, - ) + # deletion_attempt_disallowed_reason = check_deletion_attempt_is_allowed( + # connector_credential_pair=cc_pair, db_session=db_session + # ) + # if deletion_attempt_disallowed_reason: + # raise HTTPException( + # status_code=400, + # detail=deletion_attempt_disallowed_reason, + # ) # mark as deleting update_connector_credential_pair_from_id( @@ -193,9 +198,10 @@ def create_deletion_attempt_for_connector_id( db_session.commit() # run the beat task to pick up this deletion from the db immediately - celery_app.send_task( + primary_app.send_task( "check_for_connector_deletion_task", priority=DanswerCeleryPriority.HIGH, + kwargs={"tenant_id": tenant_id}, ) if cc_pair.connector.source == DocumentSource.FILE: diff --git a/backend/danswer/server/manage/embedding/api.py b/backend/danswer/server/manage/embedding/api.py index eac872810ef..5d6e55e7a6d 100644 --- a/backend/danswer/server/manage/embedding/api.py +++ b/backend/danswer/server/manage/embedding/api.py @@ -43,6 +43,8 @@ def test_embedding_configuration( api_url=test_llm_request.api_url, provider_type=test_llm_request.provider_type, model_name=test_llm_request.model_name, + api_version=test_llm_request.api_version, + deployment_name=test_llm_request.deployment_name, normalize=False, query_prefix=None, passage_prefix=None, diff --git a/backend/danswer/server/manage/embedding/models.py b/backend/danswer/server/manage/embedding/models.py index d6210118df5..a7e7cc8e1ac 100644 --- a/backend/danswer/server/manage/embedding/models.py +++ b/backend/danswer/server/manage/embedding/models.py @@ -17,6 +17,8 @@ class TestEmbeddingRequest(BaseModel): api_key: str | None = None api_url: str | None = None model_name: str | None = None + api_version: str | None = None + deployment_name: str | None = None # This disables the "model_" protected namespace for pydantic model_config = {"protected_namespaces": ()} @@ -26,6 +28,8 @@ class CloudEmbeddingProvider(BaseModel): provider_type: EmbeddingProvider api_key: str | None = None api_url: str | None = None + api_version: str | None = None + deployment_name: str | None = None @classmethod def from_request( @@ -35,6 +39,8 @@ def from_request( provider_type=cloud_provider_model.provider_type, api_key=cloud_provider_model.api_key, api_url=cloud_provider_model.api_url, + api_version=cloud_provider_model.api_version, + deployment_name=cloud_provider_model.deployment_name, ) @@ -42,3 +48,5 @@ class CloudEmbeddingProviderCreationRequest(BaseModel): provider_type: EmbeddingProvider api_key: str | None = None api_url: str | None = None + api_version: str | None = None + deployment_name: str | None = None diff --git a/backend/danswer/server/manage/llm/api.py b/backend/danswer/server/manage/llm/api.py index 23f16047e91..9cac96236b0 100644 --- a/backend/danswer/server/manage/llm/api.py +++ b/backend/danswer/server/manage/llm/api.py @@ -54,7 +54,9 @@ def test_llm_configuration( api_base=test_llm_request.api_base, api_version=test_llm_request.api_version, custom_config=test_llm_request.custom_config, + deployment_name=test_llm_request.deployment_name, ) + functions_with_args: list[tuple[Callable, tuple]] = [(test_llm, (llm,))] if ( @@ -69,6 +71,7 @@ def test_llm_configuration( 
api_base=test_llm_request.api_base, api_version=test_llm_request.api_version, custom_config=test_llm_request.custom_config, + deployment_name=test_llm_request.deployment_name, ) functions_with_args.append((test_llm, (fast_llm,))) @@ -141,6 +144,20 @@ def put_llm_provider( detail=f"LLM Provider with name {llm_provider.name} already exists", ) + # Ensure default_model_name and fast_default_model_name are in display_model_names + # This is necessary for custom models and Bedrock/Azure models + if llm_provider.display_model_names is None: + llm_provider.display_model_names = [] + + if llm_provider.default_model_name not in llm_provider.display_model_names: + llm_provider.display_model_names.append(llm_provider.default_model_name) + + if ( + llm_provider.fast_default_model_name + and llm_provider.fast_default_model_name not in llm_provider.display_model_names + ): + llm_provider.display_model_names.append(llm_provider.fast_default_model_name) + try: return upsert_llm_provider( llm_provider=llm_provider, diff --git a/backend/danswer/server/manage/llm/models.py b/backend/danswer/server/manage/llm/models.py index 3ef66971003..9b371099c57 100644 --- a/backend/danswer/server/manage/llm/models.py +++ b/backend/danswer/server/manage/llm/models.py @@ -21,6 +21,7 @@ class TestLLMRequest(BaseModel): # model level default_model_name: str fast_default_model_name: str | None = None + deployment_name: str | None = None class LLMProviderDescriptor(BaseModel): @@ -66,6 +67,7 @@ class LLMProvider(BaseModel): is_public: bool = True groups: list[int] = Field(default_factory=list) display_model_names: list[str] | None = None + deployment_name: str | None = None class LLMProviderUpsertRequest(LLMProvider): @@ -100,4 +102,5 @@ def from_model(cls, llm_provider_model: "LLMProviderModel") -> "FullLLMProvider" ), is_public=llm_provider_model.is_public, groups=[group.id for group in llm_provider_model.groups], + deployment_name=llm_provider_model.deployment_name, ) diff --git a/backend/danswer/server/manage/models.py b/backend/danswer/server/manage/models.py index e3be4c4891d..e24b96e9a1e 100644 --- a/backend/danswer/server/manage/models.py +++ b/backend/danswer/server/manage/models.py @@ -57,6 +57,8 @@ class UserInfo(BaseModel): oidc_expiry: datetime | None = None current_token_created_at: datetime | None = None current_token_expiry_length: int | None = None + is_cloud_superuser: bool = False + organization_name: str | None = None @classmethod def from_model( @@ -64,6 +66,8 @@ def from_model( user: User, current_token_created_at: datetime | None = None, expiry_length: int | None = None, + is_cloud_superuser: bool = False, + organization_name: str | None = None, ) -> "UserInfo": return cls( id=str(user.id), @@ -80,6 +84,7 @@ def from_model( visible_assistants=user.visible_assistants, ) ), + organization_name=organization_name, # set to None if TRACK_EXTERNAL_IDP_EXPIRY is False so that we avoid cases # where they previously had this set + used OIDC, and now they switched to # basic auth are now constantly getting redirected back to the login page @@ -87,6 +92,7 @@ def from_model( oidc_expiry=user.oidc_expiry if TRACK_EXTERNAL_IDP_EXPIRY else None, current_token_created_at=current_token_created_at, current_token_expiry_length=expiry_length, + is_cloud_superuser=is_cloud_superuser, ) diff --git a/backend/danswer/server/manage/search_settings.py b/backend/danswer/server/manage/search_settings.py index c8433467f6c..79f690e5db6 100644 --- a/backend/danswer/server/manage/search_settings.py +++ 
b/backend/danswer/server/manage/search_settings.py
@@ -21,6 +21,9 @@
from danswer.db.search_settings import update_current_search_settings
from danswer.db.search_settings import update_search_settings_status
from danswer.document_index.factory import get_default_document_index
+from danswer.file_processing.unstructured import delete_unstructured_api_key
+from danswer.file_processing.unstructured import get_unstructured_api_key
+from danswer.file_processing.unstructured import update_unstructured_api_key
from danswer.natural_language_processing.search_nlp_models import clean_model_name
from danswer.search.models import SavedSearchSettings
from danswer.search.models import SearchSettingsCreationRequest
@@ -30,7 +33,6 @@ from danswer.utils.logger import setup_logger
from shared_configs.configs import ALT_INDEX_SUFFIX
-
router = APIRouter(prefix="/search-settings")
logger = setup_logger()
@@ -113,6 +115,7 @@ def set_new_search_settings(
    for cc_pair in get_connector_credential_pairs(db_session):
        resync_cc_pair(cc_pair, db_session=db_session)
+    db_session.commit()
    return IdReturn(id=new_search_settings.id)
@@ -196,3 +199,26 @@ def update_saved_search_settings(
    update_current_search_settings(
        search_settings=search_settings, db_session=db_session
    )
+
+
+@router.get("/unstructured-api-key-set")
+def unstructured_api_key_set(
+    _: User | None = Depends(current_admin_user),
+) -> bool:
+    api_key = get_unstructured_api_key()
+    return api_key is not None
+
+
+@router.put("/upsert-unstructured-api-key")
+def upsert_unstructured_api_key(
+    unstructured_api_key: str,
+    _: User | None = Depends(current_admin_user),
+) -> None:
+    update_unstructured_api_key(unstructured_api_key)
+
+
+@router.delete("/delete-unstructured-api-key")
+def delete_unstructured_api_key_endpoint(
+    _: User | None = Depends(current_admin_user),
+) -> None:
+    delete_unstructured_api_key()
diff --git a/backend/danswer/server/manage/slack_bot.py b/backend/danswer/server/manage/slack_bot.py
index 9a06b225cce..abee8b8644e 100644
--- a/backend/danswer/server/manage/slack_bot.py
+++ b/backend/danswer/server/manage/slack_bot.py
@@ -18,7 +18,7 @@ from danswer.db.slack_bot_config import insert_slack_bot_config
from danswer.db.slack_bot_config import remove_slack_bot_config
from danswer.db.slack_bot_config import update_slack_bot_config
-from danswer.dynamic_configs.interface import ConfigNotFoundError
+from danswer.key_value_store.interface import KvKeyNotFoundError
from danswer.server.manage.models import SlackBotConfig
from danswer.server.manage.models import SlackBotConfigCreationRequest
from danswer.server.manage.models import SlackBotTokens
@@ -212,5 +212,5 @@ def put_tokens(
def get_tokens(_: User | None = Depends(current_admin_user)) -> SlackBotTokens:
    try:
        return fetch_tokens()
-    except ConfigNotFoundError:
+    except KvKeyNotFoundError:
        raise HTTPException(status_code=404, detail="No tokens found")
diff --git a/backend/danswer/server/manage/users.py b/backend/danswer/server/manage/users.py
index e72b85dedad..59c4de89a71 100644
--- a/backend/danswer/server/manage/users.py
+++ b/backend/danswer/server/manage/users.py
@@ -2,17 +2,23 @@ from datetime import datetime
from datetime import timezone
+import jwt
+from email_validator import EmailNotValidError
+from email_validator import EmailUndeliverableError
from email_validator import validate_email
from fastapi import APIRouter
from fastapi import Body
from fastapi import Depends
from fastapi import HTTPException
+from fastapi import Request
from fastapi import status
+from psycopg2.errors import UniqueViolation from pydantic import BaseModel from sqlalchemy import Column from sqlalchemy import desc from sqlalchemy import select from sqlalchemy import update +from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import Session from danswer.auth.invited_users import get_invited_users @@ -24,11 +30,16 @@ from danswer.auth.users import current_admin_user from danswer.auth.users import current_curator_or_admin_user from danswer.auth.users import current_user +from danswer.auth.users import get_tenant_id_for_email from danswer.auth.users import optional_user from danswer.configs.app_configs import AUTH_TYPE +from danswer.configs.app_configs import ENABLE_EMAIL_INVITES from danswer.configs.app_configs import SESSION_EXPIRE_TIME_SECONDS +from danswer.configs.app_configs import SUPER_USERS from danswer.configs.app_configs import VALID_EMAIL_DOMAINS from danswer.configs.constants import AuthType +from danswer.db.auth import get_total_users_count +from danswer.db.engine import CURRENT_TENANT_ID_CONTEXTVAR from danswer.db.engine import get_session from danswer.db.models import AccessToken from danswer.db.models import DocumentSet__User @@ -38,7 +49,7 @@ from danswer.db.models import User__UserGroup from danswer.db.users import get_user_by_email from danswer.db.users import list_users -from danswer.dynamic_configs.factory import get_dynamic_config_store +from danswer.key_value_store.factory import get_kv_store from danswer.server.manage.models import AllUsersResponse from danswer.server.manage.models import UserByEmail from danswer.server.manage.models import UserInfo @@ -48,10 +59,15 @@ from danswer.server.models import FullUserSnapshot from danswer.server.models import InvitedUserSnapshot from danswer.server.models import MinimalUserSnapshot +from danswer.server.utils import send_user_email_invite from danswer.utils.logger import setup_logger from ee.danswer.db.api_key import is_api_key_email_address from ee.danswer.db.external_perm import delete_user__ext_group_for_user__no_commit from ee.danswer.db.user_group import remove_curator_status__no_commit +from ee.danswer.server.tenants.billing import register_tenant_users +from ee.danswer.server.tenants.provisioning import add_users_to_tenant +from ee.danswer.server.tenants.provisioning import remove_users_from_tenant +from shared_configs.configs import MULTI_TENANT logger = setup_logger() @@ -164,30 +180,99 @@ def list_all_users( def bulk_invite_users( emails: list[str] = Body(..., embed=True), current_user: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> int: """emails are string validated. 
If any email fails validation, no emails are invited and an exception is raised.""" + if current_user is None: raise HTTPException( status_code=400, detail="Auth is disabled, cannot invite users" ) + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() normalized_emails = [] - for email in emails: - email_info = validate_email(email) # can raise EmailNotValidError - normalized_emails.append(email_info.normalized) # type: ignore - all_emails = list(set(normalized_emails) | set(get_invited_users())) - return write_invited_users(all_emails) + try: + for email in emails: + email_info = validate_email(email) + normalized_emails.append(email_info.normalized) # type: ignore + + except (EmailUndeliverableError, EmailNotValidError) as e: + raise HTTPException( + status_code=400, + detail=f"Invalid email address: {email} - {str(e)}", + ) + + if MULTI_TENANT: + try: + add_users_to_tenant(normalized_emails, tenant_id) + + except IntegrityError as e: + if isinstance(e.orig, UniqueViolation): + raise HTTPException( + status_code=400, + detail="User has already been invited to a Danswer organization", + ) + raise + except Exception as e: + logger.error(f"Failed to add users to tenant {tenant_id}: {str(e)}") + + initial_invited_users = get_invited_users() + + all_emails = list(set(normalized_emails) | set(initial_invited_users)) + number_of_invited_users = write_invited_users(all_emails) + + if not MULTI_TENANT: + return number_of_invited_users + try: + logger.info("Registering tenant users") + register_tenant_users( + CURRENT_TENANT_ID_CONTEXTVAR.get(), get_total_users_count(db_session) + ) + if ENABLE_EMAIL_INVITES: + try: + for email in all_emails: + send_user_email_invite(email, current_user) + except Exception as e: + logger.error(f"Error sending email invite to invited users: {e}") + + return number_of_invited_users + except Exception as e: + logger.error(f"Failed to register tenant users: {str(e)}") + logger.info( + "Reverting changes: removing users from tenant and resetting invited users" + ) + write_invited_users(initial_invited_users) # Reset to original state + remove_users_from_tenant(normalized_emails, tenant_id) + raise e @router.patch("/manage/admin/remove-invited-user") def remove_invited_user( user_email: UserByEmail, _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), ) -> int: user_emails = get_invited_users() remaining_users = [user for user in user_emails if user != user_email.user_email] - return write_invited_users(remaining_users) + + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + remove_users_from_tenant([user_email.user_email], tenant_id) + number_of_invited_users = write_invited_users(remaining_users) + + try: + if MULTI_TENANT: + register_tenant_users( + CURRENT_TENANT_ID_CONTEXTVAR.get(), get_total_users_count(db_session) + ) + except Exception: + logger.error( + "Request to update number of seats taken in control plane failed. " + "This may cause synchronization issues/out of date enforcement of seat limits." 
+ ) + raise + + return number_of_invited_users @router.patch("/manage/admin/deactivate-user") @@ -330,6 +415,35 @@ async def get_user_role(user: User = Depends(current_user)) -> UserRoleResponse: return UserRoleResponse(role=user.role) +def get_current_token_expiration_jwt( + user: User | None, request: Request +) -> datetime | None: + if user is None: + return None + + try: + # Get the JWT from the cookie + jwt_token = request.cookies.get("fastapiusersauth") + if not jwt_token: + logger.error("No JWT token found in cookies") + return None + + # Decode the JWT + decoded_token = jwt.decode(jwt_token, options={"verify_signature": False}) + + # Get the 'exp' (expiration) claim from the token + exp = decoded_token.get("exp") + if exp: + return datetime.fromtimestamp(exp) + else: + logger.error("No 'exp' claim found in JWT") + return None + + except Exception as e: + logger.error(f"Error decoding JWT: {e}") + return None + + def get_current_token_creation( user: User | None, db_session: Session ) -> datetime | None: @@ -363,11 +477,12 @@ def verify_user_logged_in( # NOTE: this does not use `current_user` / `current_admin_user` because we don't want # to enforce user verification here - the frontend always wants to get the info about # the current user regardless of if they are currently verified + if user is None: # if auth type is disabled, return a dummy user with preferences from # the key-value store if AUTH_TYPE == AuthType.DISABLED: - store = get_dynamic_config_store() + store = get_kv_store() return fetch_no_auth_user(store) raise HTTPException( @@ -380,11 +495,17 @@ def verify_user_logged_in( detail="Access denied. User's OIDC token has expired.", ) - token_created_at = get_current_token_creation(user, db_session) + token_created_at = ( + None if MULTI_TENANT else get_current_token_creation(user, db_session) + ) + organization_name = get_tenant_id_for_email(user.email) + user_info = UserInfo.from_model( user, current_token_created_at=token_created_at, expiry_length=SESSION_EXPIRE_TIME_SECONDS, + is_cloud_superuser=user.email in SUPER_USERS, + organization_name=organization_name, ) return user_info @@ -405,7 +526,7 @@ def update_user_default_model( ) -> None: if user is None: if AUTH_TYPE == AuthType.DISABLED: - store = get_dynamic_config_store() + store = get_kv_store() no_auth_user = fetch_no_auth_user(store) no_auth_user.preferences.default_model = request.default_model set_no_auth_user_preferences(store, no_auth_user.preferences) @@ -433,7 +554,7 @@ def update_user_assistant_list( ) -> None: if user is None: if AUTH_TYPE == AuthType.DISABLED: - store = get_dynamic_config_store() + store = get_kv_store() no_auth_user = fetch_no_auth_user(store) no_auth_user.preferences.chosen_assistants = request.chosen_assistants @@ -487,7 +608,7 @@ def update_user_assistant_visibility( ) -> None: if user is None: if AUTH_TYPE == AuthType.DISABLED: - store = get_dynamic_config_store() + store = get_kv_store() no_auth_user = fetch_no_auth_user(store) preferences = no_auth_user.preferences updated_preferences = update_assistant_list(preferences, assistant_id, show) diff --git a/backend/danswer/server/query_and_chat/chat_backend.py b/backend/danswer/server/query_and_chat/chat_backend.py index c7f5983417d..c1f4a7b3970 100644 --- a/backend/danswer/server/query_and_chat/chat_backend.py +++ b/backend/danswer/server/query_and_chat/chat_backend.py @@ -1,8 +1,11 @@ import asyncio import io +import json import uuid from collections.abc import Callable from collections.abc import Generator +from typing import 
Tuple +from uuid import UUID from fastapi import APIRouter from fastapi import Depends @@ -11,15 +14,18 @@ from fastapi import Response from fastapi import UploadFile from fastapi.responses import StreamingResponse +from PIL import Image from pydantic import BaseModel from sqlalchemy.orm import Session from danswer.auth.users import current_user from danswer.chat.chat_utils import create_chat_chain +from danswer.chat.chat_utils import extract_headers from danswer.chat.process_message import stream_chat_message from danswer.configs.app_configs import WEB_DOMAIN from danswer.configs.constants import FileOrigin from danswer.configs.constants import MessageType +from danswer.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS from danswer.db.chat import create_chat_session from danswer.db.chat import create_new_chat_message from danswer.db.chat import delete_chat_session @@ -48,7 +54,6 @@ from danswer.llm.exceptions import GenAIDisabledException from danswer.llm.factory import get_default_llms from danswer.llm.factory import get_llms_for_persona -from danswer.llm.headers import get_litellm_additional_request_headers from danswer.natural_language_processing.utils import get_tokenizer from danswer.secondary_llm_flows.chat_session_naming import ( get_renamed_conversation_name, @@ -69,8 +74,10 @@ from danswer.server.query_and_chat.models import SearchFeedbackRequest from danswer.server.query_and_chat.models import UpdateChatSessionThreadRequest from danswer.server.query_and_chat.token_limit import check_token_rate_limits +from danswer.utils.headers import get_custom_tool_additional_request_headers from danswer.utils.logger import setup_logger + logger = setup_logger() router = APIRouter(prefix="/chat") @@ -126,7 +133,7 @@ def update_chat_session_model( @router.get("/get-chat-session/{session_id}") def get_chat_session( - session_id: int, + session_id: UUID, is_shared: bool = False, user: User | None = Depends(current_user), db_session: Session = Depends(get_session), @@ -226,7 +233,9 @@ def rename_chat_session( try: llm, _ = get_default_llms( - additional_headers=get_litellm_additional_request_headers(request.headers) + additional_headers=extract_headers( + request.headers, LITELLM_PASS_THROUGH_HEADERS + ) ) except GenAIDisabledException: # This may be longer than what the LLM tends to produce but is the most @@ -247,7 +256,7 @@ def rename_chat_session( @router.patch("/chat-session/{session_id}") def patch_chat_session( - session_id: int, + session_id: UUID, chat_session_update_req: ChatSessionUpdateRequest, user: User | None = Depends(current_user), db_session: Session = Depends(get_session), @@ -264,7 +273,7 @@ def patch_chat_session( @router.delete("/delete-chat-session/{session_id}") def delete_chat_session_by_id( - session_id: int, + session_id: UUID, user: User | None = Depends(current_user), db_session: Session = Depends(get_session), ) -> None: @@ -275,13 +284,14 @@ def delete_chat_session_by_id( raise HTTPException(status_code=400, detail=str(e)) -async def is_disconnected(request: Request) -> Callable[[], bool]: +async def is_connected(request: Request) -> Callable[[], bool]: main_loop = asyncio.get_event_loop() - def is_disconnected_sync() -> bool: + def is_connected_sync() -> bool: future = asyncio.run_coroutine_threadsafe(request.is_disconnected(), main_loop) try: - return not future.result(timeout=0.01) + is_connected = not future.result(timeout=0.01) + return is_connected except asyncio.TimeoutError: logger.error("Asyncio timed out") return True @@ -292,7 +302,7 @@ def 
is_disconnected_sync() -> bool: ) return True - return is_disconnected_sync + return is_connected_sync @router.post("/send-message") @@ -301,15 +311,28 @@ def handle_new_chat_message( request: Request, user: User | None = Depends(current_user), _: None = Depends(check_token_rate_limits), - is_disconnected_func: Callable[[], bool] = Depends(is_disconnected), + is_connected_func: Callable[[], bool] = Depends(is_connected), ) -> StreamingResponse: - """This endpoint is both used for all the following purposes: + """ + This endpoint is both used for all the following purposes: - Sending a new message in the session - Regenerating a message in the session (just send the same one again) - Editing a message (similar to regenerating but sending a different message) - Kicking off a seeded chat session (set `use_existing_user_message`) - To avoid extra overhead/latency, this assumes (and checks) that previous messages on the path - have already been set as latest""" + + Assumes that previous messages have been set as the latest to minimize overhead. + + Args: + chat_message_req (CreateChatMessageRequest): Details about the new chat message. + request (Request): The current HTTP request context. + user (User | None): The current user, obtained via dependency injection. + _ (None): Rate limit check is run if user/group/global rate limits are enabled. + is_connected_func (Callable[[], bool]): Function to check client disconnection, + used to stop the streaming response if the client disconnects. + + Returns: + StreamingResponse: Streams the response to the new chat message. + """ logger.debug(f"Received new chat message: {chat_message_req.message}") if ( @@ -319,18 +342,19 @@ def handle_new_chat_message( ): raise HTTPException(status_code=400, detail="Empty chat message is invalid") - import json - def stream_generator() -> Generator[str, None, None]: try: for packet in stream_chat_message( new_msg_req=chat_message_req, user=user, use_existing_user_message=chat_message_req.use_existing_user_message, - litellm_additional_headers=get_litellm_additional_request_headers( + litellm_additional_headers=extract_headers( + request.headers, LITELLM_PASS_THROUGH_HEADERS + ), + custom_tool_additional_headers=get_custom_tool_additional_request_headers( request.headers ), - is_connected=is_disconnected_func, + is_connected=is_connected_func, ): yield json.dumps(packet) if isinstance(packet, dict) else packet @@ -338,6 +362,9 @@ def stream_generator() -> Generator[str, None, None]: logger.exception(f"Error in chat message streaming: {e}") yield json.dumps({"error": str(e)}) + finally: + logger.debug("Stream generator finished") + return StreamingResponse(stream_generator(), media_type="text/event-stream") @@ -508,6 +535,21 @@ def seed_chat( """File upload""" +def convert_to_jpeg(file: UploadFile) -> Tuple[io.BytesIO, str]: + try: + with Image.open(file.file) as img: + if img.mode != "RGB": + img = img.convert("RGB") + jpeg_io = io.BytesIO() + img.save(jpeg_io, format="JPEG", quality=85) + jpeg_io.seek(0) + return jpeg_io, "image/jpeg" + except Exception as e: + raise HTTPException( + status_code=400, detail=f"Failed to convert image: {str(e)}" + ) + + @router.post("/file") def upload_files_for_chat( files: list[UploadFile], @@ -515,9 +557,9 @@ def upload_files_for_chat( _: User | None = Depends(current_user), ) -> dict[str, list[FileDescriptor]]: image_content_types = {"image/jpeg", "image/png", "image/webp"} + csv_content_types = {"text/csv"} text_content_types = { "text/plain", - "text/csv", "text/markdown", 
"text/x-markdown", "text/x-config", @@ -536,8 +578,10 @@ def upload_files_for_chat( "application/epub+zip", } - allowed_content_types = image_content_types.union(text_content_types).union( - document_content_types + allowed_content_types = ( + image_content_types.union(text_content_types) + .union(document_content_types) + .union(csv_content_types) ) for file in files: @@ -547,6 +591,10 @@ def upload_files_for_chat( elif file.content_type in text_content_types: error_detail = "Unsupported text file type. Supported text types include .txt, .csv, .md, .mdx, .conf, " ".log, .tsv." + elif file.content_type in csv_content_types: + error_detail = ( + "Unsupported CSV file type. Supported CSV types include .csv." + ) else: error_detail = ( "Unsupported document file type. Supported document types include .pdf, .docx, .pptx, .xlsx, " @@ -570,25 +618,38 @@ def upload_files_for_chat( for file in files: if file.content_type in image_content_types: file_type = ChatFileType.IMAGE + # Convert image to JPEG + file_content, new_content_type = convert_to_jpeg(file) + elif file.content_type in csv_content_types: + file_type = ChatFileType.CSV + file_content = io.BytesIO(file.file.read()) + new_content_type = file.content_type or "" elif file.content_type in document_content_types: file_type = ChatFileType.DOC + file_content = io.BytesIO(file.file.read()) + new_content_type = file.content_type or "" else: file_type = ChatFileType.PLAIN_TEXT + file_content = io.BytesIO(file.file.read()) + new_content_type = file.content_type or "" - # store the raw file + # store the file (now JPEG for images) file_id = str(uuid.uuid4()) file_store.save_file( file_name=file_id, - content=file.file, + content=file_content, display_name=file.filename, file_origin=FileOrigin.CHAT_UPLOAD, - file_type=file.content_type or file_type.value, + file_type=new_content_type or file_type.value, ) # if the file is a doc, extract text and store that so we don't need # to re-extract it every time we send a message if file_type == ChatFileType.DOC: - extracted_text = extract_file_text(file_name=file.filename, file=file.file) + extracted_text = extract_file_text( + file=file.file, + file_name=file.filename or "", + ) text_file_id = str(uuid.uuid4()) file_store.save_file( file_name=text_file_id, diff --git a/backend/danswer/server/query_and_chat/models.py b/backend/danswer/server/query_and_chat/models.py index c9109b141c3..13b3b1ec0a8 100644 --- a/backend/danswer/server/query_and_chat/models.py +++ b/backend/danswer/server/query_and_chat/models.py @@ -1,5 +1,6 @@ from datetime import datetime from typing import Any +from uuid import UUID from pydantic import BaseModel from pydantic import model_validator @@ -34,7 +35,7 @@ class SimpleQueryRequest(BaseModel): class UpdateChatSessionThreadRequest(BaseModel): # If not specified, use Danswer default persona - chat_session_id: int + chat_session_id: UUID new_alternate_model: str @@ -45,7 +46,7 @@ class ChatSessionCreationRequest(BaseModel): class CreateChatSessionID(BaseModel): - chat_session_id: int + chat_session_id: UUID class ChatFeedbackRequest(BaseModel): @@ -75,7 +76,7 @@ def check_is_positive_or_feedback_text(self) -> "ChatFeedbackRequest": class CreateChatMessageRequest(ChunkContext): """Before creating messages, be sure to create a chat_session and get an id""" - chat_session_id: int + chat_session_id: UUID # This is the primary-key (unique identifier) for the previous message of the tree parent_message_id: int | None # New message contents @@ -107,6 +108,10 @@ class 
CreateChatMessageRequest(ChunkContext): # used for seeded chats to kick off the generation of an AI answer use_existing_user_message: bool = False + # forces the LLM to return a structured response, see + # https://platform.openai.com/docs/guides/structured-outputs/introduction + structured_response_format: dict | None = None + @model_validator(mode="after") def check_search_doc_ids_or_retrieval_options(self) -> "CreateChatMessageRequest": if self.search_doc_ids is None and self.retrieval_options is None: @@ -115,13 +120,18 @@ def check_search_doc_ids_or_retrieval_options(self) -> "CreateChatMessageRequest ) return self + def model_dump(self, *args: Any, **kwargs: Any) -> dict[str, Any]: + data = super().model_dump(*args, **kwargs) + data["chat_session_id"] = str(data["chat_session_id"]) + return data + class ChatMessageIdentifier(BaseModel): message_id: int class ChatRenameRequest(BaseModel): - chat_session_id: int + chat_session_id: UUID name: str | None = None @@ -134,7 +144,7 @@ class RenameChatSessionResponse(BaseModel): class ChatSessionDetails(BaseModel): - id: int + id: UUID name: str persona_id: int | None = None time_created: str @@ -175,10 +185,10 @@ class ChatMessageDetail(BaseModel): overridden_model: str | None alternate_assistant_id: int | None = None # Dict mapping citation number to db_doc_id - chat_session_id: int | None = None + chat_session_id: UUID | None = None citations: dict[int, int] | None = None files: list[FileDescriptor] - tool_calls: list[ToolCallFinalResult] + tool_call: ToolCallFinalResult | None def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: # type: ignore initial_dict = super().model_dump(mode="json", *args, **kwargs) # type: ignore @@ -187,14 +197,14 @@ def model_dump(self, *args: list, **kwargs: dict[str, Any]) -> dict[str, Any]: class SearchSessionDetailResponse(BaseModel): - search_session_id: int + search_session_id: UUID description: str documents: list[SearchDoc] messages: list[ChatMessageDetail] class ChatSessionDetailResponse(BaseModel): - chat_session_id: int + chat_session_id: UUID description: str persona_id: int | None = None persona_name: str | None diff --git a/backend/danswer/server/query_and_chat/query_backend.py b/backend/danswer/server/query_and_chat/query_backend.py index 96f674276f4..1b8d5dc4b5e 100644 --- a/backend/danswer/server/query_and_chat/query_backend.py +++ b/backend/danswer/server/query_and_chat/query_backend.py @@ -1,3 +1,7 @@ +import json +from collections.abc import Generator +from uuid import UUID + from fastapi import APIRouter from fastapi import Depends from fastapi import HTTPException @@ -186,7 +190,7 @@ def get_user_search_sessions( @basic_router.get("/search-session/{session_id}") def get_search_session( - session_id: int, + session_id: UUID, is_shared: bool = False, user: User | None = Depends(current_user), db_session: Session = Depends(get_session), @@ -265,10 +269,17 @@ def get_answer_with_quote( logger.notice(f"Received query for one shot answer with quotes: {query}") - packets = stream_search_answer( - query_req=query_request, - user=user, - max_document_tokens=None, - max_history_tokens=0, - ) - return StreamingResponse(packets, media_type="application/json") + def stream_generator() -> Generator[str, None, None]: + try: + for packet in stream_search_answer( + query_req=query_request, + user=user, + max_document_tokens=None, + max_history_tokens=0, + ): + yield json.dumps(packet) if isinstance(packet, dict) else packet + except Exception as e: + logger.exception(f"Error in 
search answer streaming: {e}") + yield json.dumps({"error": str(e)}) + + return StreamingResponse(stream_generator(), media_type="application/json") diff --git a/backend/danswer/server/query_and_chat/token_limit.py b/backend/danswer/server/query_and_chat/token_limit.py index 3f5d76bac7f..d439e15a379 100644 --- a/backend/danswer/server/query_and_chat/token_limit.py +++ b/backend/danswer/server/query_and_chat/token_limit.py @@ -13,6 +13,7 @@ from danswer.auth.users import current_user from danswer.db.engine import get_session_context_manager +from danswer.db.engine import get_session_with_tenant from danswer.db.models import ChatMessage from danswer.db.models import ChatSession from danswer.db.models import TokenRateLimit @@ -20,6 +21,7 @@ from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import fetch_versioned_implementation from ee.danswer.db.token_limit import fetch_all_global_token_rate_limits +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logger = setup_logger() @@ -39,11 +41,11 @@ def check_token_rate_limits( versioned_rate_limit_strategy = fetch_versioned_implementation( "danswer.server.query_and_chat.token_limit", "_check_token_rate_limits" ) - return versioned_rate_limit_strategy(user) + return versioned_rate_limit_strategy(user, CURRENT_TENANT_ID_CONTEXTVAR.get()) -def _check_token_rate_limits(_: User | None) -> None: - _user_is_rate_limited_by_global() +def _check_token_rate_limits(_: User | None, tenant_id: str | None) -> None: + _user_is_rate_limited_by_global(tenant_id) """ @@ -51,8 +53,8 @@ def _check_token_rate_limits(_: User | None) -> None: """ -def _user_is_rate_limited_by_global() -> None: - with get_session_context_manager() as db_session: +def _user_is_rate_limited_by_global(tenant_id: str | None) -> None: + with get_session_with_tenant(tenant_id) as db_session: global_rate_limits = fetch_all_global_token_rate_limits( db_session=db_session, enabled_only=True, ordered=False ) diff --git a/backend/danswer/server/settings/api.py b/backend/danswer/server/settings/api.py index 5b8564c3d3a..4f598a18353 100644 --- a/backend/danswer/server/settings/api.py +++ b/backend/danswer/server/settings/api.py @@ -15,12 +15,10 @@ from danswer.db.models import User from danswer.db.notification import create_notification from danswer.db.notification import dismiss_all_notifications -from danswer.db.notification import dismiss_notification -from danswer.db.notification import get_notification_by_id from danswer.db.notification import get_notifications from danswer.db.notification import update_notification_last_shown -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError from danswer.server.settings.models import Notification from danswer.server.settings.models import Settings from danswer.server.settings.models import UserSettings @@ -55,79 +53,70 @@ def fetch_settings( """Settings and notifications are stuffed into this single endpoint to reduce number of Postgres calls""" general_settings = load_settings() - user_notifications = get_user_notifications(user, db_session) + settings_notifications = get_settings_notifications(user, db_session) try: - kv_store = get_dynamic_config_store() + kv_store = get_kv_store() needs_reindexing = cast(bool, kv_store.load(KV_REINDEX_KEY)) - except ConfigNotFoundError: + except 
KvKeyNotFoundError: needs_reindexing = False return UserSettings( **general_settings.model_dump(), - notifications=user_notifications, + notifications=settings_notifications, needs_reindexing=needs_reindexing, ) -@basic_router.post("/notifications/{notification_id}/dismiss") -def dismiss_notification_endpoint( - notification_id: int, - user: User | None = Depends(current_user), - db_session: Session = Depends(get_session), -) -> None: - try: - notification = get_notification_by_id(notification_id, user, db_session) - except PermissionError: - raise HTTPException( - status_code=403, detail="Not authorized to dismiss this notification" - ) - except ValueError: - raise HTTPException(status_code=404, detail="Notification not found") - - dismiss_notification(notification, db_session) - - -def get_user_notifications( +def get_settings_notifications( user: User | None, db_session: Session ) -> list[Notification]: - """Get notifications for the user, currently the logic is very specific to the reindexing flag""" + """Get notifications for settings page, including product gating and reindex notifications""" + # Check for product gating notification + product_notif = get_notifications( + user=None, + notif_type=NotificationType.TRIAL_ENDS_TWO_DAYS, + db_session=db_session, + ) + notifications = [Notification.from_model(product_notif[0])] if product_notif else [] + + # Only show reindex notifications to admins is_admin = is_user_admin(user) if not is_admin: - # Reindexing flag should only be shown to admins, basic users can't trigger it anyway - return [] + return notifications - kv_store = get_dynamic_config_store() + # Check if reindexing is needed + kv_store = get_kv_store() try: needs_index = cast(bool, kv_store.load(KV_REINDEX_KEY)) if not needs_index: dismiss_all_notifications( notif_type=NotificationType.REINDEX, db_session=db_session ) - return [] - except ConfigNotFoundError: + return notifications + except KvKeyNotFoundError: # If something goes wrong and the flag is gone, better to not start a reindexing # it's a heavyweight long running job and maybe this flag is cleaned up later logger.warning("Could not find reindex flag") - return [] + return notifications try: # Need a transaction in order to prevent under-counting current notifications - db_session.begin() - reindex_notifs = get_notifications( user=user, notif_type=NotificationType.REINDEX, db_session=db_session ) if not reindex_notifs: notif = create_notification( - user=user, + user_id=user.id if user else None, notif_type=NotificationType.REINDEX, db_session=db_session, ) db_session.flush() db_session.commit() - return [Notification.from_model(notif)] + + notifications.append(Notification.from_model(notif)) + return notifications if len(reindex_notifs) > 1: logger.error("User has multiple reindex notifications") @@ -138,8 +127,9 @@ def get_user_notifications( ) db_session.commit() - return [Notification.from_model(reindex_notif)] + notifications.append(Notification.from_model(reindex_notif)) + return notifications except SQLAlchemyError: logger.exception("Error while processing notifications") db_session.rollback() - return [] + return notifications diff --git a/backend/danswer/server/settings/models.py b/backend/danswer/server/settings/models.py index ae7e7236c8d..af93595501d 100644 --- a/backend/danswer/server/settings/models.py +++ b/backend/danswer/server/settings/models.py @@ -12,12 +12,19 @@ class PageType(str, Enum): SEARCH = "search" +class GatingType(str, Enum): + FULL = "full" # Complete restriction of access to the 
product or service + PARTIAL = "partial" # Full access but warning (no credit card on file) + NONE = "none" # No restrictions, full access to all features + + class Notification(BaseModel): id: int notif_type: NotificationType dismissed: bool last_shown: datetime first_shown: datetime + additional_data: dict | None = None @classmethod def from_model(cls, notif: NotificationDBModel) -> "Notification": @@ -27,6 +34,7 @@ def from_model(cls, notif: NotificationDBModel) -> "Notification": dismissed=notif.dismissed, last_shown=notif.last_shown, first_shown=notif.first_shown, + additional_data=notif.additional_data, ) @@ -38,6 +46,7 @@ class Settings(BaseModel): default_page: PageType = PageType.SEARCH maximum_chat_retention_days: int | None = None gpu_enabled: bool | None = None + product_gating: GatingType = GatingType.NONE def check_validity(self) -> None: chat_page_enabled = self.chat_page_enabled diff --git a/backend/danswer/server/settings/store.py b/backend/danswer/server/settings/store.py index 6f2872f40f9..c3875c6aecb 100644 --- a/backend/danswer/server/settings/store.py +++ b/backend/danswer/server/settings/store.py @@ -1,16 +1,16 @@ from typing import cast from danswer.configs.constants import KV_SETTINGS_KEY -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError from danswer.server.settings.models import Settings def load_settings() -> Settings: - dynamic_config_store = get_dynamic_config_store() + dynamic_config_store = get_kv_store() try: settings = Settings(**cast(dict, dynamic_config_store.load(KV_SETTINGS_KEY))) - except ConfigNotFoundError: + except KvKeyNotFoundError: settings = Settings() dynamic_config_store.store(KV_SETTINGS_KEY, settings.model_dump()) @@ -18,4 +18,4 @@ def load_settings() -> Settings: def store_settings(settings: Settings) -> None: - get_dynamic_config_store().store(KV_SETTINGS_KEY, settings.model_dump()) + get_kv_store().store(KV_SETTINGS_KEY, settings.model_dump()) diff --git a/backend/danswer/server/utils.py b/backend/danswer/server/utils.py index 53ed5b426ba..68e6dc8d0b8 100644 --- a/backend/danswer/server/utils.py +++ b/backend/danswer/server/utils.py @@ -1,7 +1,18 @@ import json +import smtplib from datetime import datetime +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from textwrap import dedent from typing import Any +from danswer.configs.app_configs import SMTP_PASS +from danswer.configs.app_configs import SMTP_PORT +from danswer.configs.app_configs import SMTP_SERVER +from danswer.configs.app_configs import SMTP_USER +from danswer.configs.app_configs import WEB_DOMAIN +from danswer.db.models import User + class DateTimeEncoder(json.JSONEncoder): """Custom JSON encoder that converts datetime objects to ISO format strings.""" @@ -43,3 +54,31 @@ def mask_credential_dict(credential_dict: dict[str, Any]) -> dict[str, str]: masked_creds[key] = mask_string(val) return masked_creds + + +def send_user_email_invite(user_email: str, current_user: User) -> None: + msg = MIMEMultipart() + msg["Subject"] = "Invitation to Join Danswer Workspace" + msg["From"] = current_user.email + msg["To"] = user_email + + email_body = dedent( + f"""\ + Hello, + + You have been invited to join a workspace on Danswer. 
+ + To join the workspace, please visit the following link: + + {WEB_DOMAIN}/auth/login + + Best regards, + The Danswer Team + """ + ) + + msg.attach(MIMEText(email_body, "plain")) + with smtplib.SMTP(SMTP_SERVER, SMTP_PORT) as smtp_server: + smtp_server.starttls() + smtp_server.login(SMTP_USER, SMTP_PASS) + smtp_server.send_message(msg) diff --git a/backend/danswer/setup.py b/backend/danswer/setup.py new file mode 100644 index 00000000000..7abd7482a78 --- /dev/null +++ b/backend/danswer/setup.py @@ -0,0 +1,358 @@ +import time + +from sqlalchemy.orm import Session + +from danswer.chat.load_yamls import load_chat_yamls +from danswer.configs.app_configs import DISABLE_INDEX_UPDATE_ON_SWAP +from danswer.configs.app_configs import MANAGED_VESPA +from danswer.configs.constants import KV_REINDEX_KEY +from danswer.configs.constants import KV_SEARCH_SETTINGS +from danswer.configs.model_configs import FAST_GEN_AI_MODEL_VERSION +from danswer.configs.model_configs import GEN_AI_API_KEY +from danswer.configs.model_configs import GEN_AI_MODEL_VERSION +from danswer.db.connector import check_connectors_exist +from danswer.db.connector import create_initial_default_connector +from danswer.db.connector_credential_pair import associate_default_cc_pair +from danswer.db.connector_credential_pair import get_connector_credential_pairs +from danswer.db.connector_credential_pair import resync_cc_pair +from danswer.db.credentials import create_initial_public_credential +from danswer.db.document import check_docs_exist +from danswer.db.index_attempt import cancel_indexing_attempts_past_model +from danswer.db.index_attempt import expire_index_attempts +from danswer.db.llm import fetch_default_provider +from danswer.db.llm import update_default_provider +from danswer.db.llm import upsert_llm_provider +from danswer.db.persona import delete_old_default_personas +from danswer.db.search_settings import get_current_search_settings +from danswer.db.search_settings import get_secondary_search_settings +from danswer.db.search_settings import update_current_search_settings +from danswer.db.search_settings import update_secondary_search_settings +from danswer.db.swap_index import check_index_swap +from danswer.document_index.factory import get_default_document_index +from danswer.document_index.interfaces import DocumentIndex +from danswer.document_index.vespa.index import VespaIndex +from danswer.indexing.models import IndexingSetting +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError +from danswer.natural_language_processing.search_nlp_models import EmbeddingModel +from danswer.natural_language_processing.search_nlp_models import warm_up_bi_encoder +from danswer.natural_language_processing.search_nlp_models import warm_up_cross_encoder +from danswer.search.models import SavedSearchSettings +from danswer.search.retrieval.search_runner import download_nltk_data +from danswer.seeding.load_docs import seed_initial_documents +from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from danswer.server.settings.store import load_settings +from danswer.server.settings.store import store_settings +from danswer.tools.built_in_tools import auto_add_search_tool_to_personas +from danswer.tools.built_in_tools import load_builtin_tools +from danswer.tools.built_in_tools import refresh_built_in_tools_cache +from danswer.utils.gpu_utils import gpu_status_request +from danswer.utils.logger import setup_logger +from shared_configs.configs import 
ALT_INDEX_SUFFIX +from shared_configs.configs import MODEL_SERVER_HOST +from shared_configs.configs import MODEL_SERVER_PORT +from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import SUPPORTED_EMBEDDING_MODELS +from shared_configs.model_server_models import SupportedEmbeddingModel + + +logger = setup_logger() + + +def setup_danswer(db_session: Session, tenant_id: str | None) -> None: + """ + Setup Danswer for a particular tenant. In the Single Tenant case, it will set it up for the default schema + on server startup. In the MT case, it will be called when the tenant is created. + + The Tenant Service calls the tenants/create endpoint which runs this. + """ + check_index_swap(db_session=db_session) + search_settings = get_current_search_settings(db_session) + secondary_search_settings = get_secondary_search_settings(db_session) + + # Break bad state for thrashing indexes + if secondary_search_settings and DISABLE_INDEX_UPDATE_ON_SWAP: + expire_index_attempts( + search_settings_id=search_settings.id, db_session=db_session + ) + + for cc_pair in get_connector_credential_pairs(db_session): + resync_cc_pair(cc_pair, db_session=db_session) + + # Expire all old embedding models indexing attempts, technically redundant + cancel_indexing_attempts_past_model(db_session) + + logger.notice(f'Using Embedding model: "{search_settings.model_name}"') + if search_settings.query_prefix or search_settings.passage_prefix: + logger.notice(f'Query embedding prefix: "{search_settings.query_prefix}"') + logger.notice(f'Passage embedding prefix: "{search_settings.passage_prefix}"') + + if search_settings: + if not search_settings.disable_rerank_for_streaming: + logger.notice("Reranking is enabled.") + + if search_settings.multilingual_expansion: + logger.notice( + f"Multilingual query expansion is enabled with {search_settings.multilingual_expansion}." + ) + if ( + search_settings.rerank_model_name + and not search_settings.provider_type + and not search_settings.rerank_provider_type + ): + warm_up_cross_encoder(search_settings.rerank_model_name) + + logger.notice("Verifying query preprocessing (NLTK) data is downloaded") + download_nltk_data() + + # setup Postgres with default credential, llm providers, etc. 
+ setup_postgres(db_session) + + translate_saved_search_settings(db_session) + + # Does the user need to trigger a reindexing to bring the document index + # into a good state, marked in the kv store + if not MULTI_TENANT: + mark_reindex_flag(db_session) + + # Ensure Vespa is setup correctly, this step is relatively near the end because Vespa + # takes a bit of time to start up + logger.notice("Verifying Document Index(s) is/are available.") + document_index = get_default_document_index( + primary_index_name=search_settings.index_name, + secondary_index_name=secondary_search_settings.index_name + if secondary_search_settings + else None, + ) + + success = setup_vespa( + document_index, + IndexingSetting.from_db_model(search_settings), + IndexingSetting.from_db_model(secondary_search_settings) + if secondary_search_settings + else None, + ) + if not success: + raise RuntimeError("Could not connect to Vespa within the specified timeout.") + + logger.notice(f"Model Server: http://{MODEL_SERVER_HOST}:{MODEL_SERVER_PORT}") + if search_settings.provider_type is None: + warm_up_bi_encoder( + embedding_model=EmbeddingModel.from_db_model( + search_settings=search_settings, + server_host=MODEL_SERVER_HOST, + server_port=MODEL_SERVER_PORT, + ), + ) + + # update multipass indexing setting based on GPU availability + update_default_multipass_indexing(db_session) + + seed_initial_documents(db_session, tenant_id) + + +def translate_saved_search_settings(db_session: Session) -> None: + kv_store = get_kv_store() + + try: + search_settings_dict = kv_store.load(KV_SEARCH_SETTINGS) + if isinstance(search_settings_dict, dict): + # Update current search settings + current_settings = get_current_search_settings(db_session) + + # Update non-preserved fields + if current_settings: + current_settings_dict = SavedSearchSettings.from_db_model( + current_settings + ).dict() + + new_current_settings = SavedSearchSettings( + **{**current_settings_dict, **search_settings_dict} + ) + update_current_search_settings(db_session, new_current_settings) + + # Update secondary search settings + secondary_settings = get_secondary_search_settings(db_session) + if secondary_settings: + secondary_settings_dict = SavedSearchSettings.from_db_model( + secondary_settings + ).dict() + + new_secondary_settings = SavedSearchSettings( + **{**secondary_settings_dict, **search_settings_dict} + ) + update_secondary_search_settings( + db_session, + new_secondary_settings, + ) + # Delete the KV store entry after successful update + kv_store.delete(KV_SEARCH_SETTINGS) + logger.notice("Search settings updated and KV store entry deleted.") + else: + logger.notice("KV store search settings is empty.") + except KvKeyNotFoundError: + logger.notice("No search config found in KV store.") + + +def mark_reindex_flag(db_session: Session) -> None: + kv_store = get_kv_store() + try: + value = kv_store.load(KV_REINDEX_KEY) + logger.debug(f"Re-indexing flag has value {value}") + return + except KvKeyNotFoundError: + # Only need to update the flag if it hasn't been set + pass + + # If their first deployment is after the changes, it will + # enable this when the other changes go in, need to avoid + # this being set to False, then the user indexes things on the old version + docs_exist = check_docs_exist(db_session) + connectors_exist = check_connectors_exist(db_session) + if docs_exist or connectors_exist: + kv_store.store(KV_REINDEX_KEY, True) + else: + kv_store.store(KV_REINDEX_KEY, False) + + +def setup_vespa( + document_index: DocumentIndex, + index_setting: 
IndexingSetting, + secondary_index_setting: IndexingSetting | None, +) -> bool: + # Vespa startup is a bit slow, so give it a few seconds + WAIT_SECONDS = 5 + VESPA_ATTEMPTS = 5 + for x in range(VESPA_ATTEMPTS): + try: + logger.notice(f"Setting up Vespa (attempt {x+1}/{VESPA_ATTEMPTS})...") + document_index.ensure_indices_exist( + index_embedding_dim=index_setting.model_dim, + secondary_index_embedding_dim=secondary_index_setting.model_dim + if secondary_index_setting + else None, + ) + + logger.notice("Vespa setup complete.") + return True + except Exception: + logger.notice( + f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds." + ) + time.sleep(WAIT_SECONDS) + + logger.error( + f"Vespa setup did not succeed. Attempt limit reached. ({VESPA_ATTEMPTS})" + ) + return False + + +def setup_postgres(db_session: Session) -> None: + logger.notice("Verifying default connector/credential exist.") + create_initial_public_credential(db_session) + create_initial_default_connector(db_session) + associate_default_cc_pair(db_session) + + logger.notice("Loading default Prompts and Personas") + delete_old_default_personas(db_session) + load_chat_yamls(db_session) + + logger.notice("Loading built-in tools") + load_builtin_tools(db_session) + refresh_built_in_tools_cache(db_session) + auto_add_search_tool_to_personas(db_session) + + if GEN_AI_API_KEY and fetch_default_provider(db_session) is None: + # Only for dev flows + logger.notice("Setting up default OpenAI LLM for dev.") + llm_model = GEN_AI_MODEL_VERSION or "gpt-4o-mini" + fast_model = FAST_GEN_AI_MODEL_VERSION or "gpt-4o-mini" + model_req = LLMProviderUpsertRequest( + name="DevEnvPresetOpenAI", + provider="openai", + api_key=GEN_AI_API_KEY, + api_base=None, + api_version=None, + custom_config=None, + default_model_name=llm_model, + fast_default_model_name=fast_model, + is_public=True, + groups=[], + display_model_names=[llm_model, fast_model], + model_names=[llm_model, fast_model], + ) + new_llm_provider = upsert_llm_provider( + llm_provider=model_req, db_session=db_session + ) + update_default_provider(provider_id=new_llm_provider.id, db_session=db_session) + + +def update_default_multipass_indexing(db_session: Session) -> None: + docs_exist = check_docs_exist(db_session) + connectors_exist = check_connectors_exist(db_session) + logger.debug(f"Docs exist: {docs_exist}, Connectors exist: {connectors_exist}") + + if not docs_exist and not connectors_exist: + logger.info( + "No existing docs or connectors found. Checking GPU availability for multipass indexing." + ) + gpu_available = gpu_status_request() + logger.info(f"GPU available: {gpu_available}") + + current_settings = get_current_search_settings(db_session) + + logger.notice(f"Updating multipass indexing setting to: {gpu_available}") + updated_settings = SavedSearchSettings.from_db_model(current_settings) + # Enable multipass indexing if GPU is available or if using a cloud provider + updated_settings.multipass_indexing = ( + gpu_available or current_settings.cloud_provider is not None + ) + update_current_search_settings(db_session, updated_settings) + + # Update settings with GPU availability + settings = load_settings() + settings.gpu_enabled = gpu_available + store_settings(settings) + logger.notice(f"Updated settings with GPU availability: {gpu_available}") + + else: + logger.debug( + "Existing docs or connectors found. Skipping multipass indexing update." 
+ ) + + +def setup_multitenant_danswer() -> None: + # For Managed Vespa, the schema is sent over via the Vespa Console manually. + if not MANAGED_VESPA: + setup_vespa_multitenant(SUPPORTED_EMBEDDING_MODELS) + + +def setup_vespa_multitenant(supported_indices: list[SupportedEmbeddingModel]) -> bool: + # This is for local testing + WAIT_SECONDS = 5 + VESPA_ATTEMPTS = 5 + for x in range(VESPA_ATTEMPTS): + try: + logger.notice(f"Setting up Vespa (attempt {x+1}/{VESPA_ATTEMPTS})...") + VespaIndex.register_multitenant_indices( + indices=[index.index_name for index in supported_indices] + + [ + f"{index.index_name}{ALT_INDEX_SUFFIX}" + for index in supported_indices + ], + embedding_dims=[index.dim for index in supported_indices] + + [index.dim for index in supported_indices], + ) + + logger.notice("Vespa setup complete.") + return True + except Exception: + logger.notice( + f"Vespa setup did not succeed. The Vespa service may not be ready yet. Retrying in {WAIT_SECONDS} seconds." + ) + time.sleep(WAIT_SECONDS) + + logger.error( + f"Vespa setup did not succeed. Attempt limit reached. ({VESPA_ATTEMPTS})" + ) + return False diff --git a/backend/danswer/tools/base_tool.py b/backend/danswer/tools/base_tool.py new file mode 100644 index 00000000000..73902504462 --- /dev/null +++ b/backend/danswer/tools/base_tool.py @@ -0,0 +1,59 @@ +from typing import cast +from typing import TYPE_CHECKING + +from langchain_core.messages import HumanMessage + +from danswer.llm.utils import message_to_prompt_and_imgs +from danswer.tools.tool import Tool + +if TYPE_CHECKING: + from danswer.llm.answering.prompts.build import AnswerPromptBuilder + from danswer.tools.tool_implementations.custom.custom_tool import ( + CustomToolCallSummary, + ) + from danswer.tools.message import ToolCallSummary + from danswer.tools.models import ToolResponse + + +def build_user_message_for_non_tool_calling_llm( + message: HumanMessage, + tool_name: str, + *args: "ToolResponse", +) -> str: + query, _ = message_to_prompt_and_imgs(message) + + tool_run_summary = cast("CustomToolCallSummary", args[0].response).tool_result + return f""" +Here's the result from the {tool_name} tool: + +{tool_run_summary} + +Now respond to the following: + +{query} +""".strip() + + +class BaseTool(Tool): + def build_next_prompt( + self, + prompt_builder: "AnswerPromptBuilder", + tool_call_summary: "ToolCallSummary", + tool_responses: list["ToolResponse"], + using_tool_calling_llm: bool, + ) -> "AnswerPromptBuilder": + if using_tool_calling_llm: + prompt_builder.append_message(tool_call_summary.tool_call_request) + prompt_builder.append_message(tool_call_summary.tool_call_result) + else: + prompt_builder.update_user_prompt( + HumanMessage( + content=build_user_message_for_non_tool_calling_llm( + prompt_builder.user_message_and_token_cnt[0], + self.name, + *tool_responses, + ) + ) + ) + + return prompt_builder diff --git a/backend/danswer/tools/built_in_tools.py b/backend/danswer/tools/built_in_tools.py index 99b2ae3bbb6..fb64381f1d0 100644 --- a/backend/danswer/tools/built_in_tools.py +++ b/backend/danswer/tools/built_in_tools.py @@ -9,9 +9,13 @@ from danswer.db.models import Persona from danswer.db.models import Tool as ToolDBModel -from danswer.tools.images.image_generation_tool import ImageGenerationTool -from danswer.tools.internet_search.internet_search_tool import InternetSearchTool -from danswer.tools.search.search_tool import SearchTool +from danswer.tools.tool_implementations.images.image_generation_tool import ( + ImageGenerationTool, +) +from 
danswer.tools.tool_implementations.internet_search.internet_search_tool import ( + InternetSearchTool, +) +from danswer.tools.tool_implementations.search.search_tool import SearchTool from danswer.tools.tool import Tool from danswer.utils.logger import setup_logger diff --git a/backend/danswer/tools/custom/custom_tool_prompt_builder.py b/backend/danswer/tools/custom/custom_tool_prompt_builder.py deleted file mode 100644 index 8016363acc9..00000000000 --- a/backend/danswer/tools/custom/custom_tool_prompt_builder.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import cast - -from danswer.tools.custom.custom_tool import CustomToolCallSummary -from danswer.tools.models import ToolResponse - - -def build_user_message_for_custom_tool_for_non_tool_calling_llm( - query: str, - tool_name: str, - *args: ToolResponse, -) -> str: - tool_run_summary = cast(CustomToolCallSummary, args[0].response).tool_result - return f""" -Here's the result from the {tool_name} tool: - -{tool_run_summary} - -Now respond to the following: - -{query} -""".strip() diff --git a/backend/danswer/tools/models.py b/backend/danswer/tools/models.py index 6317a95e2d3..4f56aecd372 100644 --- a/backend/danswer/tools/models.py +++ b/backend/danswer/tools/models.py @@ -1,4 +1,5 @@ from typing import Any +from uuid import UUID from pydantic import BaseModel from pydantic import model_validator @@ -40,7 +41,7 @@ class ToolCallFinalResult(ToolCallKickoff): class DynamicSchemaInfo(BaseModel): - chat_session_id: int | None + chat_session_id: UUID | None message_id: int | None diff --git a/backend/danswer/tools/tool.py b/backend/danswer/tools/tool.py index 81b9b457178..1b1c43ab8da 100644 --- a/backend/danswer/tools/tool.py +++ b/backend/danswer/tools/tool.py @@ -1,11 +1,17 @@ import abc from collections.abc import Generator from typing import Any +from typing import TYPE_CHECKING -from danswer.dynamic_configs.interface import JSON_ro +from danswer.key_value_store.interface import JSON_ro from danswer.llm.answering.models import PreviousMessage from danswer.llm.interfaces import LLM -from danswer.tools.models import ToolResponse + + +if TYPE_CHECKING: + from danswer.llm.answering.prompts.build import AnswerPromptBuilder + from danswer.tools.message import ToolCallSummary + from danswer.tools.models import ToolResponse class Tool(abc.ABC): @@ -32,7 +38,7 @@ def tool_definition(self) -> dict: @abc.abstractmethod def build_tool_message_content( - self, *args: ToolResponse + self, *args: "ToolResponse" ) -> str | list[str | dict[str, Any]]: raise NotImplementedError @@ -51,13 +57,26 @@ def get_args_for_non_tool_calling_llm( """Actual execution of the tool""" @abc.abstractmethod - def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]: + def run(self, **kwargs: Any) -> Generator["ToolResponse", None, None]: raise NotImplementedError @abc.abstractmethod - def final_result(self, *args: ToolResponse) -> JSON_ro: + def final_result(self, *args: "ToolResponse") -> JSON_ro: """ This is the "final summary" result of the tool. It is the result that will be stored in the database. """ raise NotImplementedError + + """Some tools may want to modify the prompt based on the tool call summary and tool responses. 
+ Default behavior is to continue with just the raw tool call request/result passed to the LLM.""" + + @abc.abstractmethod + def build_next_prompt( + self, + prompt_builder: "AnswerPromptBuilder", + tool_call_summary: "ToolCallSummary", + tool_responses: list["ToolResponse"], + using_tool_calling_llm: bool, + ) -> "AnswerPromptBuilder": + raise NotImplementedError diff --git a/backend/danswer/tools/custom/base_tool_types.py b/backend/danswer/tools/tool_implementations/custom/base_tool_types.py similarity index 100% rename from backend/danswer/tools/custom/base_tool_types.py rename to backend/danswer/tools/tool_implementations/custom/base_tool_types.py diff --git a/backend/danswer/tools/custom/custom_tool.py b/backend/danswer/tools/tool_implementations/custom/custom_tool.py similarity index 55% rename from backend/danswer/tools/custom/custom_tool.py rename to backend/danswer/tools/tool_implementations/custom/custom_tool.py index 3d36d7bb055..eace6d53a3b 100644 --- a/backend/danswer/tools/custom/custom_tool.py +++ b/backend/danswer/tools/tool_implementations/custom/custom_tool.py @@ -1,34 +1,61 @@ +import csv import json +import uuid from collections.abc import Generator +from io import BytesIO +from io import StringIO from typing import Any from typing import cast +from typing import Dict +from typing import List import requests from langchain_core.messages import HumanMessage from langchain_core.messages import SystemMessage from pydantic import BaseModel -from danswer.dynamic_configs.interface import JSON_ro +from danswer.configs.constants import FileOrigin +from danswer.db.engine import get_session_with_tenant +from danswer.file_store.file_store import get_default_file_store +from danswer.file_store.models import ChatFileType +from danswer.file_store.models import InMemoryChatFile +from danswer.key_value_store.interface import JSON_ro from danswer.llm.answering.models import PreviousMessage +from danswer.llm.answering.prompts.build import AnswerPromptBuilder from danswer.llm.interfaces import LLM -from danswer.tools.custom.base_tool_types import ToolResultType -from danswer.tools.custom.custom_tool_prompts import ( - SHOULD_USE_CUSTOM_TOOL_SYSTEM_PROMPT, -) -from danswer.tools.custom.custom_tool_prompts import SHOULD_USE_CUSTOM_TOOL_USER_PROMPT -from danswer.tools.custom.custom_tool_prompts import TOOL_ARG_SYSTEM_PROMPT -from danswer.tools.custom.custom_tool_prompts import TOOL_ARG_USER_PROMPT -from danswer.tools.custom.custom_tool_prompts import USE_TOOL -from danswer.tools.custom.openapi_parsing import MethodSpec -from danswer.tools.custom.openapi_parsing import openapi_to_method_specs -from danswer.tools.custom.openapi_parsing import openapi_to_url -from danswer.tools.custom.openapi_parsing import REQUEST_BODY -from danswer.tools.custom.openapi_parsing import validate_openapi_schema +from danswer.tools.base_tool import BaseTool +from danswer.tools.message import ToolCallSummary from danswer.tools.models import CHAT_SESSION_ID_PLACEHOLDER from danswer.tools.models import DynamicSchemaInfo from danswer.tools.models import MESSAGE_ID_PLACEHOLDER -from danswer.tools.tool import Tool -from danswer.tools.tool import ToolResponse +from danswer.tools.models import ToolResponse +from danswer.tools.tool_implementations.custom.custom_tool_prompts import ( + SHOULD_USE_CUSTOM_TOOL_SYSTEM_PROMPT, +) +from danswer.tools.tool_implementations.custom.custom_tool_prompts import ( + SHOULD_USE_CUSTOM_TOOL_USER_PROMPT, +) +from danswer.tools.tool_implementations.custom.custom_tool_prompts import ( + 
TOOL_ARG_SYSTEM_PROMPT, +) +from danswer.tools.tool_implementations.custom.custom_tool_prompts import ( + TOOL_ARG_USER_PROMPT, +) +from danswer.tools.tool_implementations.custom.custom_tool_prompts import USE_TOOL +from danswer.tools.tool_implementations.custom.openapi_parsing import MethodSpec +from danswer.tools.tool_implementations.custom.openapi_parsing import ( + openapi_to_method_specs, +) +from danswer.tools.tool_implementations.custom.openapi_parsing import openapi_to_url +from danswer.tools.tool_implementations.custom.openapi_parsing import REQUEST_BODY +from danswer.tools.tool_implementations.custom.openapi_parsing import ( + validate_openapi_schema, +) +from danswer.tools.tool_implementations.custom.prompt import ( + build_custom_image_generation_user_prompt, +) +from danswer.utils.headers import header_list_to_header_dict +from danswer.utils.headers import HeaderItemDict from danswer.utils.logger import setup_logger logger = setup_logger() @@ -36,17 +63,22 @@ CUSTOM_TOOL_RESPONSE_ID = "custom_tool_response" +class CustomToolFileResponse(BaseModel): + file_ids: List[str] # References to saved images or CSVs + + class CustomToolCallSummary(BaseModel): tool_name: str - tool_result: ToolResultType + response_type: str # e.g., 'json', 'image', 'csv', 'graph' + tool_result: Any # The response data -class CustomTool(Tool): +class CustomTool(BaseTool): def __init__( self, method_spec: MethodSpec, base_url: str, - custom_headers: list[dict[str, str]] | None = [], + custom_headers: list[HeaderItemDict] | None = None, ) -> None: self._base_url = base_url self._method_spec = method_spec @@ -55,9 +87,7 @@ def __init__( self._name = self._method_spec.name self._description = self._method_spec.summary self.headers = ( - {header["key"]: header["value"] for header in custom_headers} - if custom_headers - else {} + header_list_to_header_dict(custom_headers) if custom_headers else {} ) @property @@ -81,6 +111,12 @@ def build_tool_message_content( self, *args: ToolResponse ) -> str | list[str | dict[str, Any]]: response = cast(CustomToolCallSummary, args[0].response) + + if response.response_type == "image" or response.response_type == "csv": + image_response = cast(CustomToolFileResponse, response.tool_result) + return json.dumps({"file_ids": image_response.file_ids}) + + # For JSON or other responses, return as-is return json.dumps(response.tool_result) """For LLMs which do NOT support explicit tool calling""" @@ -148,6 +184,38 @@ def get_args_for_non_tool_calling_llm( ) return None + def _save_and_get_file_references( + self, file_content: bytes | str, content_type: str + ) -> List[str]: + with get_session_with_tenant() as db_session: + file_store = get_default_file_store(db_session) + + file_id = str(uuid.uuid4()) + + # Handle both binary and text content + if isinstance(file_content, str): + content = BytesIO(file_content.encode()) + else: + content = BytesIO(file_content) + + file_store.save_file( + file_name=file_id, + content=content, + display_name=file_id, + file_origin=FileOrigin.CHAT_UPLOAD, + file_type=content_type, + file_metadata={ + "content_type": content_type, + }, + ) + + return [file_id] + + def _parse_csv(self, csv_text: str) -> List[Dict[str, Any]]: + csv_file = StringIO(csv_text) + reader = csv.DictReader(csv_file) + return [row for row in reader] + """Actual execution of the tool""" def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]: @@ -167,25 +235,108 @@ def run(self, **kwargs: Any) -> Generator[ToolResponse, None, None]: url = 
self._method_spec.build_url(self._base_url, path_params, query_params) method = self._method_spec.method - # Log request details + response = requests.request( method, url, json=request_body, headers=self.headers ) + content_type = response.headers.get("Content-Type", "") + + if "text/csv" in content_type: + file_ids = self._save_and_get_file_references( + response.content, content_type + ) + tool_result = CustomToolFileResponse(file_ids=file_ids) + response_type = "csv" + + elif "image/" in content_type: + file_ids = self._save_and_get_file_references( + response.content, content_type + ) + tool_result = CustomToolFileResponse(file_ids=file_ids) + response_type = "image" + + else: + tool_result = response.json() + response_type = "json" + + logger.info( + f"Returning tool response for {self._name} with type {response_type}" + ) yield ToolResponse( id=CUSTOM_TOOL_RESPONSE_ID, response=CustomToolCallSummary( - tool_name=self._name, tool_result=response.json() + tool_name=self._name, + response_type=response_type, + tool_result=tool_result, ), ) + def build_next_prompt( + self, + prompt_builder: AnswerPromptBuilder, + tool_call_summary: ToolCallSummary, + tool_responses: list[ToolResponse], + using_tool_calling_llm: bool, + ) -> AnswerPromptBuilder: + response = cast(CustomToolCallSummary, tool_responses[0].response) + + # Handle non-file responses using parent class behavior + if response.response_type not in ["image", "csv"]: + return super().build_next_prompt( + prompt_builder, + tool_call_summary, + tool_responses, + using_tool_calling_llm, + ) + + # Handle image and CSV file responses + file_type = ( + ChatFileType.IMAGE + if response.response_type == "image" + else ChatFileType.CSV + ) + + # Load files from storage + files = [] + with get_session_with_tenant() as db_session: + file_store = get_default_file_store(db_session) + + for file_id in response.tool_result.file_ids: + try: + file_io = file_store.read_file(file_id, mode="b") + files.append( + InMemoryChatFile( + file_id=file_id, + filename=file_id, + content=file_io.read(), + file_type=file_type, + ) + ) + except Exception: + logger.exception(f"Failed to read file {file_id}") + + # Update prompt with file content + prompt_builder.update_user_prompt( + build_custom_image_generation_user_prompt( + query=prompt_builder.get_user_message_content(), + files=files, + file_type=file_type, + ) + ) + + return prompt_builder + def final_result(self, *args: ToolResponse) -> JSON_ro: - return cast(CustomToolCallSummary, args[0].response).tool_result + response = cast(CustomToolCallSummary, args[0].response) + if isinstance(response.tool_result, CustomToolFileResponse): + return response.tool_result.model_dump() + return response.tool_result def build_custom_tools_from_openapi_schema_and_headers( openapi_schema: dict[str, Any], - custom_headers: list[dict[str, str]] | None = [], + custom_headers: list[HeaderItemDict] | None = None, dynamic_schema_info: DynamicSchemaInfo | None = None, ) -> list[CustomTool]: if dynamic_schema_info: diff --git a/backend/danswer/tools/custom/custom_tool_prompts.py b/backend/danswer/tools/tool_implementations/custom/custom_tool_prompts.py similarity index 100% rename from backend/danswer/tools/custom/custom_tool_prompts.py rename to backend/danswer/tools/tool_implementations/custom/custom_tool_prompts.py diff --git a/backend/danswer/tools/custom/openapi_parsing.py b/backend/danswer/tools/tool_implementations/custom/openapi_parsing.py similarity index 100% rename from backend/danswer/tools/custom/openapi_parsing.py 
rename to backend/danswer/tools/tool_implementations/custom/openapi_parsing.py diff --git a/backend/danswer/tools/tool_implementations/custom/prompt.py b/backend/danswer/tools/tool_implementations/custom/prompt.py new file mode 100644 index 00000000000..9911594a917 --- /dev/null +++ b/backend/danswer/tools/tool_implementations/custom/prompt.py @@ -0,0 +1,25 @@ +from langchain_core.messages import HumanMessage + +from danswer.file_store.models import ChatFileType +from danswer.file_store.models import InMemoryChatFile +from danswer.llm.utils import build_content_with_imgs + + +CUSTOM_IMG_GENERATION_SUMMARY_PROMPT = """ +You have just created the attached {file_type} file in response to the following query: "{query}". + +Can you please summarize it in a sentence or two? Do NOT include image urls or bulleted lists. +""" + + +def build_custom_image_generation_user_prompt( + query: str, file_type: ChatFileType, files: list[InMemoryChatFile] | None = None +) -> HumanMessage: + return HumanMessage( + content=build_content_with_imgs( + message=CUSTOM_IMG_GENERATION_SUMMARY_PROMPT.format( + query=query, file_type=file_type.value + ).strip(), + files=files, + ) + ) diff --git a/backend/danswer/tools/images/image_generation_tool.py b/backend/danswer/tools/tool_implementations/images/image_generation_tool.py similarity index 83% rename from backend/danswer/tools/images/image_generation_tool.py rename to backend/danswer/tools/tool_implementations/images/image_generation_tool.py index 6e2515a8e9f..3da53751812 100644 --- a/backend/danswer/tools/images/image_generation_tool.py +++ b/backend/danswer/tools/tool_implementations/images/image_generation_tool.py @@ -9,15 +9,20 @@ from danswer.chat.chat_utils import combine_message_chain from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF -from danswer.dynamic_configs.interface import JSON_ro +from danswer.key_value_store.interface import JSON_ro from danswer.llm.answering.models import PreviousMessage -from danswer.llm.headers import build_llm_extra_headers +from danswer.llm.answering.prompts.build import AnswerPromptBuilder from danswer.llm.interfaces import LLM from danswer.llm.utils import build_content_with_imgs from danswer.llm.utils import message_to_string from danswer.prompts.constants import GENERAL_SEP_PAT +from danswer.tools.message import ToolCallSummary +from danswer.tools.models import ToolResponse from danswer.tools.tool import Tool -from danswer.tools.tool import ToolResponse +from danswer.tools.tool_implementations.images.prompt import ( + build_image_generation_user_prompt, +) +from danswer.utils.headers import build_llm_extra_headers from danswer.utils.logger import setup_logger from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel @@ -112,7 +117,10 @@ def tool_definition(self) -> dict: }, "shape": { "type": "string", - "description": "Optional. Image shape: 'square', 'portrait', or 'landscape'", + "description": ( + "Optional - only specify if you want a specific shape." + " Image shape: 'square', 'portrait', or 'landscape'." 
+ ), "enum": [shape.value for shape in ImageShape], }, }, @@ -258,3 +266,34 @@ def final_result(self, *args: ToolResponse) -> JSON_ro: image_generation_response.model_dump() for image_generation_response in image_generation_responses ] + + def build_next_prompt( + self, + prompt_builder: AnswerPromptBuilder, + tool_call_summary: ToolCallSummary, + tool_responses: list[ToolResponse], + using_tool_calling_llm: bool, + ) -> AnswerPromptBuilder: + img_generation_response = cast( + list[ImageGenerationResponse] | None, + next( + ( + response.response + for response in tool_responses + if response.id == IMAGE_GENERATION_RESPONSE_ID + ), + None, + ), + ) + if img_generation_response is None: + raise ValueError("No image generation response found") + + img_urls = [img.url for img in img_generation_response] + prompt_builder.update_user_prompt( + build_image_generation_user_prompt( + query=prompt_builder.get_user_message_content(), + img_urls=img_urls, + ) + ) + + return prompt_builder diff --git a/backend/danswer/tools/images/prompt.py b/backend/danswer/tools/tool_implementations/images/prompt.py similarity index 100% rename from backend/danswer/tools/images/prompt.py rename to backend/danswer/tools/tool_implementations/images/prompt.py diff --git a/backend/danswer/tools/internet_search/internet_search_tool.py b/backend/danswer/tools/tool_implementations/internet_search/internet_search_tool.py similarity index 81% rename from backend/danswer/tools/internet_search/internet_search_tool.py rename to backend/danswer/tools/tool_implementations/internet_search/internet_search_tool.py index 3012eb465f4..12142bc4852 100644 --- a/backend/danswer/tools/internet_search/internet_search_tool.py +++ b/backend/danswer/tools/tool_implementations/internet_search/internet_search_tool.py @@ -10,19 +10,32 @@ from danswer.chat.models import LlmDoc from danswer.configs.constants import DocumentSource from danswer.configs.model_configs import GEN_AI_HISTORY_CUTOFF -from danswer.dynamic_configs.interface import JSON_ro +from danswer.key_value_store.interface import JSON_ro +from danswer.llm.answering.models import AnswerStyleConfig from danswer.llm.answering.models import PreviousMessage +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prompts.build import AnswerPromptBuilder from danswer.llm.interfaces import LLM from danswer.llm.utils import message_to_string from danswer.prompts.chat_prompts import INTERNET_SEARCH_QUERY_REPHRASE from danswer.prompts.constants import GENERAL_SEP_PAT from danswer.search.models import SearchDoc from danswer.secondary_llm_flows.query_expansion import history_based_query_rephrase -from danswer.tools.internet_search.models import InternetSearchResponse -from danswer.tools.internet_search.models import InternetSearchResult -from danswer.tools.search.search_tool import FINAL_CONTEXT_DOCUMENTS_ID +from danswer.tools.message import ToolCallSummary +from danswer.tools.models import ToolResponse from danswer.tools.tool import Tool -from danswer.tools.tool import ToolResponse +from danswer.tools.tool_implementations.internet_search.models import ( + InternetSearchResponse, +) +from danswer.tools.tool_implementations.internet_search.models import ( + InternetSearchResult, +) +from danswer.tools.tool_implementations.search_like_tool_utils import ( + build_next_prompt_for_search_like_tool, +) +from danswer.tools.tool_implementations.search_like_tool_utils import ( + FINAL_CONTEXT_DOCUMENTS_ID, +) from danswer.utils.logger import setup_logger logger = setup_logger() 
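For readers following the refactor in the next hunk, here is a minimal usage sketch (not part of this patch) of how a call site might construct the tool after this change. It assumes an AnswerStyleConfig and PromptConfig have already been built by the answer pipeline, and the helper name below is purely illustrative:

from danswer.llm.answering.models import AnswerStyleConfig
from danswer.llm.answering.models import PromptConfig
from danswer.tools.tool_implementations.internet_search.internet_search_tool import (
    InternetSearchTool,
)


def build_internet_search_tool(
    bing_api_key: str,
    answer_style_config: AnswerStyleConfig,
    prompt_config: PromptConfig,
) -> InternetSearchTool:
    # The tool now carries the answer-style and prompt configs so that its
    # build_next_prompt() can delegate to build_next_prompt_for_search_like_tool().
    return InternetSearchTool(
        api_key=bing_api_key,
        answer_style_config=answer_style_config,
        prompt_config=prompt_config,
        num_results=10,
    )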
@@ -97,8 +110,17 @@ class InternetSearchTool(Tool): _DISPLAY_NAME = "[Beta] Internet Search Tool" _DESCRIPTION = "Perform an internet search for up-to-date information." - def __init__(self, api_key: str, num_results: int = 10) -> None: + def __init__( + self, + api_key: str, + answer_style_config: AnswerStyleConfig, + prompt_config: PromptConfig, + num_results: int = 10, + ) -> None: self.api_key = api_key + self.answer_style_config = answer_style_config + self.prompt_config = prompt_config + self.host = "https://api.bing.microsoft.com/v7.0" self.headers = { "Ocp-Apim-Subscription-Key": api_key, @@ -231,3 +253,19 @@ def run(self, **kwargs: str) -> Generator[ToolResponse, None, None]: def final_result(self, *args: ToolResponse) -> JSON_ro: search_response = cast(InternetSearchResponse, args[0].response) return search_response.model_dump() + + def build_next_prompt( + self, + prompt_builder: AnswerPromptBuilder, + tool_call_summary: ToolCallSummary, + tool_responses: list[ToolResponse], + using_tool_calling_llm: bool, + ) -> AnswerPromptBuilder: + return build_next_prompt_for_search_like_tool( + prompt_builder=prompt_builder, + tool_call_summary=tool_call_summary, + tool_responses=tool_responses, + using_tool_calling_llm=using_tool_calling_llm, + answer_style_config=self.answer_style_config, + prompt_config=self.prompt_config, + ) diff --git a/backend/danswer/tools/internet_search/models.py b/backend/danswer/tools/tool_implementations/internet_search/models.py similarity index 100% rename from backend/danswer/tools/internet_search/models.py rename to backend/danswer/tools/tool_implementations/internet_search/models.py diff --git a/backend/danswer/tools/search/search_tool.py b/backend/danswer/tools/tool_implementations/search/search_tool.py similarity index 86% rename from backend/danswer/tools/search/search_tool.py rename to backend/danswer/tools/tool_implementations/search/search_tool.py index cbfaf4f3d92..6eda3013ab3 100644 --- a/backend/danswer/tools/search/search_tool.py +++ b/backend/danswer/tools/tool_implementations/search/search_tool.py @@ -16,11 +16,14 @@ from danswer.configs.model_configs import GEN_AI_MODEL_FALLBACK_MAX_TOKENS from danswer.db.models import Persona from danswer.db.models import User -from danswer.dynamic_configs.interface import JSON_ro +from danswer.key_value_store.interface import JSON_ro +from danswer.llm.answering.llm_response_handler import LLMCall +from danswer.llm.answering.models import AnswerStyleConfig from danswer.llm.answering.models import ContextualPruningConfig from danswer.llm.answering.models import DocumentPruningConfig from danswer.llm.answering.models import PreviousMessage from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prompts.build import AnswerPromptBuilder from danswer.llm.answering.prompts.citations_prompt import compute_max_llm_input_tokens from danswer.llm.answering.prune_and_merge import prune_and_merge_sections from danswer.llm.answering.prune_and_merge import prune_sections @@ -35,9 +38,16 @@ from danswer.search.pipeline import SearchPipeline from danswer.secondary_llm_flows.choose_search import check_if_need_search from danswer.secondary_llm_flows.query_expansion import history_based_query_rephrase -from danswer.tools.search.search_utils import llm_doc_to_dict +from danswer.tools.message import ToolCallSummary +from danswer.tools.models import ToolResponse from danswer.tools.tool import Tool -from danswer.tools.tool import ToolResponse +from danswer.tools.tool_implementations.search.search_utils 
import llm_doc_to_dict +from danswer.tools.tool_implementations.search_like_tool_utils import ( + build_next_prompt_for_search_like_tool, +) +from danswer.tools.tool_implementations.search_like_tool_utils import ( + FINAL_CONTEXT_DOCUMENTS_ID, +) from danswer.utils.logger import setup_logger logger = setup_logger() @@ -45,7 +55,6 @@ SEARCH_RESPONSE_SUMMARY_ID = "search_response_summary" SEARCH_DOC_CONTENT_ID = "search_doc_content" SECTION_RELEVANCE_LIST_ID = "section_relevance_list" -FINAL_CONTEXT_DOCUMENTS_ID = "final_context_documents" SEARCH_EVALUATION_ID = "llm_doc_eval" @@ -85,6 +94,7 @@ def __init__( llm: LLM, fast_llm: LLM, pruning_config: DocumentPruningConfig, + answer_style_config: AnswerStyleConfig, evaluation_type: LLMEvaluationType, # if specified, will not actually run a search and will instead return these # sections. Used when the user selects specific docs to talk to @@ -136,6 +146,7 @@ def __init__( num_chunk_multiple = self.chunks_above + self.chunks_below + 1 + self.answer_style_config = answer_style_config self.contextual_pruning_config = ( ContextualPruningConfig.from_doc_pruning_config( num_chunk_multiple=num_chunk_multiple, doc_pruning_config=pruning_config @@ -353,4 +364,36 @@ def final_result(self, *args: ToolResponse) -> JSON_ro: # NOTE: need to do this json.loads(doc.json()) stuff because there are some # subfields that are not serializable by default (datetime) # this forces pydantic to make them JSON serializable for us - return [json.loads(doc.json()) for doc in final_docs] + return [json.loads(doc.model_dump_json()) for doc in final_docs] + + def build_next_prompt( + self, + prompt_builder: AnswerPromptBuilder, + tool_call_summary: ToolCallSummary, + tool_responses: list[ToolResponse], + using_tool_calling_llm: bool, + ) -> AnswerPromptBuilder: + return build_next_prompt_for_search_like_tool( + prompt_builder=prompt_builder, + tool_call_summary=tool_call_summary, + tool_responses=tool_responses, + using_tool_calling_llm=using_tool_calling_llm, + answer_style_config=self.answer_style_config, + prompt_config=self.prompt_config, + ) + + """Other utility functions""" + + @classmethod + def get_search_result(cls, llm_call: LLMCall) -> list[LlmDoc] | None: + if not llm_call.tool_call_info: + return None + + for yield_item in llm_call.tool_call_info: + if ( + isinstance(yield_item, ToolResponse) + and yield_item.id == FINAL_CONTEXT_DOCUMENTS_ID + ): + return cast(list[LlmDoc], yield_item.response) + + return None diff --git a/backend/danswer/tools/search/search_utils.py b/backend/danswer/tools/tool_implementations/search/search_utils.py similarity index 100% rename from backend/danswer/tools/search/search_utils.py rename to backend/danswer/tools/tool_implementations/search/search_utils.py diff --git a/backend/danswer/tools/tool_implementations/search_like_tool_utils.py b/backend/danswer/tools/tool_implementations/search_like_tool_utils.py new file mode 100644 index 00000000000..6701f1602ea --- /dev/null +++ b/backend/danswer/tools/tool_implementations/search_like_tool_utils.py @@ -0,0 +1,71 @@ +from typing import cast + +from danswer.chat.models import LlmDoc +from danswer.llm.answering.models import AnswerStyleConfig +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prompts.build import AnswerPromptBuilder +from danswer.llm.answering.prompts.citations_prompt import ( + build_citations_system_message, +) +from danswer.llm.answering.prompts.citations_prompt import build_citations_user_message +from 
danswer.llm.answering.prompts.quotes_prompt import build_quotes_user_message +from danswer.tools.message import ToolCallSummary +from danswer.tools.models import ToolResponse + + +FINAL_CONTEXT_DOCUMENTS_ID = "final_context_documents" + + +def build_next_prompt_for_search_like_tool( + prompt_builder: AnswerPromptBuilder, + tool_call_summary: ToolCallSummary, + tool_responses: list[ToolResponse], + using_tool_calling_llm: bool, + answer_style_config: AnswerStyleConfig, + prompt_config: PromptConfig, +) -> AnswerPromptBuilder: + if not using_tool_calling_llm: + final_context_docs_response = next( + response + for response in tool_responses + if response.id == FINAL_CONTEXT_DOCUMENTS_ID + ) + final_context_documents = cast( + list[LlmDoc], final_context_docs_response.response + ) + else: + # if using tool calling llm, then the final context documents are the tool responses + final_context_documents = [] + + if answer_style_config.citation_config: + prompt_builder.update_system_prompt( + build_citations_system_message(prompt_config) + ) + prompt_builder.update_user_prompt( + build_citations_user_message( + message=prompt_builder.user_message_and_token_cnt[0], + prompt_config=prompt_config, + context_docs=final_context_documents, + all_doc_useful=( + answer_style_config.citation_config.all_docs_useful + if answer_style_config.citation_config + else False + ), + history_message=prompt_builder.single_message_history or "", + ) + ) + elif answer_style_config.quotes_config: + prompt_builder.update_user_prompt( + build_quotes_user_message( + message=prompt_builder.user_message_and_token_cnt[0], + context_docs=final_context_documents, + history_str=prompt_builder.single_message_history or "", + prompt=prompt_config, + ) + ) + + if using_tool_calling_llm: + prompt_builder.append_message(tool_call_summary.tool_call_request) + prompt_builder.append_message(tool_call_summary.tool_call_result) + + return prompt_builder diff --git a/backend/danswer/tools/tool_runner.py b/backend/danswer/tools/tool_runner.py index 58b94bdb0c8..fb3eb8b9932 100644 --- a/backend/danswer/tools/tool_runner.py +++ b/backend/danswer/tools/tool_runner.py @@ -6,8 +6,8 @@ from danswer.llm.interfaces import LLM from danswer.tools.models import ToolCallFinalResult from danswer.tools.models import ToolCallKickoff +from danswer.tools.models import ToolResponse from danswer.tools.tool import Tool -from danswer.tools.tool import ToolResponse from danswer.utils.threadpool_concurrency import run_functions_tuples_in_parallel diff --git a/backend/danswer/tools/utils.py b/backend/danswer/tools/utils.py index 9e20105edef..52d60feb912 100644 --- a/backend/danswer/tools/utils.py +++ b/backend/danswer/tools/utils.py @@ -1,5 +1,11 @@ import json +from sqlalchemy.orm import Session + +from danswer.configs.app_configs import AZURE_DALLE_API_KEY +from danswer.db.connector import check_connectors_exist +from danswer.db.document import check_docs_exist +from danswer.db.models import LLMProvider from danswer.natural_language_processing.utils import BaseTokenizer from danswer.tools.tool import Tool @@ -26,3 +32,18 @@ def compute_tool_tokens(tool: Tool, llm_tokenizer: BaseTokenizer) -> int: def compute_all_tool_tokens(tools: list[Tool], llm_tokenizer: BaseTokenizer) -> int: return sum(compute_tool_tokens(tool, llm_tokenizer) for tool in tools) + + +def is_image_generation_available(db_session: Session) -> bool: + providers = db_session.query(LLMProvider).all() + for provider in providers: + if provider.provider == "openai": + return True + + return 
bool(AZURE_DALLE_API_KEY) + + +def is_document_search_available(db_session: Session) -> bool: + docs_exist = check_docs_exist(db_session) + connectors_exist = check_connectors_exist(db_session) + return docs_exist or connectors_exist diff --git a/backend/danswer/utils/headers.py b/backend/danswer/utils/headers.py new file mode 100644 index 00000000000..5ccf61a51e1 --- /dev/null +++ b/backend/danswer/utils/headers.py @@ -0,0 +1,79 @@ +from typing import TypedDict + +from fastapi.datastructures import Headers + +from danswer.configs.model_configs import LITELLM_EXTRA_HEADERS +from danswer.configs.model_configs import LITELLM_PASS_THROUGH_HEADERS +from danswer.configs.tool_configs import CUSTOM_TOOL_PASS_THROUGH_HEADERS +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +class HeaderItemDict(TypedDict): + key: str + value: str + + +def clean_header_list(headers_to_clean: list[HeaderItemDict]) -> dict[str, str]: + cleaned_headers: dict[str, str] = {} + for item in headers_to_clean: + key = item["key"] + value = item["value"] + if key in cleaned_headers: + logger.warning( + f"Duplicate header {key} found in custom headers, ignoring..." + ) + continue + cleaned_headers[key] = value + return cleaned_headers + + +def header_dict_to_header_list(header_dict: dict[str, str]) -> list[HeaderItemDict]: + return [{"key": key, "value": value} for key, value in header_dict.items()] + + +def header_list_to_header_dict(header_list: list[HeaderItemDict]) -> dict[str, str]: + return {header["key"]: header["value"] for header in header_list} + + +def get_relevant_headers( + headers: dict[str, str] | Headers, desired_headers: list[str] | None +) -> dict[str, str]: + if not desired_headers: + return {} + + pass_through_headers: dict[str, str] = {} + for key in desired_headers: + if key in headers: + pass_through_headers[key] = headers[key] + else: + # fastapi makes all header keys lowercase, handling that here + lowercase_key = key.lower() + if lowercase_key in headers: + pass_through_headers[lowercase_key] = headers[lowercase_key] + + return pass_through_headers + + +def get_litellm_additional_request_headers( + headers: dict[str, str] | Headers +) -> dict[str, str]: + return get_relevant_headers(headers, LITELLM_PASS_THROUGH_HEADERS) + + +def build_llm_extra_headers( + additional_headers: dict[str, str] | None = None +) -> dict[str, str]: + extra_headers: dict[str, str] = {} + if additional_headers: + extra_headers.update(additional_headers) + if LITELLM_EXTRA_HEADERS: + extra_headers.update(LITELLM_EXTRA_HEADERS) + return extra_headers + + +def get_custom_tool_additional_request_headers( + headers: dict[str, str] | Headers +) -> dict[str, str]: + return get_relevant_headers(headers, CUSTOM_TOOL_PASS_THROUGH_HEADERS) diff --git a/backend/danswer/utils/logger.py b/backend/danswer/utils/logger.py index 96d4ae2a25e..bd784513898 100644 --- a/backend/danswer/utils/logger.py +++ b/backend/danswer/utils/logger.py @@ -1,3 +1,4 @@ +import contextvars import logging import os from collections.abc import MutableMapping @@ -7,11 +8,19 @@ from shared_configs.configs import DEV_LOGGING_ENABLED from shared_configs.configs import LOG_FILE_NAME from shared_configs.configs import LOG_LEVEL +from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA from shared_configs.configs import SLACK_CHANNEL_ID +from shared_configs.configs import TENANT_ID_PREFIX +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logging.addLevelName(logging.INFO + 
5, "NOTICE") +pruning_ctx: contextvars.ContextVar[dict[str, Any]] = contextvars.ContextVar( + "pruning_ctx", default=dict() +) + class IndexAttemptSingleton: """Used to tell if this process is an indexing job, and if so what is the @@ -57,14 +66,34 @@ def process( ) -> tuple[str, MutableMapping[str, Any]]: # If this is an indexing job, add the attempt ID to the log message # This helps filter the logs for this specific indexing - attempt_id = IndexAttemptSingleton.get_index_attempt_id() + index_attempt_id = IndexAttemptSingleton.get_index_attempt_id() cc_pair_id = IndexAttemptSingleton.get_connector_credential_pair_id() - if attempt_id is not None: - msg = f"[Attempt ID: {attempt_id}] {msg}" - - if cc_pair_id is not None: - msg = f"[CC Pair ID: {cc_pair_id}] {msg}" + pruning_ctx_dict = pruning_ctx.get() + if len(pruning_ctx_dict) > 0: + if "request_id" in pruning_ctx_dict: + msg = f"[Prune: {pruning_ctx_dict['request_id']}] {msg}" + + if "cc_pair_id" in pruning_ctx_dict: + msg = f"[CC Pair: {pruning_ctx_dict['cc_pair_id']}] {msg}" + else: + if index_attempt_id is not None: + msg = f"[Index Attempt: {index_attempt_id}] {msg}" + + if cc_pair_id is not None: + msg = f"[CC Pair: {cc_pair_id}] {msg}" + + # Add tenant information if it differs from default + # This will always be the case for authenticated API requests + if MULTI_TENANT: + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + if tenant_id != POSTGRES_DEFAULT_SCHEMA: + # Strip tenant_ prefix and take first 8 chars for cleaner logs + tenant_display = tenant_id.removeprefix(TENANT_ID_PREFIX) + short_tenant = ( + tenant_display[:8] if len(tenant_display) > 8 else tenant_display + ) + msg = f"[t:{short_tenant}] {msg}" # For Slack Bot, logs the channel relevant to the request channel_id = self.extra.get(SLACK_CHANNEL_ID) if self.extra else None @@ -182,3 +211,25 @@ def setup_logger( logger.notice = lambda msg, *args, **kwargs: logger.log(logging.getLevelName("NOTICE"), msg, *args, **kwargs) # type: ignore return DanswerLoggingAdapter(logger, extra=extra) + + +def print_loggers() -> None: + """Print information about all loggers. Use to debug logging issues.""" + root_logger = logging.getLogger() + loggers: list[logging.Logger | logging.PlaceHolder] = [root_logger] + loggers.extend(logging.Logger.manager.loggerDict.values()) + + for logger in loggers: + if isinstance(logger, logging.PlaceHolder): + # Skip placeholders that aren't actual loggers + continue + + print(f"Logger: '{logger.name}' (Level: {logging.getLevelName(logger.level)})") + if logger.handlers: + for handler in logger.handlers: + print(f" Handler: {handler}") + else: + print(" No handlers") + + print(f" Propagate: {logger.propagate}") + print() diff --git a/backend/danswer/connectors/cross_connector_utils/retry_wrapper.py b/backend/danswer/utils/retry_wrapper.py similarity index 79% rename from backend/danswer/connectors/cross_connector_utils/retry_wrapper.py rename to backend/danswer/utils/retry_wrapper.py index 7312d1349f7..2d6d79ca5eb 100644 --- a/backend/danswer/connectors/cross_connector_utils/retry_wrapper.py +++ b/backend/danswer/utils/retry_wrapper.py @@ -22,18 +22,18 @@ def retry_builder( jitter: tuple[float, float] | float = 1, ) -> Callable[[F], F]: """Builds a generic wrapper/decorator for calls to external APIs that - may fail due to rate limiting, flakes, or other reasons. Applies expontential + may fail due to rate limiting, flakes, or other reasons. 
Applies exponential backoff with jitter to retry the call.""" - @retry( - tries=tries, - delay=delay, - max_delay=max_delay, - backoff=backoff, - jitter=jitter, - logger=cast(Logger, logger), - ) def retry_with_default(func: F) -> F: + @retry( + tries=tries, + delay=delay, + max_delay=max_delay, + backoff=backoff, + jitter=jitter, + logger=cast(Logger, logger), + ) def wrapped_func(*args: list, **kwargs: dict[str, Any]) -> Any: return func(*args, **kwargs) diff --git a/backend/danswer/utils/sitemap.py b/backend/danswer/utils/sitemap.py index ababbec4575..551b2bb3bf0 100644 --- a/backend/danswer/utils/sitemap.py +++ b/backend/danswer/utils/sitemap.py @@ -1,39 +1,78 @@ -from datetime import datetime -from urllib import robotparser +import re +import xml.etree.ElementTree as ET +from typing import Set +from urllib.parse import urljoin -from usp.tree import sitemap_tree_for_homepage # type: ignore +import requests from danswer.utils.logger import setup_logger logger = setup_logger() -def test_url(rp: robotparser.RobotFileParser | None, url: str) -> bool: - if not rp: - return True - else: - return rp.can_fetch("*", url) +def _get_sitemap_locations_from_robots(base_url: str) -> Set[str]: + """Extract sitemap URLs from robots.txt""" + sitemap_urls: set = set() + try: + robots_url = urljoin(base_url, "/robots.txt") + resp = requests.get(robots_url, timeout=10) + if resp.status_code == 200: + for line in resp.text.splitlines(): + if line.lower().startswith("sitemap:"): + sitemap_url = line.split(":", 1)[1].strip() + sitemap_urls.add(sitemap_url) + except Exception as e: + logger.warning(f"Error fetching robots.txt: {e}") + return sitemap_urls + + +def _extract_urls_from_sitemap(sitemap_url: str) -> Set[str]: + """Extract URLs from a sitemap XML file""" + urls: set[str] = set() + try: + resp = requests.get(sitemap_url, timeout=10) + if resp.status_code != 200: + return urls + root = ET.fromstring(resp.content) -def init_robots_txt(site: str) -> robotparser.RobotFileParser: - ts = datetime.now().timestamp() - robots_url = f"{site}/robots.txt?ts={ts}" - rp = robotparser.RobotFileParser() - rp.set_url(robots_url) - rp.read() - return rp + # Handle both regular sitemaps and sitemap indexes + # Remove namespace for easier parsing + namespace = re.match(r"\{.*\}", root.tag) + ns = namespace.group(0) if namespace else "" + + if root.tag == f"{ns}sitemapindex": + # This is a sitemap index + for sitemap in root.findall(f".//{ns}loc"): + if sitemap.text: + sub_urls = _extract_urls_from_sitemap(sitemap.text) + urls.update(sub_urls) + else: + # This is a regular sitemap + for url in root.findall(f".//{ns}loc"): + if url.text: + urls.add(url.text) + + except Exception as e: + logger.warning(f"Error processing sitemap {sitemap_url}: {e}") + + return urls def list_pages_for_site(site: str) -> list[str]: - rp: robotparser.RobotFileParser | None = None - try: - rp = init_robots_txt(site) - except Exception: - logger.warning("Failed to load robots.txt") + """Get list of pages from a site's sitemaps""" + site = site.rstrip("/") + all_urls = set() - tree = sitemap_tree_for_homepage(site) + # Try both common sitemap locations + sitemap_paths = ["/sitemap.xml", "/sitemap_index.xml"] + for path in sitemap_paths: + sitemap_url = urljoin(site, path) + all_urls.update(_extract_urls_from_sitemap(sitemap_url)) - pages = [page.url for page in tree.all_pages() if test_url(rp, page.url)] - pages = list(dict.fromkeys(pages)) + # Check robots.txt for additional sitemaps + sitemap_locations = 
_get_sitemap_locations_from_robots(site) + for sitemap_url in sitemap_locations: + all_urls.update(_extract_urls_from_sitemap(sitemap_url)) - return pages + return list(all_urls) diff --git a/backend/danswer/utils/telemetry.py b/backend/danswer/utils/telemetry.py index d8a021877e6..f5fb23ef86f 100644 --- a/backend/danswer/utils/telemetry.py +++ b/backend/danswer/utils/telemetry.py @@ -12,8 +12,8 @@ from danswer.configs.constants import KV_INSTANCE_DOMAIN_KEY from danswer.db.engine import get_sqlalchemy_engine from danswer.db.models import User -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError _DANSWER_TELEMETRY_ENDPOINT = "https://telemetry.danswer.ai/anonymous_telemetry" _CACHED_UUID: str | None = None @@ -34,11 +34,11 @@ def get_or_generate_uuid() -> str: if _CACHED_UUID is not None: return _CACHED_UUID - kv_store = get_dynamic_config_store() + kv_store = get_kv_store() try: _CACHED_UUID = cast(str, kv_store.load(KV_CUSTOMER_UUID_KEY)) - except ConfigNotFoundError: + except KvKeyNotFoundError: _CACHED_UUID = str(uuid.uuid4()) kv_store.store(KV_CUSTOMER_UUID_KEY, _CACHED_UUID, encrypt=True) @@ -51,11 +51,11 @@ def _get_or_generate_instance_domain() -> str | None: if _CACHED_INSTANCE_DOMAIN is not None: return _CACHED_INSTANCE_DOMAIN - kv_store = get_dynamic_config_store() + kv_store = get_kv_store() try: _CACHED_INSTANCE_DOMAIN = cast(str, kv_store.load(KV_INSTANCE_DOMAIN_KEY)) - except ConfigNotFoundError: + except KvKeyNotFoundError: with Session(get_sqlalchemy_engine()) as db_session: first_user = db_session.query(User).first() if first_user: diff --git a/backend/danswer/utils/variable_functionality.py b/backend/danswer/utils/variable_functionality.py index 55f296aa8e7..dfe6def2a56 100644 --- a/backend/danswer/utils/variable_functionality.py +++ b/backend/danswer/utils/variable_functionality.py @@ -16,7 +16,7 @@ def __init__(self) -> None: def set_ee(self) -> None: self._is_ee = True - def get_is_ee_version(self) -> bool: + def is_ee_version(self) -> bool: return self._is_ee @@ -24,7 +24,7 @@ def get_is_ee_version(self) -> bool: def set_is_ee_based_on_env_variable() -> None: - if ENTERPRISE_EDITION_ENABLED and not global_version.get_is_ee_version(): + if ENTERPRISE_EDITION_ENABLED and not global_version.is_ee_version(): logger.notice("Enterprise Edition enabled") global_version.set_ee() @@ -54,7 +54,7 @@ def fetch_versioned_implementation(module: str, attribute: str) -> Any: implementation cannot be found or loaded. """ logger.debug("Fetching versioned implementation for %s.%s", module, attribute) - is_ee = global_version.get_is_ee_version() + is_ee = global_version.is_ee_version() module_full = f"ee.{module}" if is_ee else module try: diff --git a/backend/ee/danswer/auth/api_key.py b/backend/ee/danswer/auth/api_key.py index d4f99d13891..9ea827d27dc 100644 --- a/backend/ee/danswer/auth/api_key.py +++ b/backend/ee/danswer/auth/api_key.py @@ -1,5 +1,7 @@ import secrets import uuid +from urllib.parse import quote +from urllib.parse import unquote from fastapi import Request from passlib.hash import sha256_crypt @@ -10,6 +12,11 @@ _API_KEY_HEADER_NAME = "Authorization" +# NOTE for others who are curious: In the context of a header, "X-" often refers +# to non-standard, experimental, or custom headers in HTTP or other protocols. 
It +# indicates that the header is not part of the official standards defined by +# organizations like the Internet Engineering Task Force (IETF). +_API_KEY_HEADER_ALTERNATIVE_NAME = "X-Danswer-Authorization" _BEARER_PREFIX = "Bearer " _API_KEY_PREFIX = "dn_" _API_KEY_LEN = 192 @@ -25,8 +32,35 @@ class ApiKeyDescriptor(BaseModel): user_id: uuid.UUID -def generate_api_key() -> str: - return _API_KEY_PREFIX + secrets.token_urlsafe(_API_KEY_LEN) +def generate_api_key(tenant_id: str | None = None) -> str: + # For backwards compatibility, if no tenant_id, generate old style key + if not tenant_id: + return _API_KEY_PREFIX + secrets.token_urlsafe(_API_KEY_LEN) + + encoded_tenant = quote(tenant_id) # URL encode the tenant ID + return f"{_API_KEY_PREFIX}{encoded_tenant}.{secrets.token_urlsafe(_API_KEY_LEN)}" + + +def extract_tenant_from_api_key_header(request: Request) -> str | None: + """Extract tenant ID from request. Returns None if auth is disabled or invalid format.""" + raw_api_key_header = request.headers.get( + _API_KEY_HEADER_ALTERNATIVE_NAME + ) or request.headers.get(_API_KEY_HEADER_NAME) + + if not raw_api_key_header or not raw_api_key_header.startswith(_BEARER_PREFIX): + return None + + api_key = raw_api_key_header[len(_BEARER_PREFIX) :].strip() + + if not api_key.startswith(_API_KEY_PREFIX): + return None + + parts = api_key[len(_API_KEY_PREFIX) :].split(".", 1) + if len(parts) != 2: + return None + + tenant_id = parts[0] + return unquote(tenant_id) if tenant_id else None def hash_api_key(api_key: str) -> str: @@ -43,7 +77,9 @@ def build_displayable_api_key(api_key: str) -> str: def get_hashed_api_key_from_request(request: Request) -> str | None: - raw_api_key_header = request.headers.get(_API_KEY_HEADER_NAME) + raw_api_key_header = request.headers.get( + _API_KEY_HEADER_ALTERNATIVE_NAME + ) or request.headers.get(_API_KEY_HEADER_NAME) if raw_api_key_header is None: return None diff --git a/backend/ee/danswer/auth/users.py b/backend/ee/danswer/auth/users.py index 18dff6ab064..1ad384555c1 100644 --- a/backend/ee/danswer/auth/users.py +++ b/backend/ee/danswer/auth/users.py @@ -1,9 +1,13 @@ from fastapi import Depends from fastapi import HTTPException from fastapi import Request +from fastapi import status from sqlalchemy.orm import Session +from danswer.auth.users import current_admin_user from danswer.configs.app_configs import AUTH_TYPE +from danswer.configs.app_configs import SUPER_CLOUD_API_KEY +from danswer.configs.app_configs import SUPER_USERS from danswer.configs.constants import AuthType from danswer.db.engine import get_session from danswer.db.models import User @@ -68,3 +72,19 @@ def get_default_admin_user_emails_() -> list[str]: if seed_config and seed_config.admin_user_emails: return seed_config.admin_user_emails return [] + + +async def current_cloud_superuser( + request: Request, + user: User | None = Depends(current_admin_user), +) -> User | None: + api_key = request.headers.get("Authorization", "").replace("Bearer ", "") + if api_key != SUPER_CLOUD_API_KEY: + raise HTTPException(status_code=401, detail="Invalid API key") + + if user and user.email not in SUPER_USERS: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="Access denied. 
User must be a cloud superuser to perform this action.", + ) + return user diff --git a/backend/ee/danswer/background/celery/celery_app.py b/backend/ee/danswer/background/celery/apps/primary.py similarity index 65% rename from backend/ee/danswer/background/celery/celery_app.py rename to backend/ee/danswer/background/celery/apps/primary.py index 5dd0f72009f..fecc21b58ef 100644 --- a/backend/ee/danswer/background/celery/celery_app.py +++ b/backend/ee/danswer/background/celery/apps/primary.py @@ -1,12 +1,8 @@ -from datetime import timedelta - -from sqlalchemy.orm import Session - -from danswer.background.celery.celery_app import celery_app +from danswer.background.celery.apps.primary import celery_app from danswer.background.task_utils import build_celery_task_wrapper from danswer.configs.app_configs import JOB_TIMEOUT from danswer.db.chat import delete_chat_sessions_older_than -from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.engine import get_session_with_tenant from danswer.server.settings.store import load_settings from danswer.utils.logger import setup_logger from danswer.utils.variable_functionality import global_version @@ -32,6 +28,8 @@ run_external_group_permission_sync, ) from ee.danswer.server.reporting.usage_export_generation import create_new_usage_report +from shared_configs.configs import MULTI_TENANT +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR logger = setup_logger() @@ -41,22 +39,28 @@ @build_celery_task_wrapper(name_sync_external_doc_permissions_task) @celery_app.task(soft_time_limit=JOB_TIMEOUT) -def sync_external_doc_permissions_task(cc_pair_id: int) -> None: - with Session(get_sqlalchemy_engine()) as db_session: +def sync_external_doc_permissions_task( + cc_pair_id: int, *, tenant_id: str | None +) -> None: + with get_session_with_tenant(tenant_id) as db_session: run_external_doc_permission_sync(db_session=db_session, cc_pair_id=cc_pair_id) @build_celery_task_wrapper(name_sync_external_group_permissions_task) @celery_app.task(soft_time_limit=JOB_TIMEOUT) -def sync_external_group_permissions_task(cc_pair_id: int) -> None: - with Session(get_sqlalchemy_engine()) as db_session: +def sync_external_group_permissions_task( + cc_pair_id: int, *, tenant_id: str | None +) -> None: + with get_session_with_tenant(tenant_id) as db_session: run_external_group_permission_sync(db_session=db_session, cc_pair_id=cc_pair_id) @build_celery_task_wrapper(name_chat_ttl_task) @celery_app.task(soft_time_limit=JOB_TIMEOUT) -def perform_ttl_management_task(retention_limit_days: int) -> None: - with Session(get_sqlalchemy_engine()) as db_session: +def perform_ttl_management_task( + retention_limit_days: int, *, tenant_id: str | None +) -> None: + with get_session_with_tenant(tenant_id) as db_session: delete_chat_sessions_older_than(retention_limit_days, db_session) @@ -67,16 +71,16 @@ def perform_ttl_management_task(retention_limit_days: int) -> None: name="check_sync_external_doc_permissions_task", soft_time_limit=JOB_TIMEOUT, ) -def check_sync_external_doc_permissions_task() -> None: +def check_sync_external_doc_permissions_task(*, tenant_id: str | None) -> None: """Runs periodically to sync external permissions""" - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: cc_pairs = get_all_auto_sync_cc_pairs(db_session) for cc_pair in cc_pairs: if should_perform_external_doc_permissions_check( cc_pair=cc_pair, db_session=db_session ): sync_external_doc_permissions_task.apply_async( - 
kwargs=dict(cc_pair_id=cc_pair.id), + kwargs=dict(cc_pair_id=cc_pair.id, tenant_id=tenant_id), ) @@ -84,16 +88,16 @@ def check_sync_external_doc_permissions_task() -> None: name="check_sync_external_group_permissions_task", soft_time_limit=JOB_TIMEOUT, ) -def check_sync_external_group_permissions_task() -> None: +def check_sync_external_group_permissions_task(*, tenant_id: str | None) -> None: """Runs periodically to sync external group permissions""" - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: cc_pairs = get_all_auto_sync_cc_pairs(db_session) for cc_pair in cc_pairs: if should_perform_external_group_permissions_check( cc_pair=cc_pair, db_session=db_session ): sync_external_group_permissions_task.apply_async( - kwargs=dict(cc_pair_id=cc_pair.id), + kwargs=dict(cc_pair_id=cc_pair.id, tenant_id=tenant_id), ) @@ -101,51 +105,35 @@ def check_sync_external_group_permissions_task() -> None: name="check_ttl_management_task", soft_time_limit=JOB_TIMEOUT, ) -def check_ttl_management_task() -> None: +def check_ttl_management_task(*, tenant_id: str | None) -> None: """Runs periodically to check if any ttl tasks should be run and adds them to the queue""" + token = None + if MULTI_TENANT and tenant_id is not None: + token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) + settings = load_settings() retention_limit_days = settings.maximum_chat_retention_days - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: if should_perform_chat_ttl_check(retention_limit_days, db_session): perform_ttl_management_task.apply_async( - kwargs=dict(retention_limit_days=retention_limit_days), + kwargs=dict( + retention_limit_days=retention_limit_days, tenant_id=tenant_id + ), ) + if token is not None: + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) @celery_app.task( name="autogenerate_usage_report_task", soft_time_limit=JOB_TIMEOUT, ) -def autogenerate_usage_report_task() -> None: +def autogenerate_usage_report_task(*, tenant_id: str | None) -> None: """This generates usage report under the /admin/generate-usage/report endpoint""" - with Session(get_sqlalchemy_engine()) as db_session: + with get_session_with_tenant(tenant_id) as db_session: create_new_usage_report( db_session=db_session, user_id=None, period=None, ) - - -##### -# Celery Beat (Periodic Tasks) Settings -##### -celery_app.conf.beat_schedule = { - "sync-external-doc-permissions": { - "task": "check_sync_external_doc_permissions_task", - "schedule": timedelta(seconds=5), # TODO: optimize this - }, - "sync-external-group-permissions": { - "task": "check_sync_external_group_permissions_task", - "schedule": timedelta(seconds=5), # TODO: optimize this - }, - "autogenerate_usage_report": { - "task": "autogenerate_usage_report_task", - "schedule": timedelta(days=30), # TODO: change this to config flag - }, - "check-ttl-management": { - "task": "check_ttl_management_task", - "schedule": timedelta(hours=1), - }, - **(celery_app.conf.beat_schedule or {}), -} diff --git a/backend/ee/danswer/background/celery/tasks/beat_schedule.py b/backend/ee/danswer/background/celery/tasks/beat_schedule.py new file mode 100644 index 00000000000..05e2b92bde1 --- /dev/null +++ b/backend/ee/danswer/background/celery/tasks/beat_schedule.py @@ -0,0 +1,33 @@ +from datetime import timedelta +from typing import Any + +from danswer.background.celery.tasks.beat_schedule import ( + tasks_to_schedule as base_tasks_to_schedule, +) + +ee_tasks_to_schedule = [ + { + "name": 
"sync-external-doc-permissions", + "task": "check_sync_external_doc_permissions_task", + "schedule": timedelta(seconds=30), # TODO: optimize this + }, + { + "name": "sync-external-group-permissions", + "task": "check_sync_external_group_permissions_task", + "schedule": timedelta(seconds=60), # TODO: optimize this + }, + { + "name": "autogenerate_usage_report", + "task": "autogenerate_usage_report_task", + "schedule": timedelta(days=30), # TODO: change this to config flag + }, + { + "name": "check-ttl-management", + "task": "check_ttl_management_task", + "schedule": timedelta(hours=1), + }, +] + + +def get_tasks_to_schedule() -> list[dict[str, Any]]: + return ee_tasks_to_schedule + base_tasks_to_schedule diff --git a/backend/ee/danswer/background/celery/tasks/vespa/tasks.py b/backend/ee/danswer/background/celery/tasks/vespa/tasks.py index d194b2ef9a9..84d67f0a3ac 100644 --- a/backend/ee/danswer/background/celery/tasks/vespa/tasks.py +++ b/backend/ee/danswer/background/celery/tasks/vespa/tasks.py @@ -3,8 +3,8 @@ from redis import Redis from sqlalchemy.orm import Session -from danswer.background.celery.celery_app import task_logger -from danswer.background.celery.celery_redis import RedisUserGroup +from danswer.background.celery.apps.app_base import task_logger +from danswer.redis.redis_usergroup import RedisUserGroup from danswer.utils.logger import setup_logger from ee.danswer.db.user_group import delete_user_group from ee.danswer.db.user_group import fetch_user_group @@ -13,28 +13,33 @@ logger = setup_logger() -def monitor_usergroup_taskset(key_bytes: bytes, r: Redis, db_session: Session) -> None: +def monitor_usergroup_taskset( + tenant_id: str | None, key_bytes: bytes, r: Redis, db_session: Session +) -> None: """This function is likely to move in the worker refactor happening next.""" - key = key_bytes.decode("utf-8") - usergroup_id = RedisUserGroup.get_id_from_fence_key(key) - if not usergroup_id: - task_logger.warning("Could not parse usergroup id from {key}") - return - - rug = RedisUserGroup(usergroup_id) - fence_value = r.get(rug.fence_key) - if fence_value is None: + fence_key = key_bytes.decode("utf-8") + usergroup_id_str = RedisUserGroup.get_id_from_fence_key(fence_key) + if not usergroup_id_str: + task_logger.warning(f"Could not parse usergroup id from {fence_key}") return try: - initial_count = int(cast(int, fence_value)) + usergroup_id = int(usergroup_id_str) except ValueError: - task_logger.error("The value is not an integer.") + task_logger.exception(f"usergroup_id ({usergroup_id_str}) is not an integer!") + raise + + rug = RedisUserGroup(tenant_id, usergroup_id) + if not rug.fenced: + return + + initial_count = rug.payload + if initial_count is None: return count = cast(int, r.scard(rug.taskset_key)) task_logger.info( - f"User group sync: usergroup_id={usergroup_id} remaining={count} initial={initial_count}" + f"User group sync progress: usergroup_id={usergroup_id} remaining={count} initial={initial_count}" ) if count > 0: return @@ -48,5 +53,4 @@ def monitor_usergroup_taskset(key_bytes: bytes, r: Redis, db_session: Session) - mark_user_group_as_synced(db_session=db_session, user_group=user_group) task_logger.info(f"Synced usergroup. 
id='{usergroup_id}'") - r.delete(rug.taskset_key) - r.delete(rug.fence_key) + rug.reset() diff --git a/backend/ee/danswer/background/celery_utils.py b/backend/ee/danswer/background/celery_utils.py index c42812f81c3..80278d8c433 100644 --- a/backend/ee/danswer/background/celery_utils.py +++ b/backend/ee/danswer/background/celery_utils.py @@ -1,3 +1,6 @@ +from datetime import datetime +from datetime import timezone + from sqlalchemy.orm import Session from danswer.db.enums import AccessType @@ -12,10 +15,32 @@ from ee.danswer.background.task_name_builders import ( name_sync_external_group_permissions_task, ) +from ee.danswer.external_permissions.sync_params import PERMISSION_SYNC_PERIODS logger = setup_logger() +def _is_time_to_run_sync(cc_pair: ConnectorCredentialPair) -> bool: + source_sync_period = PERMISSION_SYNC_PERIODS.get(cc_pair.connector.source) + + # If no sync period is configured in PERMISSION_SYNC_PERIODS for this source, we always run the sync. + if not source_sync_period: + return True + + # If the last sync time is None, the sync has never run, so we run it + if cc_pair.last_time_perm_sync is None: + return True + + last_sync = cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc) + current_time = datetime.now(timezone.utc) + + # If the time since the last sync exceeds the configured sync period, we run the sync + if (current_time - last_sync).total_seconds() > source_sync_period: + return True + + return False + + def should_perform_chat_ttl_check( retention_limit_days: int | None, db_session: Session ) -> bool: @@ -28,7 +53,7 @@ def should_perform_chat_ttl_check( if not latest_task: return True - if latest_task and check_task_is_live_and_not_timed_out(latest_task, db_session): + if check_task_is_live_and_not_timed_out(latest_task, db_session): logger.debug(f"{task_name} is already being performed. Skipping.") return False return True @@ -50,6 +75,9 @@ def should_perform_external_doc_permissions_check( logger.debug(f"{task_name} is already being performed. Skipping.") return False + if not _is_time_to_run_sync(cc_pair): + return False + return True @@ -69,4 +97,7 @@ def should_perform_external_group_permissions_check( logger.debug(f"{task_name} is already being performed. 
Skipping.") return False + if not _is_time_to_run_sync(cc_pair): + return False + return True diff --git a/backend/ee/danswer/background/task_name_builders.py b/backend/ee/danswer/background/task_name_builders.py index c494329d366..aea6648a02d 100644 --- a/backend/ee/danswer/background/task_name_builders.py +++ b/backend/ee/danswer/background/task_name_builders.py @@ -1,10 +1,14 @@ -def name_chat_ttl_task(retention_limit_days: int) -> str: +def name_chat_ttl_task(retention_limit_days: int, tenant_id: str | None = None) -> str: return f"chat_ttl_{retention_limit_days}_days" -def name_sync_external_doc_permissions_task(cc_pair_id: int) -> str: +def name_sync_external_doc_permissions_task( + cc_pair_id: int, tenant_id: str | None = None +) -> str: return f"sync_external_doc_permissions_task__{cc_pair_id}" -def name_sync_external_group_permissions_task(cc_pair_id: int) -> str: +def name_sync_external_group_permissions_task( + cc_pair_id: int, tenant_id: str | None = None +) -> str: return f"sync_external_group_permissions_task__{cc_pair_id}" diff --git a/backend/ee/danswer/configs/app_configs.py b/backend/ee/danswer/configs/app_configs.py index 1430a499136..d36cef3daf8 100644 --- a/backend/ee/danswer/configs/app_configs.py +++ b/backend/ee/danswer/configs/app_configs.py @@ -21,3 +21,11 @@ # Auto Permission Sync ##### NUM_PERMISSION_WORKERS = int(os.environ.get("NUM_PERMISSION_WORKERS") or 2) + + +STRIPE_SECRET_KEY = os.environ.get("STRIPE_SECRET_KEY") +STRIPE_PRICE_ID = os.environ.get("STRIPE_PRICE") + +OPENAI_DEFAULT_API_KEY = os.environ.get("OPENAI_DEFAULT_API_KEY") +ANTHROPIC_DEFAULT_API_KEY = os.environ.get("ANTHROPIC_DEFAULT_API_KEY") +COHERE_DEFAULT_API_KEY = os.environ.get("COHERE_DEFAULT_API_KEY") diff --git a/backend/ee/danswer/db/api_key.py b/backend/ee/danswer/db/api_key.py index c38f32a0f84..db876067bd1 100644 --- a/backend/ee/danswer/db/api_key.py +++ b/backend/ee/danswer/db/api_key.py @@ -15,10 +15,16 @@ from ee.danswer.auth.api_key import generate_api_key from ee.danswer.auth.api_key import hash_api_key from ee.danswer.server.api_key.models import APIKeyArgs +from shared_configs.configs import MULTI_TENANT +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR + + +def get_api_key_email_pattern() -> str: + return DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN def is_api_key_email_address(email: str) -> bool: - return email.endswith(f"{DANSWER_API_KEY_DUMMY_EMAIL_DOMAIN}") + return email.endswith(get_api_key_email_pattern()) def fetch_api_keys(db_session: Session) -> list[ApiKeyDescriptor]: @@ -60,7 +66,11 @@ def insert_api_key( db_session: Session, api_key_args: APIKeyArgs, user_id: uuid.UUID | None ) -> ApiKeyDescriptor: std_password_helper = PasswordHelper() - api_key = generate_api_key() + + # Get tenant_id from context var (will be default schema for single tenant) + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + + api_key = generate_api_key(tenant_id if MULTI_TENANT else None) api_key_user_id = uuid.uuid4() display_name = api_key_args.name or UNNAMED_KEY_PLACEHOLDER diff --git a/backend/ee/danswer/db/query_history.py b/backend/ee/danswer/db/query_history.py index 868afef23ce..b6a79cb7727 100644 --- a/backend/ee/danswer/db/query_history.py +++ b/backend/ee/danswer/db/query_history.py @@ -29,6 +29,7 @@ def fetch_chat_sessions_eagerly_by_time( filters: list[ColumnElement | BinaryExpression] = [ ChatSession.time_created.between(start, end) ] + if initial_id: filters.append(ChatSession.id < initial_id) subquery = ( diff --git a/backend/ee/danswer/db/usage_export.py 
b/backend/ee/danswer/db/usage_export.py index bf53362e97e..ac9535bad4f 100644 --- a/backend/ee/danswer/db/usage_export.py +++ b/backend/ee/danswer/db/usage_export.py @@ -42,7 +42,7 @@ def get_empty_chat_messages_entries__paginated( message_skeletons.append( ChatMessageSkeleton( - message_id=chat_session.id, + message_id=message.id, chat_session_id=chat_session.id, user_id=str(chat_session.user_id) if chat_session.user_id else None, flow_type=flow_type, diff --git a/backend/ee/danswer/db/user_group.py b/backend/ee/danswer/db/user_group.py index 863b9170e3f..470a46e688b 100644 --- a/backend/ee/danswer/db/user_group.py +++ b/backend/ee/danswer/db/user_group.py @@ -603,7 +603,7 @@ def delete_user_group_cc_pair_relationship__no_commit( if cc_pair.status != ConnectorCredentialPairStatus.DELETING: raise ValueError( - f"Connector Credential Pair '{cc_pair_id}' is not in the DELETING state" + f"Connector Credential Pair '{cc_pair_id}' is not in the DELETING state. status={cc_pair.status}" ) delete_stmt = delete(UserGroup__ConnectorCredentialPair).where( diff --git a/backend/ee/danswer/external_permissions/confluence/confluence_sync_utils.py b/backend/ee/danswer/external_permissions/confluence/confluence_sync_utils.py deleted file mode 100644 index e911e2649ba..00000000000 --- a/backend/ee/danswer/external_permissions/confluence/confluence_sync_utils.py +++ /dev/null @@ -1,18 +0,0 @@ -from typing import Any - -from atlassian import Confluence # type:ignore - - -def build_confluence_client( - connector_specific_config: dict[str, Any], raw_credentials_json: dict[str, Any] -) -> Confluence: - is_cloud = connector_specific_config.get("is_cloud", False) - return Confluence( - api_version="cloud" if is_cloud else "latest", - # Remove trailing slash from wiki_base if present - url=connector_specific_config["wiki_base"].rstrip("/"), - # passing in username causes issues for Confluence data center - username=raw_credentials_json["confluence_username"] if is_cloud else None, - password=raw_credentials_json["confluence_access_token"] if is_cloud else None, - token=raw_credentials_json["confluence_access_token"] if not is_cloud else None, - ) diff --git a/backend/ee/danswer/external_permissions/confluence/doc_sync.py b/backend/ee/danswer/external_permissions/confluence/doc_sync.py index b6812adb9e7..a7bc898b8b7 100644 --- a/backend/ee/danswer/external_permissions/confluence/doc_sync.py +++ b/backend/ee/danswer/external_permissions/confluence/doc_sync.py @@ -1,41 +1,75 @@ +""" +Rules defined here: +https://confluence.atlassian.com/conf85/check-who-can-view-a-page-1283360557.html +""" from typing import Any -from atlassian import Confluence # type:ignore from sqlalchemy.orm import Session from danswer.access.models import ExternalAccess -from danswer.connectors.confluence.confluence_utils import ( - build_confluence_document_id, -) -from danswer.connectors.confluence.rate_limit_handler import ( - make_confluence_call_handle_rate_limit, -) +from danswer.connectors.confluence.connector import ConfluenceConnector +from danswer.connectors.confluence.onyx_confluence import OnyxConfluence +from danswer.connectors.confluence.utils import get_user_email_from_username__server +from danswer.connectors.models import SlimDocument from danswer.db.models import ConnectorCredentialPair from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit from danswer.utils.logger import setup_logger from ee.danswer.db.document import upsert_document_external_perms__no_commit -from 
ee.danswer.external_permissions.confluence.confluence_sync_utils import ( - build_confluence_client, -) - logger = setup_logger() +_VIEWSPACE_PERMISSION_TYPE = "VIEWSPACE" _REQUEST_PAGINATION_LIMIT = 100 -def _get_space_permissions( - db_session: Session, - confluence_client: Confluence, - space_id: str, +def _get_server_space_permissions( + confluence_client: OnyxConfluence, space_key: str ) -> ExternalAccess: - get_space_permissions = make_confluence_call_handle_rate_limit( - confluence_client.get_space_permissions + space_permissions = confluence_client.get_space_permissions(space_key=space_key) + + viewspace_permissions = [] + for permission_category in space_permissions: + if permission_category.get("type") == _VIEWSPACE_PERMISSION_TYPE: + viewspace_permissions.extend( + permission_category.get("spacePermissions", []) + ) + + user_names = set() + group_names = set() + for permission in viewspace_permissions: + if user_name := permission.get("userName"): + user_names.add(user_name) + if group_name := permission.get("groupName"): + group_names.add(group_name) + + user_emails = set() + for user_name in user_names: + user_email = get_user_email_from_username__server(confluence_client, user_name) + if user_email: + user_emails.add(user_email) + else: + logger.warning(f"Email for user {user_name} not found in Confluence") + + return ExternalAccess( + external_user_emails=user_emails, + external_user_group_ids=group_names, + # TODO: Check if the space is publicly accessible + # Currently, we assume the space is not public + # We need to check if anonymous access is turned on for the site and space + # This information is paywalled so it remains unimplemented + is_public=False, ) - space_permissions = get_space_permissions(space_id).get("permissions", []) + +def _get_cloud_space_permissions( + confluence_client: OnyxConfluence, space_key: str +) -> ExternalAccess: + space_permissions_result = confluence_client.get_space( + space_key=space_key, expand="permissions" + ) + space_permissions = space_permissions_result.get("permissions", []) + user_emails = set() - # Confluence enforces that group names are unique group_names = set() is_externally_public = False for permission in space_permissions: @@ -54,9 +88,7 @@ def _get_space_permissions( # If the permission specifies read access for anonymous users, then # the space is publicly accessible is_externally_public = True - batch_add_non_web_user_if_not_exists__no_commit( - db_session=db_session, emails=list(user_emails) - ) + return ExternalAccess( external_user_emails=user_emails, external_user_group_ids=group_names, @@ -64,159 +96,124 @@ def _get_space_permissions( ) -def _get_restrictions_for_page( - db_session: Session, - page: dict[str, Any], - space_permissions: ExternalAccess, -) -> ExternalAccess: - """ - WARNING: This function includes no pagination. So if a page is private within - the space and has over 200 users or over 200 groups with explicitly read access, - this function will leave out some users or groups. - 200 is a large amount so it is unlikely, but just be aware. 
- """ - restrictions_json = page.get("restrictions", {}) - read_access_dict = restrictions_json.get("read", {}).get("restrictions", {}) - - read_access_user_jsons = read_access_dict.get("user", {}).get("results", []) - read_access_group_jsons = read_access_dict.get("group", {}).get("results", []) - - is_space_public = read_access_user_jsons == [] and read_access_group_jsons == [] - - if not is_space_public: - read_access_user_emails = [ - user["email"] for user in read_access_user_jsons if user.get("email") - ] - read_access_groups = [group["name"] for group in read_access_group_jsons] - batch_add_non_web_user_if_not_exists__no_commit( - db_session=db_session, emails=list(read_access_user_emails) - ) - external_access = ExternalAccess( - external_user_emails=set(read_access_user_emails), - external_user_group_ids=set(read_access_groups), - is_public=False, - ) - else: - external_access = space_permissions - - return external_access - - -def _fetch_attachment_document_ids_for_page_paginated( - confluence_client: Confluence, page: dict[str, Any] -) -> list[str]: - """ - Starts by just extracting the first page of attachments from - the page. If all attachments are in the first page, then - no calls to the api are made from this function. - """ - get_attachments_from_content = make_confluence_call_handle_rate_limit( - confluence_client.get_attachments_from_content - ) - - attachment_doc_ids = [] - attachments_dict = page["children"]["attachment"] +def _get_space_permissions( + confluence_client: OnyxConfluence, + is_cloud: bool, +) -> dict[str, ExternalAccess]: + # Gets all the spaces in the Confluence instance + all_space_keys = [] start = 0 - while True: - attachments_list = attachments_dict["results"] - attachment_doc_ids.extend( - [ - build_confluence_document_id( - base_url=confluence_client.url, - content_url=attachment["_links"]["download"], - ) - for attachment in attachments_list - ] + spaces_batch = confluence_client.get_all_spaces( + start=start, limit=_REQUEST_PAGINATION_LIMIT ) + for space in spaces_batch.get("results", []): + all_space_keys.append(space.get("key")) - if "next" not in attachments_dict["_links"]: + if len(spaces_batch.get("results", [])) < _REQUEST_PAGINATION_LIMIT: break - start += len(attachments_list) - attachments_dict = get_attachments_from_content( - page_id=page["id"], - start=start, - limit=_REQUEST_PAGINATION_LIMIT, - ) + start += len(spaces_batch.get("results", [])) - return attachment_doc_ids + # Gets the permissions for each space + space_permissions_by_space_key: dict[str, ExternalAccess] = {} + for space_key in all_space_keys: + if is_cloud: + space_permissions = _get_cloud_space_permissions( + confluence_client=confluence_client, space_key=space_key + ) + else: + space_permissions = _get_server_space_permissions( + confluence_client=confluence_client, space_key=space_key + ) + # Stores the permissions for each space + space_permissions_by_space_key[space_key] = space_permissions -def _fetch_all_pages_paginated( - confluence_client: Confluence, - space_id: str, -) -> list[dict[str, Any]]: - get_all_pages_from_space = make_confluence_call_handle_rate_limit( - confluence_client.get_all_pages_from_space - ) + return space_permissions_by_space_key - # For each page, this fetches the page's attachments and restrictions. 
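# A small, self-contained sketch of the per-space mapping built by
# _get_space_permissions above: space key -> an ExternalAccess-style record.
# The dataclass is only a stand-in for danswer.access.models.ExternalAccess,
# and the space keys, emails, and groups are made up for illustration.
from dataclasses import dataclass


@dataclass
class ExternalAccessStub:
    external_user_emails: set[str]
    external_user_group_ids: set[str]
    is_public: bool


space_permissions_by_space_key = {
    "ENG": ExternalAccessStub(
        external_user_emails={"alice@example.com"},
        external_user_group_ids={"confluence-engineering"},
        is_public=False,
    ),
    # A space with anonymous read access enabled (detectable on cloud per the code above)
    "DOCS": ExternalAccessStub(
        external_user_emails=set(),
        external_user_group_ids=set(),
        is_public=True,
    ),
}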
- expansion_strings = [ - "children.attachment", - "restrictions.read.restrictions.user", - "restrictions.read.restrictions.group", - ] - expansion_string = ",".join(expansion_strings) - all_pages = [] - start = 0 - while True: - pages_dict = get_all_pages_from_space( - space=space_id, - start=start, - limit=_REQUEST_PAGINATION_LIMIT, - expand=expansion_string, - ) - all_pages.extend(pages_dict) +def _extract_read_access_restrictions( + confluence_client: OnyxConfluence, restrictions: dict[str, Any] +) -> ExternalAccess | None: + """ + Converts a page's restrictions dict into an ExternalAccess object. + If there are no restrictions, then return None + """ + read_access = restrictions.get("read", {}) + read_access_restrictions = read_access.get("restrictions", {}) + + # Extract the users with read access + read_access_user = read_access_restrictions.get("user", {}) + read_access_user_jsons = read_access_user.get("results", []) + read_access_user_emails = [] + for user in read_access_user_jsons: + # If the user has an email, then add it to the list + if user.get("email"): + read_access_user_emails.append(user["email"]) + # If the user has a username and not an email, then get the email from Confluence + elif user.get("username"): + email = get_user_email_from_username__server( + confluence_client=confluence_client, user_name=user["username"] + ) + if email: + read_access_user_emails.append(email) + else: + logger.warning( + f"Email for user {user['username']} not found in Confluence" + ) + else: + logger.warning(f"User {user} does not have an email or username") - response_size = len(pages_dict) - if response_size < _REQUEST_PAGINATION_LIMIT: - break - start += response_size + # Extract the groups with read access + read_access_group = read_access_restrictions.get("group", {}) + read_access_group_jsons = read_access_group.get("results", []) + read_access_group_names = [ + group["name"] for group in read_access_group_jsons if group.get("name") + ] + + # If there are no restrictions found, then the page + # inherits the space's restrictions so return None + is_space_public = read_access_user_emails == [] and read_access_group_names == [] + if is_space_public: + return None - return all_pages + return ExternalAccess( + external_user_emails=set(read_access_user_emails), + external_user_group_ids=set(read_access_group_names), + # there is no way for a page to be individually public if the space isn't public + is_public=False, + ) def _fetch_all_page_restrictions_for_space( - db_session: Session, - confluence_client: Confluence, - space_id: str, - space_permissions: ExternalAccess, + confluence_client: OnyxConfluence, + slim_docs: list[SlimDocument], + space_permissions_by_space_key: dict[str, ExternalAccess], ) -> dict[str, ExternalAccess]: - all_pages = _fetch_all_pages_paginated( - confluence_client=confluence_client, - space_id=space_id, - ) - + """ + For all pages, if a page has restrictions, then use those restrictions. + Otherwise, use the space's restrictions. + """ document_restrictions: dict[str, ExternalAccess] = {} - for page in all_pages: - """ - This assigns the same permissions to all attachments of a page and - the page itself. - This is because the attachments are stored in the same Confluence space as the page. - WARNING: We create a dbDocument entry for all attachments, even though attachments - may not be their own standalone documents. This is likely fine as we just upsert a - document with just permissions. 
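# A simplified, self-contained sketch of the inherit-vs-override rule implemented by
# _extract_read_access_restrictions above: if a page carries no read restrictions of
# its own, it falls back to the space-level permissions (the function returns None in
# that case). The restriction payloads below are invented for illustration.
def _page_readers(restrictions: dict) -> tuple[set[str], set[str]] | None:
    read = restrictions.get("read", {}).get("restrictions", {})
    users = {u["email"] for u in read.get("user", {}).get("results", []) if u.get("email")}
    groups = {g["name"] for g in read.get("group", {}).get("results", []) if g.get("name")}
    if not users and not groups:
        return None  # no page-level restrictions, so inherit the space's access
    return users, groups


restricted_page = {
    "read": {"restrictions": {"user": {"results": [{"email": "bob@example.com"}]}}}
}
unrestricted_page: dict = {}

assert _page_readers(restricted_page) == ({"bob@example.com"}, set())
assert _page_readers(unrestricted_page) is None  # caller falls back to space permissions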
- """ - attachment_document_ids = [ - build_confluence_document_id( - base_url=confluence_client.url, - content_url=page["_links"]["webui"], - ) - ] - attachment_document_ids.extend( - _fetch_attachment_document_ids_for_page_paginated( - confluence_client=confluence_client, page=page + + for slim_doc in slim_docs: + if slim_doc.perm_sync_data is None: + raise ValueError( + f"No permission sync data found for document {slim_doc.id}" ) + restrictions = _extract_read_access_restrictions( + confluence_client=confluence_client, + restrictions=slim_doc.perm_sync_data.get("restrictions", {}), ) - page_permissions = _get_restrictions_for_page( - db_session=db_session, - page=page, - space_permissions=space_permissions, - ) - for attachment_document_id in attachment_document_ids: - document_restrictions[attachment_document_id] = page_permissions + if restrictions: + document_restrictions[slim_doc.id] = restrictions + else: + space_key = slim_doc.perm_sync_data.get("space_key") + if space_permissions := space_permissions_by_space_key.get(space_key): + document_restrictions[slim_doc.id] = space_permissions + else: + logger.warning(f"No permissions found for document {slim_doc.id}") return document_restrictions @@ -231,24 +228,39 @@ def confluence_doc_sync( it in postgres so that when it gets created later, the permissions are already populated """ - confluence_client = build_confluence_client( - cc_pair.connector.connector_specific_config, cc_pair.credential.credential_json + confluence_connector = ConfluenceConnector( + **cc_pair.connector.connector_specific_config ) - space_permissions = _get_space_permissions( - db_session=db_session, + confluence_connector.load_credentials(cc_pair.credential.credential_json) + if confluence_connector.confluence_client is None: + raise ValueError("Failed to load credentials") + confluence_client = confluence_connector.confluence_client + + is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False) + + space_permissions_by_space_key = _get_space_permissions( confluence_client=confluence_client, - space_id=cc_pair.connector.connector_specific_config["space"], + is_cloud=is_cloud, ) - fresh_doc_permissions = _fetch_all_page_restrictions_for_space( - db_session=db_session, + + slim_docs = [] + for doc_batch in confluence_connector.retrieve_all_slim_documents(): + slim_docs.extend(doc_batch) + + permissions_by_doc_id = _fetch_all_page_restrictions_for_space( confluence_client=confluence_client, - space_id=cc_pair.connector.connector_specific_config["space"], - space_permissions=space_permissions, + slim_docs=slim_docs, + space_permissions_by_space_key=space_permissions_by_space_key, ) - for doc_id, ext_access in fresh_doc_permissions.items(): + + all_emails = set() + for doc_id, page_specific_access in permissions_by_doc_id.items(): upsert_document_external_perms__no_commit( db_session=db_session, doc_id=doc_id, - external_access=ext_access, + external_access=page_specific_access, source_type=cc_pair.connector.source, ) + all_emails.update(page_specific_access.external_user_emails) + + batch_add_non_web_user_if_not_exists__no_commit(db_session, list(all_emails)) diff --git a/backend/ee/danswer/external_permissions/confluence/group_sync.py b/backend/ee/danswer/external_permissions/confluence/group_sync.py index 33bc60cc6d5..a55bb777bc5 100644 --- a/backend/ee/danswer/external_permissions/confluence/group_sync.py +++ b/backend/ee/danswer/external_permissions/confluence/group_sync.py @@ -1,77 +1,41 @@ -from collections.abc import Iterator +from typing 
import Any -from atlassian import Confluence # type:ignore -from requests import HTTPError from sqlalchemy.orm import Session -from danswer.connectors.confluence.rate_limit_handler import ( - make_confluence_call_handle_rate_limit, -) +from danswer.connectors.confluence.onyx_confluence import OnyxConfluence +from danswer.connectors.confluence.utils import build_confluence_client +from danswer.connectors.confluence.utils import get_user_email_from_username__server from danswer.db.models import ConnectorCredentialPair from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit from danswer.utils.logger import setup_logger from ee.danswer.db.external_perm import ExternalUserGroup from ee.danswer.db.external_perm import replace_user__ext_group_for_cc_pair__no_commit -from ee.danswer.external_permissions.confluence.confluence_sync_utils import ( - build_confluence_client, -) logger = setup_logger() -_PAGE_SIZE = 100 - - -def _get_confluence_group_names_paginated( - confluence_client: Confluence, -) -> Iterator[str]: - get_all_groups = make_confluence_call_handle_rate_limit( - confluence_client.get_all_groups - ) - - start = 0 - while True: - try: - groups = get_all_groups(start=start, limit=_PAGE_SIZE) - except HTTPError as e: - if e.response.status_code in (403, 404): - return - raise e - - for group in groups: - if group_name := group.get("name"): - yield group_name - - if len(groups) < _PAGE_SIZE: - break - start += _PAGE_SIZE - def _get_group_members_email_paginated( - confluence_client: Confluence, + confluence_client: OnyxConfluence, group_name: str, -) -> list[str]: - get_group_members = make_confluence_call_handle_rate_limit( - confluence_client.get_group_members - ) - group_member_emails: list[str] = [] - start = 0 - while True: - try: - members = get_group_members( - group_name=group_name, start=start, limit=_PAGE_SIZE - ) - except HTTPError as e: - if e.response.status_code == 403 or e.response.status_code == 404: - return group_member_emails - raise e +) -> set[str]: + members: list[dict[str, Any]] = [] + for member_batch in confluence_client.paginated_group_members_retrieval(group_name): + members.extend(member_batch) + + group_member_emails: set[str] = set() + for member in members: + email = member.get("email") + if not email: + user_name = member.get("username") + if user_name: + email = get_user_email_from_username__server( + confluence_client=confluence_client, + user_name=user_name, + ) + if email: + group_member_emails.add(email) - group_member_emails.extend( - [member.get("email") for member in members if member.get("email")] - ) - if len(members) < _PAGE_SIZE: - break - start += _PAGE_SIZE return group_member_emails @@ -79,23 +43,34 @@ def confluence_group_sync( db_session: Session, cc_pair: ConnectorCredentialPair, ) -> None: + is_cloud = cc_pair.connector.connector_specific_config.get("is_cloud", False) confluence_client = build_confluence_client( - cc_pair.connector.connector_specific_config, cc_pair.credential.credential_json + credentials_json=cc_pair.credential.credential_json, + is_cloud=is_cloud, + wiki_base=cc_pair.connector.connector_specific_config["wiki_base"], ) + # Get all group names + group_names: list[str] = [] + for group_batch in confluence_client.paginated_groups_retrieval(): + for group in group_batch: + if group_name := group.get("name"): + group_names.append(group_name) + + # For each group name, get all members and create a danswer group danswer_groups: list[ExternalUserGroup] = [] - # Confluence enforces that group names are unique 
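# A tiny, self-contained sketch of the email-resolution fallback used by
# _get_group_members_email_paginated above: prefer the member's email field, and
# otherwise resolve the username to an email (stubbed here with a dict instead of
# the get_user_email_from_username__server call). The member payloads are made up.
username_to_email = {"jdoe": "jdoe@example.com"}  # stand-in for the server lookup


def resolve_member_email(member: dict) -> str | None:
    if email := member.get("email"):
        return email
    if username := member.get("username"):
        return username_to_email.get(username)
    return None


members = [
    {"email": "alice@example.com"},
    {"username": "jdoe"},
    {"displayName": "no contact info"},  # dropped: neither email nor username resolves
]
emails = {email for m in members if (email := resolve_member_email(m))}
assert emails == {"alice@example.com", "jdoe@example.com"}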
- for group_name in _get_confluence_group_names_paginated(confluence_client): + for group_name in group_names: group_member_emails = _get_group_members_email_paginated( confluence_client, group_name ) group_members = batch_add_non_web_user_if_not_exists__no_commit( - db_session=db_session, emails=group_member_emails + db_session=db_session, emails=list(group_member_emails) ) if group_members: danswer_groups.append( ExternalUserGroup( - id=group_name, user_ids=[user.id for user in group_members] + id=group_name, + user_ids=[user.id for user in group_members], ) ) diff --git a/backend/ee/danswer/external_permissions/gmail/doc_sync.py b/backend/ee/danswer/external_permissions/gmail/doc_sync.py new file mode 100644 index 00000000000..2748443f022 --- /dev/null +++ b/backend/ee/danswer/external_permissions/gmail/doc_sync.py @@ -0,0 +1,68 @@ +from datetime import datetime +from datetime import timezone + +from sqlalchemy.orm import Session + +from danswer.access.models import ExternalAccess +from danswer.connectors.gmail.connector import GmailConnector +from danswer.connectors.interfaces import GenerateSlimDocumentOutput +from danswer.db.models import ConnectorCredentialPair +from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit +from danswer.utils.logger import setup_logger +from ee.danswer.db.document import upsert_document_external_perms__no_commit + +logger = setup_logger() + + +def _get_slim_doc_generator( + cc_pair: ConnectorCredentialPair, + gmail_connector: GmailConnector, +) -> GenerateSlimDocumentOutput: + current_time = datetime.now(timezone.utc) + start_time = ( + cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc).timestamp() + if cc_pair.last_time_perm_sync + else 0.0 + ) + + return gmail_connector.retrieve_all_slim_documents( + start=start_time, end=current_time.timestamp() + ) + + +def gmail_doc_sync( + db_session: Session, + cc_pair: ConnectorCredentialPair, +) -> None: + """ + Adds the external permissions to the documents in postgres + if the document doesn't already exists in postgres, we create + it in postgres so that when it gets created later, the permissions are + already populated + """ + gmail_connector = GmailConnector(**cc_pair.connector.connector_specific_config) + gmail_connector.load_credentials(cc_pair.credential.credential_json) + + slim_doc_generator = _get_slim_doc_generator(cc_pair, gmail_connector) + + for slim_doc_batch in slim_doc_generator: + for slim_doc in slim_doc_batch: + if slim_doc.perm_sync_data is None: + logger.warning(f"No permissions found for document {slim_doc.id}") + continue + if user_email := slim_doc.perm_sync_data.get("user_email"): + ext_access = ExternalAccess( + external_user_emails=set([user_email]), + external_user_group_ids=set(), + is_public=False, + ) + batch_add_non_web_user_if_not_exists__no_commit( + db_session=db_session, + emails=list(ext_access.external_user_emails), + ) + upsert_document_external_perms__no_commit( + db_session=db_session, + doc_id=slim_doc.id, + external_access=ext_access, + source_type=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/google_drive/doc_sync.py b/backend/ee/danswer/external_permissions/google_drive/doc_sync.py index a957558a99e..fddb0e72171 100644 --- a/backend/ee/danswer/external_permissions/google_drive/doc_sync.py +++ b/backend/ee/danswer/external_permissions/google_drive/doc_sync.py @@ -1,157 +1,123 @@ -from collections.abc import Iterator from datetime import datetime from datetime import timezone from typing import Any -from 
typing import cast -from googleapiclient.discovery import build # type: ignore -from googleapiclient.errors import HttpError # type: ignore from sqlalchemy.orm import Session from danswer.access.models import ExternalAccess -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder -from danswer.connectors.factory import instantiate_connector -from danswer.connectors.google_drive.connector_auth import ( - get_google_drive_creds, -) -from danswer.connectors.google_drive.constants import FETCH_PERMISSIONS_SCOPES -from danswer.connectors.interfaces import PollConnector -from danswer.connectors.models import InputType +from danswer.connectors.google_drive.connector import GoogleDriveConnector +from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval +from danswer.connectors.google_utils.resources import get_drive_service +from danswer.connectors.interfaces import GenerateSlimDocumentOutput +from danswer.connectors.models import SlimDocument from danswer.db.models import ConnectorCredentialPair from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit from danswer.utils.logger import setup_logger from ee.danswer.db.document import upsert_document_external_perms__no_commit -# Google Drive APIs are quite flakey and may 500 for an -# extended period of time. Trying to combat here by adding a very -# long retry period (~20 minutes of trying every minute) -add_retries = retry_builder(tries=5, delay=5, max_delay=30) - - logger = setup_logger() +_PERMISSION_ID_PERMISSION_MAP: dict[str, dict[str, Any]] = {} -def _get_docs_with_additional_info( - db_session: Session, - cc_pair: ConnectorCredentialPair, -) -> dict[str, Any]: - # Get all document ids that need their permissions updated - runnable_connector = instantiate_connector( - db_session=db_session, - source=cc_pair.connector.source, - input_type=InputType.POLL, - connector_specific_config=cc_pair.connector.connector_specific_config, - credential=cc_pair.credential, - ) - - assert isinstance(runnable_connector, PollConnector) +def _get_slim_doc_generator( + cc_pair: ConnectorCredentialPair, + google_drive_connector: GoogleDriveConnector, +) -> GenerateSlimDocumentOutput: current_time = datetime.now(timezone.utc) start_time = ( cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc).timestamp() if cc_pair.last_time_perm_sync else 0.0 ) - cc_pair.last_time_perm_sync = current_time - doc_batch_generator = runnable_connector.poll_source( + return google_drive_connector.retrieve_all_slim_documents( start=start_time, end=current_time.timestamp() ) - docs_with_additional_info = { - doc.id: doc.additional_info - for doc_batch in doc_batch_generator - for doc in doc_batch - } - - return docs_with_additional_info - - -def _fetch_permissions_paginated( - drive_service: Any, drive_file_id: str -) -> Iterator[dict[str, Any]]: - next_token = None - - # Check if the file is trashed - # Returning nothing here will cause the external permissions to - # be empty which will get written to vespa (failing shut) - try: - file_metadata = add_retries( - lambda: drive_service.files() - .get(fileId=drive_file_id, fields="id, trashed") - .execute() - )() - except HttpError as e: - if e.resp.status == 404 or e.resp.status == 403: - return - logger.error(f"Failed to fetch permissions: {e}") - raise - - if file_metadata.get("trashed", False): - logger.debug(f"File with ID {drive_file_id} is trashed") - return - - # Get paginated permissions for the file id - while True: - try: - permissions_resp: dict[str, 
Any] = add_retries( - lambda: ( - drive_service.permissions() - .list( - fileId=drive_file_id, - fields="permissions(id, emailAddress, role, type, domain)", - supportsAllDrives=True, - pageToken=next_token, - ) - .execute() - ) - )() - except HttpError as e: - if e.resp.status == 404 or e.resp.status == 403: - break - logger.error(f"Failed to fetch permissions: {e}") - raise - - for permission in permissions_resp.get("permissions", []): - yield permission - - next_token = permissions_resp.get("nextPageToken") - if not next_token: - break - - -def _fetch_google_permissions_for_document_id( - db_session: Session, - drive_file_id: str, - raw_credentials_json: dict[str, str], - company_google_domains: list[str], -) -> ExternalAccess: - # Authenticate and construct service - google_drive_creds, _ = get_google_drive_creds( - raw_credentials_json, scopes=FETCH_PERMISSIONS_SCOPES + +def _fetch_permissions_for_permission_ids( + google_drive_connector: GoogleDriveConnector, + permission_ids: list[str], + permission_info: dict[str, Any], +) -> list[dict[str, Any]]: + doc_id = permission_info.get("doc_id") + if not permission_info or not doc_id: + return [] + + # Check cache first for all permission IDs + permissions = [ + _PERMISSION_ID_PERMISSION_MAP[pid] + for pid in permission_ids + if pid in _PERMISSION_ID_PERMISSION_MAP + ] + + # If we found all permissions in cache, return them + if len(permissions) == len(permission_ids): + return permissions + + owner_email = permission_info.get("owner_email") + drive_service = get_drive_service( + creds=google_drive_connector.creds, + user_email=(owner_email or google_drive_connector.primary_admin_email), + ) + + # Otherwise, fetch all permissions and update cache + fetched_permissions = execute_paginated_retrieval( + retrieval_function=drive_service.permissions().list, + list_key="permissions", + fileId=doc_id, + fields="permissions(id, emailAddress, type, domain)", + supportsAllDrives=True, ) - if not google_drive_creds.valid: - raise ValueError("Invalid Google Drive credentials") - drive_service = build("drive", "v3", credentials=google_drive_creds) + permissions_for_doc_id = [] + # Update cache and return all permissions + for permission in fetched_permissions: + permissions_for_doc_id.append(permission) + _PERMISSION_ID_PERMISSION_MAP[permission["id"]] = permission + + return permissions_for_doc_id + +def _get_permissions_from_slim_doc( + google_drive_connector: GoogleDriveConnector, + slim_doc: SlimDocument, +) -> ExternalAccess: + permission_info = slim_doc.perm_sync_data or {} + + permissions_list = permission_info.get("permissions", []) + if not permissions_list: + if permission_ids := permission_info.get("permission_ids"): + permissions_list = _fetch_permissions_for_permission_ids( + google_drive_connector=google_drive_connector, + permission_ids=permission_ids, + permission_info=permission_info, + ) + if not permissions_list: + logger.warning(f"No permissions found for document {slim_doc.id}") + return ExternalAccess( + external_user_emails=set(), + external_user_group_ids=set(), + is_public=False, + ) + + company_domain = google_drive_connector.google_domain user_emails: set[str] = set() group_emails: set[str] = set() public = False - for permission in _fetch_permissions_paginated(drive_service, drive_file_id): + for permission in permissions_list: permission_type = permission["type"] if permission_type == "user": user_emails.add(permission["emailAddress"]) elif permission_type == "group": group_emails.add(permission["emailAddress"]) - elif 
permission_type == "domain": - if permission["domain"] in company_google_domains: + elif permission_type == "domain" and company_domain: + if permission["domain"] == company_domain: public = True elif permission_type == "anyone": public = True - batch_add_non_web_user_if_not_exists__no_commit(db_session, list(user_emails)) - return ExternalAccess( external_user_emails=user_emails, external_user_group_ids=group_emails, @@ -169,32 +135,26 @@ def gdrive_doc_sync( it in postgres so that when it gets created later, the permissions are already populated """ - sync_details = cc_pair.auto_sync_options - if sync_details is None: - logger.error("Sync details not found for Google Drive") - raise ValueError("Sync details not found for Google Drive") - - # Here we run the connector to grab all the ids - # this may grab ids before they are indexed but that is fine because - # we create a document in postgres to hold the permissions info - # until the indexing job has a chance to run - docs_with_additional_info = _get_docs_with_additional_info( - db_session=db_session, - cc_pair=cc_pair, + google_drive_connector = GoogleDriveConnector( + **cc_pair.connector.connector_specific_config ) - - for doc_id, doc_additional_info in docs_with_additional_info.items(): - ext_access = _fetch_google_permissions_for_document_id( - db_session=db_session, - drive_file_id=doc_additional_info, - raw_credentials_json=cc_pair.credential.credential_json, - company_google_domains=[ - cast(dict[str, str], sync_details)["company_domain"] - ], - ) - upsert_document_external_perms__no_commit( - db_session=db_session, - doc_id=doc_id, - external_access=ext_access, - source_type=cc_pair.connector.source, - ) + google_drive_connector.load_credentials(cc_pair.credential.credential_json) + + slim_doc_generator = _get_slim_doc_generator(cc_pair, google_drive_connector) + + for slim_doc_batch in slim_doc_generator: + for slim_doc in slim_doc_batch: + ext_access = _get_permissions_from_slim_doc( + google_drive_connector=google_drive_connector, + slim_doc=slim_doc, + ) + batch_add_non_web_user_if_not_exists__no_commit( + db_session=db_session, + emails=list(ext_access.external_user_emails), + ) + upsert_document_external_perms__no_commit( + db_session=db_session, + doc_id=slim_doc.id, + external_access=ext_access, + source_type=cc_pair.connector.source, + ) diff --git a/backend/ee/danswer/external_permissions/google_drive/group_sync.py b/backend/ee/danswer/external_permissions/google_drive/group_sync.py index 7bb919d4686..e9ca40b3dcb 100644 --- a/backend/ee/danswer/external_permissions/google_drive/group_sync.py +++ b/backend/ee/danswer/external_permissions/google_drive/group_sync.py @@ -1,17 +1,8 @@ -from collections.abc import Iterator -from typing import Any - -from google.oauth2.credentials import Credentials as OAuthCredentials # type: ignore -from google.oauth2.service_account import Credentials as ServiceAccountCredentials # type: ignore -from googleapiclient.discovery import build # type: ignore -from googleapiclient.errors import HttpError # type: ignore from sqlalchemy.orm import Session -from danswer.connectors.cross_connector_utils.retry_wrapper import retry_builder -from danswer.connectors.google_drive.connector_auth import ( - get_google_drive_creds, -) -from danswer.connectors.google_drive.constants import FETCH_GROUPS_SCOPES +from danswer.connectors.google_drive.connector import GoogleDriveConnector +from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval +from 
danswer.connectors.google_utils.resources import get_admin_service from danswer.db.models import ConnectorCredentialPair from danswer.db.users import batch_add_non_web_user_if_not_exists__no_commit from danswer.utils.logger import setup_logger @@ -21,116 +12,39 @@ logger = setup_logger() -# Google Drive APIs are quite flakey and may 500 for an -# extended period of time. Trying to combat here by adding a very -# long retry period (~20 minutes of trying every minute) -add_retries = retry_builder(tries=5, delay=5, max_delay=30) - - -def _fetch_groups_paginated( - google_drive_creds: ServiceAccountCredentials | OAuthCredentials, - identity_source: str | None = None, - customer_id: str | None = None, -) -> Iterator[dict[str, Any]]: - # Note that Google Drive does not use of update the user_cache as the user email - # comes directly with the call to fetch the groups, therefore this is not a valid - # place to save on requests - if identity_source is None and customer_id is None: - raise ValueError( - "Either identity_source or customer_id must be provided to fetch groups" - ) - - cloud_identity_service = build( - "cloudidentity", "v1", credentials=google_drive_creds - ) - parent = ( - f"identitysources/{identity_source}" - if identity_source - else f"customers/{customer_id}" - ) - - while True: - try: - groups_resp: dict[str, Any] = add_retries( - lambda: (cloud_identity_service.groups().list(parent=parent).execute()) - )() - for group in groups_resp.get("groups", []): - yield group - - next_token = groups_resp.get("nextPageToken") - if not next_token: - break - except HttpError as e: - if e.resp.status == 404 or e.resp.status == 403: - break - logger.error(f"Error fetching groups: {e}") - raise - - -def _fetch_group_members_paginated( - google_drive_creds: ServiceAccountCredentials | OAuthCredentials, - group_name: str, -) -> Iterator[dict[str, Any]]: - cloud_identity_service = build( - "cloudidentity", "v1", credentials=google_drive_creds - ) - next_token = None - while True: - try: - membership_info = add_retries( - lambda: ( - cloud_identity_service.groups() - .memberships() - .searchTransitiveMemberships( - parent=group_name, pageToken=next_token - ) - .execute() - ) - )() - - for member in membership_info.get("memberships", []): - yield member - - next_token = membership_info.get("nextPageToken") - if not next_token: - break - except HttpError as e: - if e.resp.status == 404 or e.resp.status == 403: - break - logger.error(f"Error fetching group members: {e}") - raise - - def gdrive_group_sync( db_session: Session, cc_pair: ConnectorCredentialPair, ) -> None: - sync_details = cc_pair.auto_sync_options - if sync_details is None: - logger.error("Sync details not found for Google Drive") - raise ValueError("Sync details not found for Google Drive") - - google_drive_creds, _ = get_google_drive_creds( - cc_pair.credential.credential_json, - scopes=FETCH_GROUPS_SCOPES, + google_drive_connector = GoogleDriveConnector( + **cc_pair.connector.connector_specific_config + ) + google_drive_connector.load_credentials(cc_pair.credential.credential_json) + admin_service = get_admin_service( + google_drive_connector.creds, google_drive_connector.primary_admin_email ) danswer_groups: list[ExternalUserGroup] = [] - for group in _fetch_groups_paginated( - google_drive_creds, - identity_source=sync_details.get("identity_source"), - customer_id=sync_details.get("customer_id"), + for group in execute_paginated_retrieval( + admin_service.groups().list, + list_key="groups", + 
domain=google_drive_connector.google_domain, + fields="groups(email)", ): # The id is the group email - group_email = group["groupKey"]["id"] + group_email = group["email"] + # Gather group member emails group_member_emails: list[str] = [] - for member in _fetch_group_members_paginated(google_drive_creds, group["name"]): - member_keys = member["preferredMemberKey"] - member_emails = [member_key["id"] for member_key in member_keys] - for member_email in member_emails: - group_member_emails.append(member_email) - + for member in execute_paginated_retrieval( + admin_service.members().list, + list_key="members", + groupKey=group_email, + fields="members(email)", + ): + group_member_emails.append(member["email"]) + + # Add group members to DB and get their IDs group_members = batch_add_non_web_user_if_not_exists__no_commit( db_session=db_session, emails=group_member_emails ) diff --git a/backend/ee/danswer/external_permissions/permission_sync.py b/backend/ee/danswer/external_permissions/permission_sync.py index 3a4357f7c10..94a0b4bfa8e 100644 --- a/backend/ee/danswer/external_permissions/permission_sync.py +++ b/backend/ee/danswer/external_permissions/permission_sync.py @@ -6,38 +6,15 @@ from danswer.access.access import get_access_for_documents from danswer.db.connector_credential_pair import get_connector_credential_pair_from_id from danswer.db.document import get_document_ids_for_connector_credential_pair -from danswer.db.models import ConnectorCredentialPair from danswer.document_index.factory import get_current_primary_default_document_index from danswer.document_index.interfaces import UpdateRequest from danswer.utils.logger import setup_logger from ee.danswer.external_permissions.sync_params import DOC_PERMISSIONS_FUNC_MAP from ee.danswer.external_permissions.sync_params import GROUP_PERMISSIONS_FUNC_MAP -from ee.danswer.external_permissions.sync_params import PERMISSION_SYNC_PERIODS logger = setup_logger() -def _is_time_to_run_sync(cc_pair: ConnectorCredentialPair) -> bool: - source_sync_period = PERMISSION_SYNC_PERIODS.get(cc_pair.connector.source) - - # If RESTRICTED_FETCH_PERIOD[source] is None, we always run the sync. 
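For reference, the reworked Google Drive group sync above walks the Admin SDK groups and members listings through execute_paginated_retrieval. The helper's implementation is not part of this diff, so the following is only an illustrative sketch of the pageToken loop it is assumed to perform for a googleapiclient-style list method:

from collections.abc import Iterator
from typing import Any


def paginate_listing(list_method: Any, list_key: str, **kwargs: Any) -> Iterator[dict[str, Any]]:
    # Illustrative stand-in for execute_paginated_retrieval: keep requesting
    # pages until the API stops returning a nextPageToken.
    page_token: str | None = None
    while True:
        response = list_method(pageToken=page_token, **kwargs).execute()
        for item in response.get(list_key, []):
            yield item
        page_token = response.get("nextPageToken")
        if not page_token:
            break

With the admin service from the diff, this would be invoked roughly as paginate_listing(admin_service.groups().list, "groups", domain=..., fields="groups(email)").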
- if not source_sync_period: - return True - - # If the last sync is None, it has never been run so we run the sync - if cc_pair.last_time_perm_sync is None: - return True - - last_sync = cc_pair.last_time_perm_sync.replace(tzinfo=timezone.utc) - current_time = datetime.now(timezone.utc) - - # If the last sync is greater than the full fetch period, we run the sync - if (current_time - last_sync).total_seconds() > source_sync_period: - return True - - return False - - def run_external_group_permission_sync( db_session: Session, cc_pair_id: int, @@ -53,9 +30,6 @@ def run_external_group_permission_sync( # Not all sync connectors support group permissions so this is fine return - if not _is_time_to_run_sync(cc_pair): - return - try: # This function updates: # - the user_email <-> external_user_group_id mapping @@ -69,8 +43,8 @@ def run_external_group_permission_sync( # update postgres db_session.commit() - except Exception as e: - logger.error(f"Error updating document index: {e}") + except Exception: + logger.exception("Error Syncing Group Permissions") db_session.rollback() @@ -85,21 +59,19 @@ def run_external_doc_permission_sync( source_type = cc_pair.connector.source doc_sync_func = DOC_PERMISSIONS_FUNC_MAP.get(source_type) + last_time_perm_sync = cc_pair.last_time_perm_sync if doc_sync_func is None: raise ValueError( f"No permission sync function found for source type: {source_type}" ) - if not _is_time_to_run_sync(cc_pair): - return - try: # This function updates: # - the user_email <-> document mapping # - the external_user_group_id <-> document mapping # in postgres without committing - logger.debug(f"Syncing docs for {source_type}") + logger.info(f"Syncing docs for {source_type}") doc_sync_func( db_session, cc_pair, @@ -136,6 +108,8 @@ def run_external_doc_permission_sync( # update postgres db_session.commit() - except Exception as e: - logger.error(f"Error Syncing Permissions: {e}") + logger.info(f"Successfully synced docs for {source_type}") + except Exception: + logger.exception("Error Syncing Document Permissions") + cc_pair.last_time_perm_sync = last_time_perm_sync db_session.rollback() diff --git a/backend/ee/danswer/external_permissions/slack/doc_sync.py b/backend/ee/danswer/external_permissions/slack/doc_sync.py index fe731746a44..b5f6e9695db 100644 --- a/backend/ee/danswer/external_permissions/slack/doc_sync.py +++ b/backend/ee/danswer/external_permissions/slack/doc_sync.py @@ -3,7 +3,7 @@ from danswer.access.models import ExternalAccess from danswer.connectors.factory import instantiate_connector -from danswer.connectors.interfaces import IdConnector +from danswer.connectors.interfaces import SlimConnector from danswer.connectors.models import InputType from danswer.connectors.slack.connector import get_channels from danswer.connectors.slack.connector import make_paginated_slack_api_call_w_retries @@ -17,28 +17,6 @@ logger = setup_logger() -def _extract_channel_id_from_doc_id(doc_id: str) -> str: - """ - Extracts the channel ID from a document ID string. - - The document ID is expected to be in the format: "{channel_id}__{message_ts}" - - Args: - doc_id (str): The document ID string. - - Returns: - str: The extracted channel ID. - - Raises: - ValueError: If the doc_id doesn't contain the expected separator. 
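One behavioral note on the permission-sync changes above: the document sync now snapshots last_time_perm_sync before doing any work and restores it if the sync raises, so a failed run neither advances the sync timestamp nor leaves partial writes behind. A condensed sketch of that pattern (session and model names as in the diff, the wrapper itself is illustrative):

from collections.abc import Callable

from sqlalchemy.orm import Session

from danswer.db.models import ConnectorCredentialPair


def _run_doc_sync_safely(
    db_session: Session,
    cc_pair: ConnectorCredentialPair,
    doc_sync_func: Callable[[Session, ConnectorCredentialPair], None],
) -> None:
    # Snapshot the previous sync time before running the sync.
    last_time_perm_sync = cc_pair.last_time_perm_sync
    try:
        doc_sync_func(db_session, cc_pair)
        db_session.commit()
    except Exception:
        # On failure, restore the old timestamp and roll back partial writes
        # so this run is treated as if it never happened.
        cc_pair.last_time_perm_sync = last_time_perm_sync
        db_session.rollback()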
- """ - try: - channel_id, _ = doc_id.split("__", 1) - return channel_id - except ValueError: - raise ValueError(f"Invalid doc_id format: {doc_id}") - - def _get_slack_document_ids_and_channels( db_session: Session, cc_pair: ConnectorCredentialPair, @@ -47,24 +25,27 @@ def _get_slack_document_ids_and_channels( runnable_connector = instantiate_connector( db_session=db_session, source=cc_pair.connector.source, - input_type=InputType.PRUNE, + input_type=InputType.SLIM_RETRIEVAL, connector_specific_config=cc_pair.connector.connector_specific_config, credential=cc_pair.credential, ) - assert isinstance(runnable_connector, IdConnector) + assert isinstance(runnable_connector, SlimConnector) channel_doc_map: dict[str, list[str]] = {} - for doc_id in runnable_connector.retrieve_all_source_ids(): - channel_id = _extract_channel_id_from_doc_id(doc_id) - if channel_id not in channel_doc_map: - channel_doc_map[channel_id] = [] - channel_doc_map[channel_id].append(doc_id) + for doc_metadata_batch in runnable_connector.retrieve_all_slim_documents(): + for doc_metadata in doc_metadata_batch: + if doc_metadata.perm_sync_data is None: + continue + channel_id = doc_metadata.perm_sync_data["channel_id"] + if channel_id not in channel_doc_map: + channel_doc_map[channel_id] = [] + channel_doc_map[channel_id].append(doc_metadata.id) return channel_doc_map -def _fetch_worspace_permissions( +def _fetch_workspace_permissions( db_session: Session, user_id_to_email_map: dict[str, str], ) -> ExternalAccess: @@ -167,7 +148,7 @@ def slack_doc_sync( db_session=db_session, cc_pair=cc_pair, ) - workspace_permissions = _fetch_worspace_permissions( + workspace_permissions = _fetch_workspace_permissions( db_session=db_session, user_id_to_email_map=user_id_to_email_map, ) diff --git a/backend/ee/danswer/external_permissions/sync_params.py b/backend/ee/danswer/external_permissions/sync_params.py index 10b080cd7f4..1fd09ca1509 100644 --- a/backend/ee/danswer/external_permissions/sync_params.py +++ b/backend/ee/danswer/external_permissions/sync_params.py @@ -6,6 +6,7 @@ from danswer.db.models import ConnectorCredentialPair from ee.danswer.external_permissions.confluence.doc_sync import confluence_doc_sync from ee.danswer.external_permissions.confluence.group_sync import confluence_group_sync +from ee.danswer.external_permissions.gmail.doc_sync import gmail_doc_sync from ee.danswer.external_permissions.google_drive.doc_sync import gdrive_doc_sync from ee.danswer.external_permissions.google_drive.group_sync import gdrive_group_sync from ee.danswer.external_permissions.slack.doc_sync import slack_doc_sync @@ -28,6 +29,7 @@ DocumentSource.GOOGLE_DRIVE: gdrive_doc_sync, DocumentSource.CONFLUENCE: confluence_doc_sync, DocumentSource.SLACK: slack_doc_sync, + DocumentSource.GMAIL: gmail_doc_sync, } # These functions update: diff --git a/backend/ee/danswer/main.py b/backend/ee/danswer/main.py index 7d150107c75..affa5fd1cde 100644 --- a/backend/ee/danswer/main.py +++ b/backend/ee/danswer/main.py @@ -2,6 +2,7 @@ from httpx_oauth.clients.openid import OpenID from danswer.auth.users import auth_backend +from danswer.auth.users import create_danswer_oauth_router from danswer.auth.users import fastapi_users from danswer.configs.app_configs import AUTH_TYPE from danswer.configs.app_configs import OAUTH_CLIENT_ID @@ -24,6 +25,7 @@ basic_router as enterprise_settings_router, ) from ee.danswer.server.manage.standard_answer import router as standard_answer_router +from ee.danswer.server.middleware.tenant_tracking import add_tenant_id_middleware from 
ee.danswer.server.query_and_chat.chat_backend import ( router as chat_router, ) @@ -34,11 +36,13 @@ from ee.danswer.server.reporting.usage_export_api import router as usage_export_router from ee.danswer.server.saml import router as saml_router from ee.danswer.server.seeding import seed_db +from ee.danswer.server.tenants.api import router as tenants_router from ee.danswer.server.token_rate_limits.api import ( router as token_rate_limit_settings_router, ) from ee.danswer.server.user_group.api import router as user_group_router from ee.danswer.utils.encryption import test_encryption +from shared_configs.configs import MULTI_TENANT logger = setup_logger() @@ -52,10 +56,13 @@ def get_application() -> FastAPI: application = get_application_base() + if MULTI_TENANT: + add_tenant_id_middleware(application, logger) + if AUTH_TYPE == AuthType.OIDC: include_router_with_global_prefix_prepended( application, - fastapi_users.get_oauth_router( + create_danswer_oauth_router( OpenID(OAUTH_CLIENT_ID, OAUTH_CLIENT_SECRET, OPENID_CONFIG_URL), auth_backend, USER_AUTH_SECRET, @@ -99,6 +106,10 @@ def get_application() -> FastAPI: include_router_with_global_prefix_prepended(application, enterprise_settings_router) include_router_with_global_prefix_prepended(application, usage_export_router) + if MULTI_TENANT: + # Tenant management + include_router_with_global_prefix_prepended(application, tenants_router) + # Ensure all routes have auth enabled or are explicitly marked as public check_ee_router_auth(application) diff --git a/backend/ee/danswer/server/enterprise_settings/store.py b/backend/ee/danswer/server/enterprise_settings/store.py index 30b72d5d2e8..74706e0f769 100644 --- a/backend/ee/danswer/server/enterprise_settings/store.py +++ b/backend/ee/danswer/server/enterprise_settings/store.py @@ -11,9 +11,9 @@ from danswer.configs.constants import FileOrigin from danswer.configs.constants import KV_CUSTOM_ANALYTICS_SCRIPT_KEY from danswer.configs.constants import KV_ENTERPRISE_SETTINGS_KEY -from danswer.dynamic_configs.factory import get_dynamic_config_store -from danswer.dynamic_configs.interface import ConfigNotFoundError from danswer.file_store.file_store import get_default_file_store +from danswer.key_value_store.factory import get_kv_store +from danswer.key_value_store.interface import KvKeyNotFoundError from danswer.utils.logger import setup_logger from ee.danswer.server.enterprise_settings.models import AnalyticsScriptUpload from ee.danswer.server.enterprise_settings.models import EnterpriseSettings @@ -23,12 +23,12 @@ def load_settings() -> EnterpriseSettings: - dynamic_config_store = get_dynamic_config_store() + dynamic_config_store = get_kv_store() try: settings = EnterpriseSettings( **cast(dict, dynamic_config_store.load(KV_ENTERPRISE_SETTINGS_KEY)) ) - except ConfigNotFoundError: + except KvKeyNotFoundError: settings = EnterpriseSettings() dynamic_config_store.store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump()) @@ -36,17 +36,17 @@ def load_settings() -> EnterpriseSettings: def store_settings(settings: EnterpriseSettings) -> None: - get_dynamic_config_store().store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump()) + get_kv_store().store(KV_ENTERPRISE_SETTINGS_KEY, settings.model_dump()) _CUSTOM_ANALYTICS_SECRET_KEY = os.environ.get("CUSTOM_ANALYTICS_SECRET_KEY") def load_analytics_script() -> str | None: - dynamic_config_store = get_dynamic_config_store() + dynamic_config_store = get_kv_store() try: return cast(str, dynamic_config_store.load(KV_CUSTOM_ANALYTICS_SCRIPT_KEY)) - except 
ConfigNotFoundError: + except KvKeyNotFoundError: return None @@ -57,9 +57,7 @@ def store_analytics_script(analytics_script_upload: AnalyticsScriptUpload) -> No ): raise ValueError("Invalid secret key") - get_dynamic_config_store().store( - KV_CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script - ) + get_kv_store().store(KV_CUSTOM_ANALYTICS_SCRIPT_KEY, analytics_script_upload.script) _LOGO_FILENAME = "__logo__" diff --git a/backend/ee/danswer/server/middleware/tenant_tracking.py b/backend/ee/danswer/server/middleware/tenant_tracking.py new file mode 100644 index 00000000000..20c0ba0afe2 --- /dev/null +++ b/backend/ee/danswer/server/middleware/tenant_tracking.py @@ -0,0 +1,75 @@ +import logging +from collections.abc import Awaitable +from collections.abc import Callable + +import jwt +from fastapi import FastAPI +from fastapi import HTTPException +from fastapi import Request +from fastapi import Response + +from danswer.configs.app_configs import USER_AUTH_SECRET +from danswer.db.engine import is_valid_schema_name +from ee.danswer.auth.api_key import extract_tenant_from_api_key_header +from shared_configs.configs import MULTI_TENANT +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR + + +def add_tenant_id_middleware(app: FastAPI, logger: logging.LoggerAdapter) -> None: + @app.middleware("http") + async def set_tenant_id( + request: Request, call_next: Callable[[Request], Awaitable[Response]] + ) -> Response: + try: + tenant_id = ( + _get_tenant_id_from_request(request, logger) + if MULTI_TENANT + else POSTGRES_DEFAULT_SCHEMA + ) + CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) + return await call_next(request) + + except Exception as e: + logger.error(f"Error in tenant ID middleware: {str(e)}") + raise + + +def _get_tenant_id_from_request(request: Request, logger: logging.LoggerAdapter) -> str: + # First check for API key + tenant_id = extract_tenant_from_api_key_header(request) + if tenant_id is not None: + return tenant_id + + # Check for cookie-based auth + token = request.cookies.get("fastapiusersauth") + if not token: + return POSTGRES_DEFAULT_SCHEMA + + try: + payload = jwt.decode( + token, + USER_AUTH_SECRET, + audience=["fastapi-users:auth"], + algorithms=["HS256"], + ) + tenant_id_from_payload = payload.get("tenant_id", POSTGRES_DEFAULT_SCHEMA) + + # Since payload.get() can return None, ensure we have a string + tenant_id = ( + str(tenant_id_from_payload) + if tenant_id_from_payload is not None + else POSTGRES_DEFAULT_SCHEMA + ) + + if not is_valid_schema_name(tenant_id): + raise HTTPException(status_code=400, detail="Invalid tenant ID format") + + return tenant_id + + except jwt.InvalidTokenError: + return POSTGRES_DEFAULT_SCHEMA + + except Exception as e: + logger.error(f"Unexpected error in set_tenant_id_middleware: {str(e)}") + raise HTTPException(status_code=500, detail="Internal server error") diff --git a/backend/ee/danswer/server/query_and_chat/chat_backend.py b/backend/ee/danswer/server/query_and_chat/chat_backend.py index dd637dcf081..b25ed8357d9 100644 --- a/backend/ee/danswer/server/query_and_chat/chat_backend.py +++ b/backend/ee/danswer/server/query_and_chat/chat_backend.py @@ -176,6 +176,7 @@ def handle_simplified_chat_message( chunks_above=0, chunks_below=0, full_doc=chat_message_req.full_doc, + structured_response_format=chat_message_req.structured_response_format, ) packets = stream_chat_message_objects( @@ -202,7 +203,7 @@ def handle_send_message_simple_with_history( raise 
HTTPException(status_code=400, detail="Messages cannot be zero length") # This is a sanity check to make sure the chat history is valid - # It must start with a user message and alternate between user and assistant + # It must start with a user message and alternate beteen user and assistant expected_role = MessageType.USER for msg in req.messages: if not msg.message: @@ -296,6 +297,7 @@ def handle_send_message_simple_with_history( chunks_above=0, chunks_below=0, full_doc=req.full_doc, + structured_response_format=req.structured_response_format, ) packets = stream_chat_message_objects( diff --git a/backend/ee/danswer/server/query_and_chat/models.py b/backend/ee/danswer/server/query_and_chat/models.py index ec9db73ecff..4baf17ac8ce 100644 --- a/backend/ee/danswer/server/query_and_chat/models.py +++ b/backend/ee/danswer/server/query_and_chat/models.py @@ -1,3 +1,5 @@ +from uuid import UUID + from pydantic import BaseModel from pydantic import Field @@ -36,7 +38,7 @@ class BasicCreateChatMessageRequest(ChunkContext): Note, for simplicity this option only allows for a single linear chain of messages """ - chat_session_id: int + chat_session_id: UUID # New message contents message: str # Defaults to using retrieval with no additional filters @@ -46,6 +48,9 @@ class BasicCreateChatMessageRequest(ChunkContext): query_override: str | None = None # If search_doc_ids provided, then retrieval options are unused search_doc_ids: list[int] | None = None + # only works if using an OpenAI model. See the following for more details: + # https://platform.openai.com/docs/guides/structured-outputs/introduction + structured_response_format: dict | None = None class BasicCreateChatMessageWithHistoryRequest(ChunkContext): @@ -58,6 +63,9 @@ class BasicCreateChatMessageWithHistoryRequest(ChunkContext): skip_rerank: bool | None = None # If search_doc_ids provided, then retrieval options are unused search_doc_ids: list[int] | None = None + # only works if using an OpenAI model. 
See the following for more details: + # https://platform.openai.com/docs/guides/structured-outputs/introduction + structured_response_format: dict | None = None class SimpleDoc(BaseModel): diff --git a/backend/ee/danswer/server/query_and_chat/token_limit.py b/backend/ee/danswer/server/query_and_chat/token_limit.py index 538458fb63f..b4c588dc416 100644 --- a/backend/ee/danswer/server/query_and_chat/token_limit.py +++ b/backend/ee/danswer/server/query_and_chat/token_limit.py @@ -12,7 +12,7 @@ from sqlalchemy import select from sqlalchemy.orm import Session -from danswer.db.engine import get_session_context_manager +from danswer.db.engine import get_session_with_tenant from danswer.db.models import ChatMessage from danswer.db.models import ChatSession from danswer.db.models import TokenRateLimit @@ -28,21 +28,21 @@ from ee.danswer.db.token_limit import fetch_all_user_token_rate_limits -def _check_token_rate_limits(user: User | None) -> None: +def _check_token_rate_limits(user: User | None, tenant_id: str | None) -> None: if user is None: # Unauthenticated users are only rate limited by global settings - _user_is_rate_limited_by_global() + _user_is_rate_limited_by_global(tenant_id) elif is_api_key_email_address(user.email): # API keys are only rate limited by global settings - _user_is_rate_limited_by_global() + _user_is_rate_limited_by_global(tenant_id) else: run_functions_tuples_in_parallel( [ - (_user_is_rate_limited, (user.id,)), - (_user_is_rate_limited_by_group, (user.id,)), - (_user_is_rate_limited_by_global, ()), + (_user_is_rate_limited, (user.id, tenant_id)), + (_user_is_rate_limited_by_group, (user.id, tenant_id)), + (_user_is_rate_limited_by_global, (tenant_id,)), ] ) @@ -52,8 +52,8 @@ def _check_token_rate_limits(user: User | None) -> None: """ -def _user_is_rate_limited(user_id: UUID) -> None: - with get_session_context_manager() as db_session: +def _user_is_rate_limited(user_id: UUID, tenant_id: str | None) -> None: + with get_session_with_tenant(tenant_id) as db_session: user_rate_limits = fetch_all_user_token_rate_limits( db_session=db_session, enabled_only=True, ordered=False ) @@ -93,8 +93,8 @@ def _fetch_user_usage( """ -def _user_is_rate_limited_by_group(user_id: UUID) -> None: - with get_session_context_manager() as db_session: +def _user_is_rate_limited_by_group(user_id: UUID, tenant_id: str | None) -> None: + with get_session_with_tenant(tenant_id) as db_session: group_rate_limits = _fetch_all_user_group_rate_limits(user_id, db_session) if group_rate_limits: diff --git a/backend/ee/danswer/server/query_and_chat/utils.py b/backend/ee/danswer/server/query_and_chat/utils.py index a2f7253517a..be5507b01c2 100644 --- a/backend/ee/danswer/server/query_and_chat/utils.py +++ b/backend/ee/danswer/server/query_and_chat/utils.py @@ -12,7 +12,7 @@ from danswer.db.models import User from danswer.db.persona import get_prompts_by_ids from danswer.one_shot_answer.models import PersonaConfig -from danswer.tools.custom.custom_tool import ( +from danswer.tools.tool_implementations.custom.custom_tool import ( build_custom_tools_from_openapi_schema_and_headers, ) diff --git a/backend/ee/danswer/server/query_history/api.py b/backend/ee/danswer/server/query_history/api.py index 3fc0a98153a..f50b9cc5230 100644 --- a/backend/ee/danswer/server/query_history/api.py +++ b/backend/ee/danswer/server/query_history/api.py @@ -4,6 +4,7 @@ from datetime import timedelta from datetime import timezone from typing import Literal +from uuid import UUID from fastapi import APIRouter from fastapi import 
Depends @@ -19,10 +20,13 @@ from danswer.configs.constants import QAFeedbackType from danswer.configs.constants import SessionType from danswer.db.chat import get_chat_session_by_id +from danswer.db.chat import get_chat_sessions_by_user from danswer.db.engine import get_session from danswer.db.models import ChatMessage from danswer.db.models import ChatSession from danswer.db.models import User +from danswer.server.query_and_chat.models import ChatSessionDetails +from danswer.server.query_and_chat.models import ChatSessionsResponse from ee.danswer.db.query_history import fetch_chat_sessions_eagerly_by_time router = APIRouter() @@ -83,7 +87,7 @@ def build(cls, message: ChatMessage) -> "MessageSnapshot": class ChatSessionMinimal(BaseModel): - id: int + id: UUID user_email: str name: str | None first_user_message: str @@ -95,7 +99,7 @@ class ChatSessionMinimal(BaseModel): class ChatSessionSnapshot(BaseModel): - id: int + id: UUID user_email: str name: str | None messages: list[MessageSnapshot] @@ -105,7 +109,7 @@ class ChatSessionSnapshot(BaseModel): class QuestionAnswerPairSnapshot(BaseModel): - chat_session_id: int + chat_session_id: UUID # 1-indexed message number in the chat_session # e.g. the first message pair in the chat_session is 1, the second is 2, etc. message_pair_num: int @@ -329,6 +333,36 @@ def snapshot_from_chat_session( ) +@router.get("/admin/chat-sessions") +def get_user_chat_sessions( + user_id: UUID, + _: User | None = Depends(current_admin_user), + db_session: Session = Depends(get_session), +) -> ChatSessionsResponse: + try: + chat_sessions = get_chat_sessions_by_user( + user_id=user_id, deleted=False, db_session=db_session, limit=0 + ) + + except ValueError: + raise ValueError("Chat session does not exist or has been deleted") + + return ChatSessionsResponse( + sessions=[ + ChatSessionDetails( + id=chat.id, + name=chat.description, + persona_id=chat.persona_id, + time_created=chat.time_created.isoformat(), + shared_status=chat.shared_status, + folder_id=chat.folder_id, + current_alternate_model=chat.current_alternate_model, + ) + for chat in chat_sessions + ] + ) + + @router.get("/admin/chat-session-history") def get_chat_session_history( feedback_type: QAFeedbackType | None = None, @@ -350,7 +384,7 @@ def get_chat_session_history( @router.get("/admin/chat-session-history/{chat_session_id}") def get_chat_session_admin( - chat_session_id: int, + chat_session_id: UUID, _: User | None = Depends(current_admin_user), db_session: Session = Depends(get_session), ) -> ChatSessionSnapshot: diff --git a/backend/ee/danswer/server/reporting/usage_export_models.py b/backend/ee/danswer/server/reporting/usage_export_models.py index 98d9021f816..21cd104e862 100644 --- a/backend/ee/danswer/server/reporting/usage_export_models.py +++ b/backend/ee/danswer/server/reporting/usage_export_models.py @@ -1,5 +1,6 @@ from datetime import datetime from enum import Enum +from uuid import UUID from pydantic import BaseModel @@ -14,7 +15,7 @@ class FlowType(str, Enum): class ChatMessageSkeleton(BaseModel): message_id: int - chat_session_id: int + chat_session_id: UUID user_id: str | None flow_type: FlowType time_sent: datetime diff --git a/backend/ee/danswer/server/tenants/__init__.py b/backend/ee/danswer/server/tenants/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/ee/danswer/server/tenants/access.py b/backend/ee/danswer/server/tenants/access.py new file mode 100644 index 00000000000..255e6c0ea94 --- /dev/null +++ b/backend/ee/danswer/server/tenants/access.py 
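A usage sketch for the new /admin/chat-sessions endpoint added above, which now returns UUID chat session ids. The base URL, router prefix, and auth cookie below are placeholders rather than values taken from this diff:

import requests

API_BASE = "http://localhost:8080/api"  # hypothetical deployment URL

resp = requests.get(
    f"{API_BASE}/admin/chat-sessions",
    params={"user_id": "123e4567-e89b-12d3-a456-426614174000"},  # target user's UUID
    cookies={"fastapiusersauth": "<admin-session-token>"},       # admin auth assumed
)
resp.raise_for_status()
for session in resp.json()["sessions"]:
    print(session["id"], session["name"], session["time_created"])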
@@ -0,0 +1,53 @@ +from datetime import datetime +from datetime import timedelta + +import jwt +from fastapi import HTTPException +from fastapi import Request + +from danswer.configs.app_configs import DATA_PLANE_SECRET +from danswer.configs.app_configs import EXPECTED_API_KEY +from danswer.configs.app_configs import JWT_ALGORITHM +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +def generate_data_plane_token() -> str: + if DATA_PLANE_SECRET is None: + raise ValueError("DATA_PLANE_SECRET is not set") + + payload = { + "iss": "data_plane", + "exp": datetime.utcnow() + timedelta(minutes=5), + "iat": datetime.utcnow(), + "scope": "api_access", + } + + token = jwt.encode(payload, DATA_PLANE_SECRET, algorithm=JWT_ALGORITHM) + return token + + +async def control_plane_dep(request: Request) -> None: + api_key = request.headers.get("X-API-KEY") + if api_key != EXPECTED_API_KEY: + logger.warning("Invalid API key") + raise HTTPException(status_code=401, detail="Invalid API key") + + auth_header = request.headers.get("Authorization") + if not auth_header or not auth_header.startswith("Bearer "): + logger.warning("Invalid authorization header") + raise HTTPException(status_code=401, detail="Invalid authorization header") + + token = auth_header.split(" ")[1] + try: + payload = jwt.decode(token, DATA_PLANE_SECRET, algorithms=[JWT_ALGORITHM]) + if payload.get("scope") != "tenant:create": + logger.warning("Insufficient permissions") + raise HTTPException(status_code=403, detail="Insufficient permissions") + except jwt.ExpiredSignatureError: + logger.warning("Token has expired") + raise HTTPException(status_code=401, detail="Token has expired") + except jwt.InvalidTokenError: + logger.warning("Invalid token") + raise HTTPException(status_code=401, detail="Invalid token") diff --git a/backend/ee/danswer/server/tenants/api.py b/backend/ee/danswer/server/tenants/api.py new file mode 100644 index 00000000000..8e79c0b37b1 --- /dev/null +++ b/backend/ee/danswer/server/tenants/api.py @@ -0,0 +1,170 @@ +import stripe +from fastapi import APIRouter +from fastapi import Depends +from fastapi import HTTPException +from fastapi import Response + +from danswer.auth.users import auth_backend +from danswer.auth.users import current_admin_user +from danswer.auth.users import get_jwt_strategy +from danswer.auth.users import get_tenant_id_for_email +from danswer.auth.users import User +from danswer.configs.app_configs import WEB_DOMAIN +from danswer.db.engine import get_session_with_tenant +from danswer.db.notification import create_notification +from danswer.db.users import get_user_by_email +from danswer.server.settings.store import load_settings +from danswer.server.settings.store import store_settings +from danswer.setup import setup_danswer +from danswer.utils.logger import setup_logger +from ee.danswer.auth.users import current_cloud_superuser +from ee.danswer.configs.app_configs import STRIPE_SECRET_KEY +from ee.danswer.server.tenants.access import control_plane_dep +from ee.danswer.server.tenants.billing import fetch_billing_information +from ee.danswer.server.tenants.billing import fetch_tenant_stripe_information +from ee.danswer.server.tenants.models import BillingInformation +from ee.danswer.server.tenants.models import CreateTenantRequest +from ee.danswer.server.tenants.models import ImpersonateRequest +from ee.danswer.server.tenants.models import ProductGatingRequest +from ee.danswer.server.tenants.provisioning import add_users_to_tenant +from ee.danswer.server.tenants.provisioning 
import configure_default_api_keys +from ee.danswer.server.tenants.provisioning import ensure_schema_exists +from ee.danswer.server.tenants.provisioning import run_alembic_migrations +from ee.danswer.server.tenants.provisioning import user_owns_a_tenant +from shared_configs.configs import MULTI_TENANT +from shared_configs.contextvars import CURRENT_TENANT_ID_CONTEXTVAR + +stripe.api_key = STRIPE_SECRET_KEY + +logger = setup_logger() +router = APIRouter(prefix="/tenants") + + +@router.post("/create") +def create_tenant( + create_tenant_request: CreateTenantRequest, _: None = Depends(control_plane_dep) +) -> dict[str, str]: + if not MULTI_TENANT: + raise HTTPException(status_code=403, detail="Multi-tenancy is not enabled") + + tenant_id = create_tenant_request.tenant_id + email = create_tenant_request.initial_admin_email + token = None + + if user_owns_a_tenant(email): + raise HTTPException( + status_code=409, detail="User already belongs to an organization" + ) + + try: + if not ensure_schema_exists(tenant_id): + logger.info(f"Created schema for tenant {tenant_id}") + else: + logger.info(f"Schema already exists for tenant {tenant_id}") + + token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) + run_alembic_migrations(tenant_id) + + with get_session_with_tenant(tenant_id) as db_session: + setup_danswer(db_session, tenant_id) + + configure_default_api_keys(db_session) + + add_users_to_tenant([email], tenant_id) + + return { + "status": "success", + "message": f"Tenant {tenant_id} created successfully", + } + except Exception as e: + logger.exception(f"Failed to create tenant {tenant_id}: {str(e)}") + raise HTTPException( + status_code=500, detail=f"Failed to create tenant: {str(e)}" + ) + finally: + if token is not None: + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) + + +@router.post("/product-gating") +def gate_product( + product_gating_request: ProductGatingRequest, _: None = Depends(control_plane_dep) +) -> None: + """ + Gating the product means that the product is not available to the tenant. + They will be directed to the billing page. 
+ We gate the product when + 1) User has ended free trial without adding payment method + 2) User's card has declined + """ + tenant_id = product_gating_request.tenant_id + token = CURRENT_TENANT_ID_CONTEXTVAR.set(tenant_id) + + settings = load_settings() + settings.product_gating = product_gating_request.product_gating + store_settings(settings) + + if product_gating_request.notification: + with get_session_with_tenant(tenant_id) as db_session: + create_notification(None, product_gating_request.notification, db_session) + + if token is not None: + CURRENT_TENANT_ID_CONTEXTVAR.reset(token) + + +@router.get("/billing-information", response_model=BillingInformation) +async def billing_information( + _: User = Depends(current_admin_user), +) -> BillingInformation: + logger.info("Fetching billing information") + return BillingInformation( + **fetch_billing_information(CURRENT_TENANT_ID_CONTEXTVAR.get()) + ) + + +@router.post("/create-customer-portal-session") +async def create_customer_portal_session(_: User = Depends(current_admin_user)) -> dict: + try: + # Fetch tenant_id and current tenant's information + tenant_id = CURRENT_TENANT_ID_CONTEXTVAR.get() + stripe_info = fetch_tenant_stripe_information(tenant_id) + stripe_customer_id = stripe_info.get("stripe_customer_id") + if not stripe_customer_id: + raise HTTPException(status_code=400, detail="Stripe customer ID not found") + logger.info(stripe_customer_id) + portal_session = stripe.billing_portal.Session.create( + customer=stripe_customer_id, + return_url=f"{WEB_DOMAIN}/admin/cloud-settings", + ) + logger.info(portal_session) + return {"url": portal_session.url} + except Exception as e: + logger.exception("Failed to create customer portal session") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/impersonate") +async def impersonate_user( + impersonate_request: ImpersonateRequest, + _: User = Depends(current_cloud_superuser), +) -> Response: + """Allows a cloud superuser to impersonate another user by generating an impersonation JWT token""" + tenant_id = get_tenant_id_for_email(impersonate_request.email) + + with get_session_with_tenant(tenant_id) as tenant_session: + user_to_impersonate = get_user_by_email( + impersonate_request.email, tenant_session + ) + if user_to_impersonate is None: + raise HTTPException(status_code=404, detail="User not found") + token = await get_jwt_strategy().write_token(user_to_impersonate) + + response = await auth_backend.transport.get_login_response(token) + response.set_cookie( + key="fastapiusersauth", + value=token, + httponly=True, + secure=True, + samesite="lax", + ) + return response diff --git a/backend/ee/danswer/server/tenants/billing.py b/backend/ee/danswer/server/tenants/billing.py new file mode 100644 index 00000000000..681ac835e5f --- /dev/null +++ b/backend/ee/danswer/server/tenants/billing.py @@ -0,0 +1,67 @@ +from typing import cast + +import requests +import stripe + +from danswer.configs.app_configs import CONTROL_PLANE_API_BASE_URL +from danswer.utils.logger import setup_logger +from ee.danswer.configs.app_configs import STRIPE_PRICE_ID +from ee.danswer.configs.app_configs import STRIPE_SECRET_KEY +from ee.danswer.server.tenants.access import generate_data_plane_token + +stripe.api_key = STRIPE_SECRET_KEY + +logger = setup_logger() + + +def fetch_tenant_stripe_information(tenant_id: str) -> dict: + token = generate_data_plane_token() + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + url = 
f"{CONTROL_PLANE_API_BASE_URL}/tenant-stripe-information" + params = {"tenant_id": tenant_id} + response = requests.get(url, headers=headers, params=params) + response.raise_for_status() + return response.json() + + +def fetch_billing_information(tenant_id: str) -> dict: + logger.info("Fetching billing information") + token = generate_data_plane_token() + headers = { + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + } + url = f"{CONTROL_PLANE_API_BASE_URL}/billing-information" + params = {"tenant_id": tenant_id} + response = requests.get(url, headers=headers, params=params) + response.raise_for_status() + billing_info = response.json() + return billing_info + + +def register_tenant_users(tenant_id: str, number_of_users: int) -> stripe.Subscription: + """ + Send a request to the control service to register the number of users for a tenant. + """ + if not STRIPE_PRICE_ID: + raise Exception("STRIPE_PRICE_ID is not set") + + response = fetch_tenant_stripe_information(tenant_id) + stripe_subscription_id = cast(str, response.get("stripe_subscription_id")) + + subscription = stripe.Subscription.retrieve(stripe_subscription_id) + updated_subscription = stripe.Subscription.modify( + stripe_subscription_id, + items=[ + { + "id": subscription["items"]["data"][0].id, + "price": STRIPE_PRICE_ID, + "quantity": number_of_users, + } + ], + metadata={"tenant_id": str(tenant_id)}, + ) + return updated_subscription diff --git a/backend/ee/danswer/server/tenants/models.py b/backend/ee/danswer/server/tenants/models.py new file mode 100644 index 00000000000..2c1fdbecdb3 --- /dev/null +++ b/backend/ee/danswer/server/tenants/models.py @@ -0,0 +1,35 @@ +from pydantic import BaseModel + +from danswer.configs.constants import NotificationType +from danswer.server.settings.models import GatingType + + +class CheckoutSessionCreationRequest(BaseModel): + quantity: int + + +class CreateTenantRequest(BaseModel): + tenant_id: str + initial_admin_email: str + + +class ProductGatingRequest(BaseModel): + tenant_id: str + product_gating: GatingType + notification: NotificationType | None = None + + +class BillingInformation(BaseModel): + seats: int + subscription_status: str + billing_start: str + billing_end: str + payment_method_enabled: bool + + +class CheckoutSessionCreationResponse(BaseModel): + id: str + + +class ImpersonateRequest(BaseModel): + email: str diff --git a/backend/ee/danswer/server/tenants/provisioning.py b/backend/ee/danswer/server/tenants/provisioning.py new file mode 100644 index 00000000000..9106821b5a5 --- /dev/null +++ b/backend/ee/danswer/server/tenants/provisioning.py @@ -0,0 +1,145 @@ +import os +from types import SimpleNamespace + +from sqlalchemy import text +from sqlalchemy.orm import Session +from sqlalchemy.schema import CreateSchema + +from alembic import command +from alembic.config import Config +from danswer.db.engine import build_connection_string +from danswer.db.engine import get_session_with_tenant +from danswer.db.engine import get_sqlalchemy_engine +from danswer.db.llm import upsert_cloud_embedding_provider +from danswer.db.llm import upsert_llm_provider +from danswer.db.models import UserTenantMapping +from danswer.server.manage.embedding.models import CloudEmbeddingProviderCreationRequest +from danswer.server.manage.llm.models import LLMProviderUpsertRequest +from danswer.utils.logger import setup_logger +from ee.danswer.configs.app_configs import ANTHROPIC_DEFAULT_API_KEY +from ee.danswer.configs.app_configs import COHERE_DEFAULT_API_KEY +from 
ee.danswer.configs.app_configs import OPENAI_DEFAULT_API_KEY +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA +from shared_configs.enums import EmbeddingProvider + +logger = setup_logger() + + +def run_alembic_migrations(schema_name: str) -> None: + logger.info(f"Starting Alembic migrations for schema: {schema_name}") + + try: + current_dir = os.path.dirname(os.path.abspath(__file__)) + root_dir = os.path.abspath(os.path.join(current_dir, "..", "..", "..", "..")) + alembic_ini_path = os.path.join(root_dir, "alembic.ini") + + # Configure Alembic + alembic_cfg = Config(alembic_ini_path) + alembic_cfg.set_main_option("sqlalchemy.url", build_connection_string()) + alembic_cfg.set_main_option( + "script_location", os.path.join(root_dir, "alembic") + ) + + # Ensure that logging isn't broken + alembic_cfg.attributes["configure_logger"] = False + + # Mimic command-line options by adding 'cmd_opts' to the config + alembic_cfg.cmd_opts = SimpleNamespace() # type: ignore + alembic_cfg.cmd_opts.x = [f"schema={schema_name}"] # type: ignore + + # Run migrations programmatically + command.upgrade(alembic_cfg, "head") + + # Run migrations programmatically + logger.info( + f"Alembic migrations completed successfully for schema: {schema_name}" + ) + + except Exception as e: + logger.exception(f"Alembic migration failed for schema {schema_name}: {str(e)}") + raise + + +def configure_default_api_keys(db_session: Session) -> None: + open_provider = LLMProviderUpsertRequest( + name="OpenAI", + provider="OpenAI", + api_key=OPENAI_DEFAULT_API_KEY, + default_model_name="gpt-4o", + ) + anthropic_provider = LLMProviderUpsertRequest( + name="Anthropic", + provider="Anthropic", + api_key=ANTHROPIC_DEFAULT_API_KEY, + default_model_name="claude-3-5-sonnet-20240620", + ) + upsert_llm_provider(open_provider, db_session) + upsert_llm_provider(anthropic_provider, db_session) + + cloud_embedding_provider = CloudEmbeddingProviderCreationRequest( + provider_type=EmbeddingProvider.COHERE, + api_key=COHERE_DEFAULT_API_KEY, + ) + upsert_cloud_embedding_provider(db_session, cloud_embedding_provider) + + +def ensure_schema_exists(tenant_id: str) -> bool: + with Session(get_sqlalchemy_engine()) as db_session: + with db_session.begin(): + result = db_session.execute( + text( + "SELECT schema_name FROM information_schema.schemata WHERE schema_name = :schema_name" + ), + {"schema_name": tenant_id}, + ) + schema_exists = result.scalar() is not None + if not schema_exists: + stmt = CreateSchema(tenant_id) + db_session.execute(stmt) + return True + return False + + +# For now, we're implementing a primitive mapping between users and tenants. +# This function is only used to determine a user's relationship to a tenant upon creation (implying ownership). 
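Stepping back to run_alembic_migrations above: setting alembic_cfg.cmd_opts.x to ["schema=<tenant_id>"] mirrors passing "-x schema=<tenant_id>" on the Alembic command line, which the project's alembic env.py is assumed to read via context.get_x_argument(). A minimal sketch of the CLI-equivalent invocation, for illustration only:

import subprocess


def migrate_tenant_schema_via_cli(tenant_id: str) -> None:
    # Hypothetical shell equivalent of the programmatic upgrade performed in
    # run_alembic_migrations; assumes alembic.ini is discoverable from the cwd.
    subprocess.run(
        ["alembic", "-x", f"schema={tenant_id}", "upgrade", "head"],
        check=True,
    )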
+def user_owns_a_tenant(email: str) -> bool: + with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session: + result = ( + db_session.query(UserTenantMapping) + .filter(UserTenantMapping.email == email) + .first() + ) + return result is not None + + +def add_users_to_tenant(emails: list[str], tenant_id: str) -> None: + with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session: + try: + for email in emails: + db_session.add(UserTenantMapping(email=email, tenant_id=tenant_id)) + except Exception as e: + logger.exception(f"Failed to add users to tenant {tenant_id}: {str(e)}") + db_session.commit() + + +def remove_users_from_tenant(emails: list[str], tenant_id: str) -> None: + with get_session_with_tenant(POSTGRES_DEFAULT_SCHEMA) as db_session: + try: + mappings_to_delete = ( + db_session.query(UserTenantMapping) + .filter( + UserTenantMapping.email.in_(emails), + UserTenantMapping.tenant_id == tenant_id, + ) + .all() + ) + + for mapping in mappings_to_delete: + db_session.delete(mapping) + + db_session.commit() + except Exception as e: + logger.exception( + f"Failed to remove users from tenant {tenant_id}: {str(e)}" + ) + db_session.rollback() diff --git a/backend/model_server/encoders.py b/backend/model_server/encoders.py index 860151b3dc4..003953cb29a 100644 --- a/backend/model_server/encoders.py +++ b/backend/model_server/encoders.py @@ -1,5 +1,5 @@ import json -from typing import Any +from typing import cast from typing import Optional import httpx @@ -10,6 +10,7 @@ from fastapi import APIRouter from fastapi import HTTPException from google.oauth2 import service_account # type: ignore +from litellm import embedding from retry import retry from sentence_transformers import CrossEncoder # type: ignore from sentence_transformers import SentenceTransformer # type: ignore @@ -24,7 +25,9 @@ from model_server.constants import EmbeddingModelTextType from model_server.constants import EmbeddingProvider from model_server.utils import simple_log_function_time +from shared_configs.configs import API_BASED_EMBEDDING_TIMEOUT from shared_configs.configs import INDEXING_ONLY +from shared_configs.configs import OPENAI_EMBEDDING_TIMEOUT from shared_configs.enums import EmbedTextType from shared_configs.enums import RerankerProvider from shared_configs.model_server_models import Embedding @@ -52,48 +55,29 @@ _COHERE_MAX_INPUT_LEN = 96 -def _initialize_client( - api_key: str, provider: EmbeddingProvider, model: str | None = None -) -> Any: - if provider == EmbeddingProvider.OPENAI: - return openai.OpenAI(api_key=api_key) - elif provider == EmbeddingProvider.COHERE: - return CohereClient(api_key=api_key) - elif provider == EmbeddingProvider.VOYAGE: - return voyageai.Client(api_key=api_key) - elif provider == EmbeddingProvider.GOOGLE: - credentials = service_account.Credentials.from_service_account_info( - json.loads(api_key) - ) - project_id = json.loads(api_key)["project_id"] - vertexai.init(project=project_id, credentials=credentials) - return TextEmbeddingModel.from_pretrained(model or DEFAULT_VERTEX_MODEL) - else: - raise ValueError(f"Unsupported provider: {provider}") - - class CloudEmbedding: def __init__( self, api_key: str, provider: EmbeddingProvider, - # Only for Google as is needed on client setup - model: str | None = None, + api_url: str | None = None, + api_version: str | None = None, ) -> None: self.provider = provider - self.client = _initialize_client(api_key, self.provider, model) + self.api_key = api_key + self.api_url = api_url + self.api_version = api_version def 
_embed_openai(self, texts: list[str], model: str | None) -> list[Embedding]: if not model: model = DEFAULT_OPENAI_MODEL - # OpenAI does not seem to provide truncation option, however - # the context lengths used by Danswer currently are smaller than the max token length - # for OpenAI embeddings so it's not a big deal + client = openai.OpenAI(api_key=self.api_key, timeout=OPENAI_EMBEDDING_TIMEOUT) + final_embeddings: list[Embedding] = [] try: for text_batch in batch_list(texts, _OPENAI_MAX_INPUT_LEN): - response = self.client.embeddings.create(input=text_batch, model=model) + response = client.embeddings.create(input=text_batch, model=model) final_embeddings.extend( [embedding.embedding for embedding in response.data] ) @@ -114,17 +98,19 @@ def _embed_cohere( if not model: model = DEFAULT_COHERE_MODEL + client = CohereClient(api_key=self.api_key, timeout=API_BASED_EMBEDDING_TIMEOUT) + final_embeddings: list[Embedding] = [] for text_batch in batch_list(texts, _COHERE_MAX_INPUT_LEN): # Does not use the same tokenizer as the Danswer API server but it's approximately the same # empirically it's only off by a very few tokens so it's not a big deal - response = self.client.embed( + response = client.embed( texts=text_batch, model=model, input_type=embedding_type, truncate="END", ) - final_embeddings.extend(response.embeddings) + final_embeddings.extend(cast(list[Embedding], response.embeddings)) return final_embeddings def _embed_voyage( @@ -133,23 +119,45 @@ def _embed_voyage( if not model: model = DEFAULT_VOYAGE_MODEL - # Similar to Cohere, the API server will do approximate size chunking - # it's acceptable to miss by a few tokens - response = self.client.embed( + client = voyageai.Client( + api_key=self.api_key, timeout=API_BASED_EMBEDDING_TIMEOUT + ) + + response = client.embed( texts, model=model, input_type=embedding_type, - truncation=True, # Also this is default + truncation=True, ) return response.embeddings + def _embed_azure(self, texts: list[str], model: str | None) -> list[Embedding]: + response = embedding( + model=model, + input=texts, + timeout=API_BASED_EMBEDDING_TIMEOUT, + api_key=self.api_key, + api_base=self.api_url, + api_version=self.api_version, + ) + embeddings = [embedding["embedding"] for embedding in response.data] + + return embeddings + def _embed_vertex( self, texts: list[str], model: str | None, embedding_type: str ) -> list[Embedding]: if not model: model = DEFAULT_VERTEX_MODEL - embeddings = self.client.get_embeddings( + credentials = service_account.Credentials.from_service_account_info( + json.loads(self.api_key) + ) + project_id = json.loads(self.api_key)["project_id"] + vertexai.init(project=project_id, credentials=credentials) + client = TextEmbeddingModel.from_pretrained(model) + + embeddings = client.get_embeddings( [ TextEmbeddingInput( text, @@ -161,6 +169,33 @@ def _embed_vertex( ) return [embedding.values for embedding in embeddings] + def _embed_litellm_proxy( + self, texts: list[str], model_name: str | None + ) -> list[Embedding]: + if not model_name: + raise ValueError("Model name is required for LiteLLM proxy embedding.") + + if not self.api_url: + raise ValueError("API URL is required for LiteLLM proxy embedding.") + + headers = ( + {} if not self.api_key else {"Authorization": f"Bearer {self.api_key}"} + ) + + with httpx.Client() as client: + response = client.post( + self.api_url, + json={ + "model": model_name, + "input": texts, + }, + headers=headers, + timeout=API_BASED_EMBEDDING_TIMEOUT, + ) + response.raise_for_status() + result = 
response.json() + return [embedding["embedding"] for embedding in result["data"]] + @retry(tries=_RETRY_TRIES, delay=_RETRY_DELAY) def embed( self, @@ -168,10 +203,16 @@ def embed( texts: list[str], text_type: EmbedTextType, model_name: str | None = None, + deployment_name: str | None = None, ) -> list[Embedding]: try: if self.provider == EmbeddingProvider.OPENAI: return self._embed_openai(texts, model_name) + elif self.provider == EmbeddingProvider.AZURE: + return self._embed_azure(texts, f"azure/{deployment_name}") + elif self.provider == EmbeddingProvider.LITELLM: + return self._embed_litellm_proxy(texts, model_name) + embedding_type = EmbeddingModelTextType.get_type(self.provider, text_type) if self.provider == EmbeddingProvider.COHERE: return self._embed_cohere(texts, model_name, embedding_type) @@ -189,10 +230,13 @@ def embed( @staticmethod def create( - api_key: str, provider: EmbeddingProvider, model: str | None = None + api_key: str, + provider: EmbeddingProvider, + api_url: str | None = None, + api_version: str | None = None, ) -> "CloudEmbedding": logger.debug(f"Creating Embedding instance for provider: {provider}") - return CloudEmbedding(api_key, provider, model) + return CloudEmbedding(api_key, provider, api_url, api_version) def get_embedding_model( @@ -235,36 +279,19 @@ def get_local_reranking_model( return _RERANK_MODEL -def embed_with_litellm_proxy( - texts: list[str], api_url: str, model_name: str, api_key: str | None -) -> list[Embedding]: - headers = {} if not api_key else {"Authorization": f"Bearer {api_key}"} - - with httpx.Client() as client: - response = client.post( - api_url, - json={ - "model": model_name, - "input": texts, - }, - headers=headers, - ) - response.raise_for_status() - result = response.json() - return [embedding["embedding"] for embedding in result["data"]] - - @simple_log_function_time() def embed_text( texts: list[str], text_type: EmbedTextType, model_name: str | None, + deployment_name: str | None, max_context_length: int, normalize_embeddings: bool, api_key: str | None, provider_type: EmbeddingProvider | None, prefix: str | None, api_url: str | None, + api_version: str | None, ) -> list[Embedding]: logger.info(f"Embedding {len(texts)} texts with provider: {provider_type}") @@ -276,23 +303,7 @@ def embed_text( logger.error("No texts provided for embedding") raise ValueError("No texts provided for embedding.") - if provider_type == EmbeddingProvider.LITELLM: - logger.debug(f"Using LiteLLM proxy for embedding with URL: {api_url}") - if not api_url: - logger.error("API URL not provided for LiteLLM proxy") - raise ValueError("API URL is required for LiteLLM proxy embedding.") - try: - return embed_with_litellm_proxy( - texts=texts, - api_url=api_url, - model_name=model_name or "", - api_key=api_key, - ) - except Exception as e: - logger.exception(f"Error during LiteLLM proxy embedding: {str(e)}") - raise - - elif provider_type is not None: + if provider_type is not None: logger.debug(f"Using cloud provider {provider_type} for embedding") if api_key is None: logger.error("API key not provided for cloud model") @@ -306,11 +317,15 @@ def embed_text( ) cloud_model = CloudEmbedding( - api_key=api_key, provider=provider_type, model=model_name + api_key=api_key, + provider=provider_type, + api_url=api_url, + api_version=api_version, ) embeddings = cloud_model.embed( texts=texts, model_name=model_name, + deployment_name=deployment_name, text_type=text_type, ) @@ -404,12 +419,14 @@ async def process_embed_request( embeddings = embed_text( 
texts=embed_request.texts, model_name=embed_request.model_name, + deployment_name=embed_request.deployment_name, max_context_length=embed_request.max_context_length, normalize_embeddings=embed_request.normalize_embeddings, api_key=embed_request.api_key, provider_type=embed_request.provider_type, text_type=embed_request.text_type, api_url=embed_request.api_url, + api_version=embed_request.api_version, prefix=prefix, ) return EmbedResponse(embeddings=embeddings) diff --git a/backend/model_server/main.py b/backend/model_server/main.py index 5c7979475c7..ce9cc724a98 100644 --- a/backend/model_server/main.py +++ b/backend/model_server/main.py @@ -4,9 +4,12 @@ from contextlib import asynccontextmanager from pathlib import Path +import sentry_sdk import torch import uvicorn from fastapi import FastAPI +from sentry_sdk.integrations.fastapi import FastApiIntegration +from sentry_sdk.integrations.starlette import StarletteIntegration from transformers import logging as transformer_logging # type:ignore from danswer import __version__ @@ -19,6 +22,7 @@ from shared_configs.configs import MIN_THREADS_ML_MODELS from shared_configs.configs import MODEL_SERVER_ALLOWED_HOST from shared_configs.configs import MODEL_SERVER_PORT +from shared_configs.configs import SENTRY_DSN os.environ["TOKENIZERS_PARALLELISM"] = "false" os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1" @@ -81,6 +85,15 @@ def get_model_app() -> FastAPI: application = FastAPI( title="Danswer Model Server", version=__version__, lifespan=lifespan ) + if SENTRY_DSN: + sentry_sdk.init( + dsn=SENTRY_DSN, + integrations=[StarletteIntegration(), FastApiIntegration()], + traces_sample_rate=0.1, + ) + logger.info("Sentry initialized") + else: + logger.debug("Sentry DSN not provided, skipping Sentry initialization") application.include_router(management_router) application.include_router(encoders_router) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index a9cf3650e13..d32255d9f65 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -4,6 +4,14 @@ mypy_path = "$MYPY_CONFIG_FILE_DIR" explicit_package_bases = true disallow_untyped_defs = true +[[tool.mypy.overrides]] +module = "alembic.versions.*" +disable_error_code = ["var-annotated"] + +[[tool.mypy.overrides]] +module = "alembic_tenants.versions.*" +disable_error_code = ["var-annotated"] + [tool.ruff] ignore = [] line-length = 130 diff --git a/backend/requirements/default.txt b/backend/requirements/default.txt index 2b7da8b84d4..c06ab2ca7cf 100644 --- a/backend/requirements/default.txt +++ b/backend/requirements/default.txt @@ -1,8 +1,8 @@ aiohttp==3.10.2 alembic==1.10.4 asyncpg==0.27.0 -atlassian-python-api==3.37.0 -beautifulsoup4==4.12.2 +atlassian-python-api==3.41.16 +beautifulsoup4==4.12.3 boto3==1.34.84 celery==5.5.0b4 chardet==5.2.0 @@ -19,23 +19,26 @@ google-auth-oauthlib==1.0.0 # GPT4All library has issues running on Macs and python:3.11.4-slim-bookworm # will reintroduce this when library version catches up # gpt4all==2.0.2 -httpcore==0.16.3 -httpx[http2]==0.23.3 -httpx-oauth==0.11.2 +httpcore==1.0.5 +httpx[http2]==0.27.0 +httpx-oauth==0.15.1 huggingface-hub==0.20.1 jira==3.5.1 jsonref==1.1.0 +trafilatura==1.12.2 langchain==0.1.17 langchain-core==0.1.50 langchain-text-splitters==0.0.1 -litellm==1.48.7 +litellm==1.50.2 +lxml==5.3.0 +lxml_html_clean==0.2.2 llama-index==0.9.45 Mako==1.2.4 msal==1.28.0 nltk==3.8.1 Office365-REST-Python-Client==2.5.9 oauthlib==3.2.2 -openai==1.47.0 +openai==1.52.2 openpyxl==3.1.2 playwright==1.41.2 psutil==5.9.5 @@ -46,7 +49,7 @@ 
PyGithub==1.58.2 python-dateutil==2.8.2 python-gitlab==3.9.0 python-pptx==0.6.23 -pypdf==3.17.0 +pypdf==4.3.0 pytest-mock==3.12.0 pytest-playwright==0.3.2 python-docx==1.1.2 @@ -58,7 +61,6 @@ requests==2.32.2 requests-oauthlib==1.3.1 retry==0.9.2 # This pulls in py which is in CVE-2022-42969, must remove py from image rfc3986==1.5.0 -rt==3.1.2 simple-salesforce==1.12.6 slack-sdk==3.20.2 SQLAlchemy[mypy]==2.0.15 @@ -67,11 +69,15 @@ supervisor==4.2.5 tiktoken==0.7.0 timeago==1.0.16 transformers==4.39.2 +unstructured==0.15.1 +unstructured-client==0.25.4 uvicorn==0.21.1 zulip==0.8.2 hubspot-api-client==8.1.0 asana==5.0.8 -zenpy==2.0.41 dropbox==11.36.2 boto3-stubs[s3]==1.34.133 -ultimate_sitemap_parser==0.5 +stripe==10.12.0 +urllib3==2.2.3 +mistune==0.8.4 +sentry-sdk==2.14.0 diff --git a/backend/requirements/dev.txt b/backend/requirements/dev.txt index 881920af7f2..84c1c9ea4bf 100644 --- a/backend/requirements/dev.txt +++ b/backend/requirements/dev.txt @@ -11,6 +11,7 @@ types-beautifulsoup4==4.12.0.3 types-html5lib==1.1.11.13 types-oauthlib==3.2.0.9 types-setuptools==68.0.0.3 +types-Pillow==10.2.0.20240822 types-passlib==1.7.7.20240106 types-psutil==5.9.5.17 types-psycopg2==2.9.21.10 @@ -20,4 +21,9 @@ types-regex==2023.3.23.1 types-requests==2.28.11.17 types-retry==0.9.9.3 types-urllib3==1.26.25.11 -boto3-stubs[s3]==1.34.133 \ No newline at end of file +trafilatura==1.12.2 +lxml==5.3.0 +lxml_html_clean==0.2.2 +boto3-stubs[s3]==1.34.133 +pandas==2.2.3 +pandas-stubs==2.2.3.241009 \ No newline at end of file diff --git a/backend/requirements/ee.txt b/backend/requirements/ee.txt index 0717e3a67e7..1ca9c7eb924 100644 --- a/backend/requirements/ee.txt +++ b/backend/requirements/ee.txt @@ -1 +1 @@ -python3-saml==1.15.0 +python3-saml==1.15.0 \ No newline at end of file diff --git a/backend/requirements/model_server.txt b/backend/requirements/model_server.txt index 1e7baa415ee..3bc32a8f6d7 100644 --- a/backend/requirements/model_server.txt +++ b/backend/requirements/model_server.txt @@ -1,9 +1,9 @@ -cohere==5.6.1 einops==0.8.0 +cohere==5.6.1 fastapi==0.109.2 google-cloud-aiplatform==1.58.0 numpy==1.26.4 -openai==1.47.0 +openai==1.52.2 pydantic==2.8.2 retry==0.9.2 safetensors==0.4.2 @@ -12,3 +12,5 @@ torch==2.2.0 transformers==4.39.2 uvicorn==0.21.1 voyageai==0.2.3 +litellm==1.50.2 +sentry-sdk[fastapi,celery,starlette]==2.14.0 \ No newline at end of file diff --git a/backend/scripts/add_connector_creation_script.py b/backend/scripts/add_connector_creation_script.py new file mode 100644 index 00000000000..9a1944080c0 --- /dev/null +++ b/backend/scripts/add_connector_creation_script.py @@ -0,0 +1,148 @@ +from typing import Any +from typing import Dict + +import requests + +API_SERVER_URL = "http://localhost:3000" # Adjust this to your Danswer server URL +HEADERS = {"Content-Type": "application/json"} +API_KEY = "danswer-api-key" # API key here, if auth is enabled + + +def create_connector( + name: str, + source: str, + input_type: str, + connector_specific_config: Dict[str, Any], + is_public: bool = True, + groups: list[int] | None = None, +) -> Dict[str, Any]: + connector_update_request = { + "name": name, + "source": source, + "input_type": input_type, + "connector_specific_config": connector_specific_config, + "is_public": is_public, + "groups": groups or [], + } + + response = requests.post( + url=f"{API_SERVER_URL}/api/manage/admin/connector", + json=connector_update_request, + headers=HEADERS, + ) + response.raise_for_status() + return response.json() + + +def create_credential( + name: str, + 
source: str, + credential_json: Dict[str, Any], + is_public: bool = True, + groups: list[int] | None = None, +) -> Dict[str, Any]: + credential_request = { + "name": name, + "source": source, + "credential_json": credential_json, + "admin_public": is_public, + "groups": groups or [], + } + + response = requests.post( + url=f"{API_SERVER_URL}/api/manage/credential", + json=credential_request, + headers=HEADERS, + ) + response.raise_for_status() + return response.json() + + +def create_cc_pair( + connector_id: int, + credential_id: int, + name: str, + access_type: str = "public", + groups: list[int] | None = None, +) -> Dict[str, Any]: + cc_pair_request = { + "name": name, + "access_type": access_type, + "groups": groups or [], + } + + response = requests.put( + url=f"{API_SERVER_URL}/api/manage/connector/{connector_id}/credential/{credential_id}", + json=cc_pair_request, + headers=HEADERS, + ) + response.raise_for_status() + return response.json() + + +def main() -> None: + # Create a Web connector + web_connector = create_connector( + name="Example Web Connector", + source="web", + input_type="load_state", + connector_specific_config={ + "base_url": "https://example.com", + "web_connector_type": "recursive", + }, + ) + print(f"Created Web Connector: {web_connector}") + + # Create a credential for the Web connector + web_credential = create_credential( + name="Example Web Credential", + source="web", + credential_json={}, # Web connectors typically don't need credentials + is_public=True, + ) + print(f"Created Web Credential: {web_credential}") + + # Create CC pair for Web connector + web_cc_pair = create_cc_pair( + connector_id=web_connector["id"], + credential_id=web_credential["id"], + name="Example Web CC Pair", + access_type="public", + ) + print(f"Created Web CC Pair: {web_cc_pair}") + + # Create a GitHub connector + github_connector = create_connector( + name="Example GitHub Connector", + source="github", + input_type="poll", + connector_specific_config={ + "repo_owner": "example-owner", + "repo_name": "example-repo", + "include_prs": True, + "include_issues": True, + }, + ) + print(f"Created GitHub Connector: {github_connector}") + + # Create a credential for the GitHub connector + github_credential = create_credential( + name="Example GitHub Credential", + source="github", + credential_json={"github_access_token": "your_github_access_token_here"}, + is_public=True, + ) + print(f"Created GitHub Credential: {github_credential}") + + # Create CC pair for GitHub connector + github_cc_pair = create_cc_pair( + connector_id=github_connector["id"], + credential_id=github_credential["id"], + name="Example GitHub CC Pair", + access_type="public", + ) + print(f"Created GitHub CC Pair: {github_cc_pair}") + + +if __name__ == "__main__": + main() diff --git a/backend/scripts/chat_feedback_dump.py b/backend/scripts/chat_feedback_dump.py new file mode 100644 index 00000000000..f0d6d3cbb37 --- /dev/null +++ b/backend/scripts/chat_feedback_dump.py @@ -0,0 +1,239 @@ +# This file is used to demonstrate how to use the backend APIs directly +# to query out feedback for all messages +import argparse +import logging +from logging import getLogger +from typing import Any +from uuid import UUID + +import requests + +from danswer.server.manage.models import AllUsersResponse +from danswer.server.query_and_chat.models import ChatSessionsResponse +from ee.danswer.server.query_history.api import ChatSessionSnapshot + +# Configure the logger +logging.basicConfig( + level=logging.INFO, # Set the log level (DEBUG, 
INFO, WARNING, ERROR, CRITICAL) + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", # Log format + handlers=[logging.StreamHandler()], # Output logs to console +) + +logger = getLogger(__name__) + +# uncomment the following pydantic models if you need the script to be independent +# from pydantic import BaseModel +# from datetime import datetime +# from enum import Enum + +# class UserRole(str, Enum): +# """ +# User roles +# - Basic can't perform any admin actions +# - Admin can perform all admin actions +# - Curator can perform admin actions for +# groups they are curators of +# - Global Curator can perform admin actions +# for all groups they are a member of +# """ + +# BASIC = "basic" +# ADMIN = "admin" +# CURATOR = "curator" +# GLOBAL_CURATOR = "global_curator" + + +# class UserStatus(str, Enum): +# LIVE = "live" +# INVITED = "invited" +# DEACTIVATED = "deactivated" + + +# class FullUserSnapshot(BaseModel): +# id: UUID +# email: str +# role: UserRole +# status: UserStatus + + +# class InvitedUserSnapshot(BaseModel): +# email: str + + +# class AllUsersResponse(BaseModel): +# accepted: list[FullUserSnapshot] +# invited: list[InvitedUserSnapshot] +# accepted_pages: int +# invited_pages: int + + +# class ChatSessionSharedStatus(str, Enum): +# PUBLIC = "public" +# PRIVATE = "private" + + +# class ChatSessionDetails(BaseModel): +# id: UUID +# name: str +# persona_id: int | None = None +# time_created: str +# shared_status: ChatSessionSharedStatus +# folder_id: int | None = None +# current_alternate_model: str | None = None + + +# class ChatSessionsResponse(BaseModel): +# sessions: list[ChatSessionDetails] + + +# class SessionType(str, Enum): +# CHAT = "Chat" +# SEARCH = "Search" +# SLACK = "Slack" + + +# class AbridgedSearchDoc(BaseModel): +# """A subset of the info present in `SearchDoc`""" + +# document_id: str +# semantic_identifier: str +# link: str | None + + +# class QAFeedbackType(str, Enum): +# LIKE = "like" # User likes the answer, used for metrics +# DISLIKE = "dislike" # User dislikes the answer, used for metrics + + +# class MessageType(str, Enum): +# # Using OpenAI standards, Langchain equivalent shown in comment +# # System message is always constructed on the fly, not saved +# SYSTEM = "system" # SystemMessage +# USER = "user" # HumanMessage +# ASSISTANT = "assistant" # AIMessage + + +# class MessageSnapshot(BaseModel): +# message: str +# message_type: MessageType +# documents: list[AbridgedSearchDoc] +# feedback_type: QAFeedbackType | None +# feedback_text: str | None +# time_created: datetime + + +# class ChatSessionSnapshot(BaseModel): +# id: UUID +# user_email: str +# name: str | None +# messages: list[MessageSnapshot] +# persona_name: str | None +# time_created: datetime +# flow_type: SessionType + + +def create_new_chat_session(danswer_url: str, api_key: str | None) -> int: + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None + session_endpoint = danswer_url + "/api/chat/create-chat-session" + + response = requests.get(session_endpoint, headers=headers) + response.raise_for_status() + + new_session_id = response.json()["chat_session_id"] + return new_session_id + + +def manage_users(danswer_url: str, headers: dict[str, str] | None) -> AllUsersResponse: + endpoint = danswer_url + "/manage/users" + + response = requests.get( + endpoint, + headers=headers, + ) + response.raise_for_status() + + all_users = AllUsersResponse(**response.json()) + return all_users + + +def get_chat_sessions( + danswer_url: str, headers: dict[str, str] | None, 
user_id: UUID +) -> ChatSessionsResponse: + endpoint = danswer_url + "/admin/chat-sessions" + + params: dict[str, Any] = {"user_id": user_id} + response = requests.get( + endpoint, + params=params, + headers=headers, + ) + response.raise_for_status() + + sessions = ChatSessionsResponse(**response.json()) + return sessions + + +def get_session_history( + danswer_url: str, headers: dict[str, str] | None, session_id: UUID +) -> ChatSessionSnapshot: + endpoint = danswer_url + f"/admin/chat-session-history/{session_id}" + + response = requests.get( + endpoint, + headers=headers, + ) + response.raise_for_status() + + sessions = ChatSessionSnapshot(**response.json()) + return sessions + + +def process_all_chat_feedback(danswer_url: str, api_key: str | None) -> None: + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None + + all_users = manage_users(danswer_url, headers) + if not all_users: + raise RuntimeError("manage_users returned None") + + logger.info(f"Accepted users: {len(all_users.accepted)}") + + user_ids: list[UUID] = [user.id for user in all_users.accepted] + + for user_id in user_ids: + r_sessions = get_chat_sessions(danswer_url, headers, user_id) + logger.info(f"user={user_id} num_sessions={len(r_sessions.sessions)}") + for session in r_sessions.sessions: + try: + s = get_session_history(danswer_url, headers, session.id) + except requests.exceptions.HTTPError: + logger.exception("get_session_history failed.") + continue # skip this session if its history could not be fetched + + for m in s.messages: + logger.info( + f"user={user_id} " + f"session={session.id} " + f"message={m.message} " + f"feedback_type={m.feedback_type} " + f"feedback_text={m.feedback_text}" + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Sample API Usage - Chat Feedback") + parser.add_argument( + "--url", + type=str, + default="http://localhost:8080", + help="Danswer URL, should point to Danswer nginx.", + ) + + # Not needed if Auth is disabled?
+ # Or for Danswer MIT Edition API key must be replaced with session cookie + parser.add_argument( + "--api-key", + type=str, + help="Danswer Admin Level API key", + ) + + args = parser.parse_args() + process_all_chat_feedback(danswer_url=args.url, api_key=args.api_key) diff --git a/backend/scripts/dev_run_background_jobs.py b/backend/scripts/dev_run_background_jobs.py index a4a253a10df..1ca823e0935 100644 --- a/backend/scripts/dev_run_background_jobs.py +++ b/backend/scripts/dev_run_background_jobs.py @@ -1,5 +1,3 @@ -import argparse -import os import subprocess import threading @@ -17,18 +15,18 @@ def monitor_process(process_name: str, process: subprocess.Popen) -> None: break -def run_jobs(exclude_indexing: bool) -> None: +def run_jobs() -> None: # command setup cmd_worker_primary = [ "celery", "-A", - "ee.danswer.background.celery.celery_app", + "danswer.background.celery.versioned_apps.primary", "worker", "--pool=threads", "--concurrency=6", + "--prefetch-multiplier=1", "--loglevel=INFO", - "-n", - "primary@%n", + "--hostname=primary@%n", "-Q", "celery", ] @@ -36,13 +34,13 @@ def run_jobs(exclude_indexing: bool) -> None: cmd_worker_light = [ "celery", "-A", - "ee.danswer.background.celery.celery_app", + "danswer.background.celery.versioned_apps.light", "worker", "--pool=threads", "--concurrency=16", + "--prefetch-multiplier=8", "--loglevel=INFO", - "-n", - "light@%n", + "--hostname=light@%n", "-Q", "vespa_metadata_sync,connector_deletion", ] @@ -50,21 +48,34 @@ def run_jobs(exclude_indexing: bool) -> None: cmd_worker_heavy = [ "celery", "-A", - "ee.danswer.background.celery.celery_app", + "danswer.background.celery.versioned_apps.heavy", "worker", "--pool=threads", "--concurrency=6", + "--prefetch-multiplier=1", "--loglevel=INFO", - "-n", - "heavy@%n", + "--hostname=heavy@%n", "-Q", "connector_pruning", ] + cmd_worker_indexing = [ + "celery", + "-A", + "danswer.background.celery.versioned_apps.indexing", + "worker", + "--pool=threads", + "--concurrency=1", + "--prefetch-multiplier=1", + "--loglevel=INFO", + "--hostname=indexing@%n", + "--queues=connector_indexing", + ] + cmd_beat = [ "celery", "-A", - "ee.danswer.background.celery.celery_app", + "danswer.background.celery.versioned_apps.beat", "beat", "--loglevel=INFO", ] @@ -82,6 +93,10 @@ def run_jobs(exclude_indexing: bool) -> None: cmd_worker_heavy, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True ) + worker_indexing_process = subprocess.Popen( + cmd_worker_indexing, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True + ) + beat_process = subprocess.Popen( cmd_beat, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True ) @@ -96,64 +111,23 @@ def run_jobs(exclude_indexing: bool) -> None: worker_heavy_thread = threading.Thread( target=monitor_process, args=("HEAVY", worker_heavy_process) ) + worker_indexing_thread = threading.Thread( + target=monitor_process, args=("INDEX", worker_indexing_process) + ) beat_thread = threading.Thread(target=monitor_process, args=("BEAT", beat_process)) worker_primary_thread.start() worker_light_thread.start() worker_heavy_thread.start() + worker_indexing_thread.start() beat_thread.start() - if not exclude_indexing: - update_env = os.environ.copy() - update_env["PYTHONPATH"] = "." 
- cmd_indexing = ["python", "danswer/background/update.py"] - - indexing_process = subprocess.Popen( - cmd_indexing, - env=update_env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) - - indexing_thread = threading.Thread( - target=monitor_process, args=("INDEXING", indexing_process) - ) - - indexing_thread.start() - indexing_thread.join() - try: - update_env = os.environ.copy() - update_env["PYTHONPATH"] = "." - cmd_perm_sync = ["python", "ee/danswer/background/permission_sync.py"] - - indexing_process = subprocess.Popen( - cmd_perm_sync, - env=update_env, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - ) - - perm_sync_thread = threading.Thread( - target=monitor_process, args=("INDEXING", indexing_process) - ) - perm_sync_thread.start() - perm_sync_thread.join() - except Exception: - pass - worker_primary_thread.join() worker_light_thread.join() worker_heavy_thread.join() + worker_indexing_thread.join() beat_thread.join() if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Run background jobs.") - parser.add_argument( - "--no-indexing", action="store_true", help="Do not run indexing process" - ) - args = parser.parse_args() - - run_jobs(args.no_indexing) + run_jobs() diff --git a/backend/scripts/document_seeding_prep.py b/backend/scripts/document_seeding_prep.py new file mode 100644 index 00000000000..d853b68ccb4 --- /dev/null +++ b/backend/scripts/document_seeding_prep.py @@ -0,0 +1,240 @@ +# This script preps the documents used for initially seeding the index. It handles the embedding so that the +# documents can be added to the index with minimal processing. +import json + +from pydantic import BaseModel +from sentence_transformers import SentenceTransformer # type: ignore + + +class SeedPresaveDocument(BaseModel): + url: str + title: str + content: str + title_embedding: list[float] + content_embedding: list[float] + chunk_ind: int = 0 + + +# Be sure to use the default embedding model +model = SentenceTransformer("nomic-ai/nomic-embed-text-v1", trust_remote_code=True) +tokenizer = model.tokenizer + +# This is easier than cleaning up the crawl, needs to be updated if the sites are changed +overview_title = "Use Cases Overview" +overview = ( + "How to leverage Danswer in your organization\n\n" + "Danswer Overview\n" + "Danswer is the AI Assistant connected to your organization's docs, apps, and people. " + "Danswer makes Generative AI more versatile for work by enabling new types of questions like " + '"What is the most common feature request we\'ve heard from customers this month". ' + "Whereas other AI systems have no context of your team and are generally unhelpful with work related questions, " + "Danswer makes it possible to ask these questions in natural language and get back answers in seconds.\n\n" + "Danswer can connect to +30 different tools and the use cases are not limited to the ones in the following pages. " + "The highlighted use cases are for inspiration and come from feedback gathered from our users and customers.\n\n\n" + "Common Getting Started Questions:\n\n" + "Why are these docs connected in my Danswer deployment?\n" + "Answer: This is just an example of how connectors work in Danswer. You can connect up your own team's knowledge " + "and you will be able to ask questions unique to your organization. Danswer will keep all of the knowledge up to date " + "and in sync with your connected applications.\n\n" + "Is my data being sent anywhere when I connect it up to Danswer?\n" + "Answer: No! 
Danswer is built with data security as our highest priority. We open sourced it so our users can know " + "exactly what is going on with their data. By default all of the document processing happens within Danswer. " + "The only time it is sent outward is for the GenAI call to generate answers.\n\n" + "Where is the feature for auto sync-ing document level access permissions from all connected sources?\n" + "Answer: This falls under the Enterprise Edition set of Danswer features built on top of the MIT/community edition. " + "If you are on Danswer Cloud, you have access to them by default. If you're running it yourself, reach out to the " + "Danswer team to receive access." +) + +enterprise_search_title = "Enterprise Search" +enterprise_search_1 = ( + "Value of Enterprise Search with Danswer\n\n" + "What is Enterprise Search and why is it Important?\n" + "An Enterprise Search system gives team members a single place to access all of the disparate knowledge " + "of an organization. Critical information is saved across a host of channels like call transcripts with " + "prospects, engineering design docs, IT runbooks, customer support email exchanges, project management " + "tickets, and more. As fast moving teams scale up, information gets spread out and more disorganized.\n\n" + "Since it quickly becomes infeasible to check across every source, decisions get made on incomplete " + "information, employee satisfaction decreases, and the most valuable members of your team are tied up " + "with constant distractions as junior teammates are unable to unblock themselves. Danswer solves this " + "problem by letting anyone on the team access all of the knowledge across your organization in a " + "permissioned and secure way. Users can ask questions in natural language and get back answers and " + "documents across all of the connected sources instantly.\n\n" + "What's the real cost?\n" + "A typical knowledge worker spends over 2 hours a week on search, but more than that, the cost of " + "incomplete or incorrect information can be extremely high. Customer support/success that isn't able " + "to find the reference to similar cases could cause hours or even days of delay leading to lower " + "customer satisfaction or in the worst case - churn. An account exec not realizing that a prospect had " + "previously mentioned a specific need could lead to lost deals. An engineer not realizing a similar " + "feature had previously been built could result in weeks of wasted development time and tech debt with " + "duplicate implementation. With a lack of knowledge, your whole organization is navigating in the dark " + "- inefficient and mistake prone." +) + +enterprise_search_2 = ( + "More than Search\n" + "When analyzing the entire corpus of knowledge within your company is as easy as asking a question " + "in a search bar, your entire team can stay informed and up to date. Danswer also makes it trivial " + "to identify where knowledge is well documented and where it is lacking. Team members who are centers " + "of knowledge can begin to effectively document their expertise since it is no longer being thrown into " + "a black hole. All of this allows the organization to achieve higher efficiency and drive business outcomes.\n\n" + "With Generative AI, the entire user experience has evolved as well. For example, instead of just finding similar " + "cases for your customer support team to reference, Danswer breaks down the issue and explains it so that even " + "the most junior members can understand it. 
This in turn lets them give the most holistic and technically accurate " + "response possible to your customers. On the other end, even the super stars of your sales team will not be able " + "to review 10 hours of transcripts before hopping on that critical call, but Danswer can easily parse through it " + "in mere seconds and give crucial context to help your team close." +) + +ai_platform_title = "AI Platform" +ai_platform = ( + "Build AI Agents powered by the knowledge and workflows specific to your organization.\n\n" + "Beyond Answers\n" + "Agents enabled by generative AI and reasoning capable models are helping teams to automate their work. " + "Danswer is helping teams make it happen. Danswer provides out of the box user chat sessions, attaching custom tools, " + "handling LLM reasoning, code execution, data analysis, referencing internal knowledge, and much more.\n\n" + "Danswer as a platform is not a no-code agent builder. We are made by developers for developers and this gives your " + "team the full flexibility and power to create agents not constrained by blocks and simple logic paths.\n\n" + "Flexibility and Extensibility\n" + "Danswer is open source and completely whitebox. This not only gives transparency to what happens within the system " + "but also means that your team can directly modify the source code to suit your unique needs." +) + +customer_support_title = "Customer Support" +customer_support = ( + "Help your customer support team instantly answer any question across your entire product.\n\n" + "AI Enabled Support\n" + "Customer support agents have one of the highest breadth jobs. They field requests that cover the entire surface " + "area of the product and need to help your users find success on extremely short timelines. " + "Because they're not the same people who designed or built the system, they often lack the depth of understanding " + "needed - resulting in delays and escalations to other teams. Modern teams are leveraging AI to help their CS team " + "optimize the speed and quality of these critical customer-facing interactions.\n\n" + "The Importance of Context\n" + "There are two critical components of AI copilots for customer support. The first is that the AI system needs to be " + "connected with as much information as possible (not just support tools like Zendesk or Intercom) and that the " + "knowledge needs to be as fresh as possible. Sometimes a fix might even be in places rarely checked by CS such as " + "pull requests in a code repository. The second critical component is the ability of the AI system to break down " + "difficult concepts and convoluted processes into more digestible descriptions and for your team members to be able " + "to chat back and forth with the system to build a better understanding.\n\n" + "Danswer takes care of both of these. The system connects up to over 30+ different applications and the knowledge is " + "pulled in constantly so that the information access is always up to date." +) + +sales_title = "Sales" +sales = ( + "Keep your team up to date on every conversation and update so they can close.\n\n" + "Recall Every Detail\n" + "Being able to instantly revisit every detail of any call without reading transcripts is helping Sales teams provide " + "more tailored pitches, build stronger relationships, and close more deals. Instead of searching and reading through " + 'hours of transcripts in preparation for a call, your team can now ask Danswer "What specific features was ACME ' + "interested in seeing for the demo\". 
Since your team doesn't have time to read every transcript prior to a call, " + "Danswer provides a more thorough summary because it can instantly parse hundreds of pages and distill out the relevant " + "information. Even for fast lookups it becomes much more convenient - for example to brush up on connection building " + 'topics by asking "What rapport building topic did we chat about in the last call with ACME".\n\n' + "Know Every Product Update\n" + "It is impossible for Sales teams to keep up with every product update. Because of this, when a prospect has a question " + "that the Sales team does not know, they have no choice but to rely on the Product and Engineering orgs to get an " + "authoritative answer. Not only is this distracting to the other teams, it also slows down the time to respond to the " + "prospect (and as we know, time is the biggest killer of deals). With Danswer, it is even possible to get answers live " + 'on call because of how fast accessing information becomes. A question like "Have we shipped the Microsoft AD ' + 'integration yet?" can now be answered in seconds meaning that prospects can get answers while on the call instead of ' + "asynchronously and sales cycles are reduced as a result." +) + +operations_title = "Operations" +operations = ( + "Double the productivity of your Ops teams like IT, HR, etc.\n\n" + "Automatically Resolve Tickets\n" + "Modern teams are leveraging AI to auto-resolve up to 50% of tickets. Whether it is an employee asking about benefits " + "details or how to set up the VPN for remote work, Danswer can help your team help themselves. This frees up your team to " + "do the real impactful work of landing star candidates or improving your internal processes.\n\n" + "AI Aided Onboarding\n" + "One of the periods where your team needs the most help is when they're just ramping up. Instead of feeling lost in dozens " + "of new tools, Danswer gives them a single place where they can ask about anything in natural language. Whether it's how to " + "set up their work environment or what their onboarding goals are, Danswer can walk them through every step with the help " + "of Generative AI. This lets your team feel more empowered and gives time back to the more seasoned members of your team to " + "focus on moving the needle." +) + +# For simplicity, we're not adding any metadata suffix here. 
Generally there is none for the Web connector anyway +overview_doc = SeedPresaveDocument( + url="https://docs.danswer.dev/more/use_cases/overview", + title=overview_title, + content=overview, + title_embedding=model.encode(f"search_document: {overview_title}"), + content_embedding=model.encode(f"search_document: {overview_title}\n{overview}"), +) + +enterprise_search_doc = SeedPresaveDocument( + url="https://docs.danswer.dev/more/use_cases/enterprise_search", + title=enterprise_search_title, + content=enterprise_search_1, + title_embedding=model.encode(f"search_document: {enterprise_search_title}"), + content_embedding=model.encode( + f"search_document: {enterprise_search_title}\n{enterprise_search_1}" + ), +) + +enterprise_search_doc_2 = SeedPresaveDocument( + url="https://docs.danswer.dev/more/use_cases/enterprise_search", + title=enterprise_search_title, + content=enterprise_search_2, + title_embedding=model.encode(f"search_document: {enterprise_search_title}"), + content_embedding=model.encode( + f"search_document: {enterprise_search_title}\n{enterprise_search_2}" + ), + chunk_ind=1, +) + +ai_platform_doc = SeedPresaveDocument( + url="https://docs.danswer.dev/more/use_cases/ai_platform", + title=ai_platform_title, + content=ai_platform, + title_embedding=model.encode(f"search_document: {ai_platform_title}"), + content_embedding=model.encode( + f"search_document: {ai_platform_title}\n{ai_platform}" + ), +) + +customer_support_doc = SeedPresaveDocument( + url="https://docs.danswer.dev/more/use_cases/customer_support", + title=customer_support_title, + content=customer_support, + title_embedding=model.encode(f"search_document: {customer_support_title}"), + content_embedding=model.encode( + f"search_document: {customer_support_title}\n{customer_support}" + ), +) + +sales_doc = SeedPresaveDocument( + url="https://docs.danswer.dev/more/use_cases/sales", + title=sales_title, + content=sales, + title_embedding=model.encode(f"search_document: {sales_title}"), + content_embedding=model.encode(f"search_document: {sales_title}\n{sales}"), +) + +operations_doc = SeedPresaveDocument( + url="https://docs.danswer.dev/more/use_cases/operations", + title=operations_title, + content=operations, + title_embedding=model.encode(f"search_document: {operations_title}"), + content_embedding=model.encode( + f"search_document: {operations_title}\n{operations}" + ), +) + +documents = [ + overview_doc, + enterprise_search_doc, + enterprise_search_doc_2, + ai_platform_doc, + customer_support_doc, + sales_doc, + operations_doc, +] + +documents_dict = [doc.model_dump() for doc in documents] + +with open("./backend/danswer/seeding/initial_docs.json", "w") as json_file: + json.dump(documents_dict, json_file, indent=4) diff --git a/backend/scripts/force_delete_connector_by_id.py b/backend/scripts/force_delete_connector_by_id.py index 0a9857304c8..241242f4a23 100755 --- a/backend/scripts/force_delete_connector_by_id.py +++ b/backend/scripts/force_delete_connector_by_id.py @@ -206,6 +206,8 @@ def _delete_connector(cc_pair_id: int, db_session: Session) -> None: logger.notice(f"Deleting file {file_name}") file_store.delete_file(file_name) + db_session.commit() + if __name__ == "__main__": parser = argparse.ArgumentParser(description="Delete a connector by its ID") diff --git a/backend/scripts/query_time_check/seed_dummy_docs.py b/backend/scripts/query_time_check/seed_dummy_docs.py index 96b6b4a0133..e7aa65fba76 100644 --- a/backend/scripts/query_time_check/seed_dummy_docs.py +++ 
b/backend/scripts/query_time_check/seed_dummy_docs.py @@ -21,6 +21,7 @@ from danswer.indexing.models import DocMetadataAwareIndexChunk from danswer.indexing.models import IndexChunk from danswer.utils.timing import log_function_time +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA from shared_configs.model_server_models import Embedding @@ -94,6 +95,7 @@ def generate_dummy_chunk( ), document_sets={document_set for document_set in document_set_names}, boost=random.randint(-1, 1), + tenant_id=POSTGRES_DEFAULT_SCHEMA, ) diff --git a/backend/scripts/reset_indexes.py b/backend/scripts/reset_indexes.py index 4ec8d9bf312..1411a082499 100644 --- a/backend/scripts/reset_indexes.py +++ b/backend/scripts/reset_indexes.py @@ -1,8 +1,10 @@ # This file is purely for development use, not included in any builds import os import sys +from time import sleep import requests +from requests.exceptions import RequestException # makes it so `PYTHONPATH=.` is not required when running this script parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -15,22 +17,58 @@ logger = setup_logger() -def wipe_vespa_index() -> None: +def wipe_vespa_index() -> bool: + """ + Wipes the Vespa index by deleting all documents. + """ continuation = None should_continue = True + RETRIES = 3 + while should_continue: params = {"selection": "true", "cluster": DOCUMENT_INDEX_NAME} if continuation: - params = {**params, "continuation": continuation} - response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params) - response.raise_for_status() + params["continuation"] = continuation + + for attempt in range(RETRIES): + try: + response = requests.delete(DOCUMENT_ID_ENDPOINT, params=params) + response.raise_for_status() + + response_json = response.json() + logger.info(f"Response: {response_json}") + + continuation = response_json.get("continuation") + should_continue = bool(continuation) + break # Exit the retry loop if the request is successful + + except RequestException: + logger.exception("Request failed") + sleep(2**attempt) # Exponential backoff + else: + logger.error(f"Max retries ({RETRIES}) exceeded. Exiting.") + return False + + return True + + +def main() -> int: + """ + Main function to execute the script. 
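+ Returns 0 on success and a non-zero exit code on failure, for use with sys.exit().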
+ """ + try: + succeeded = wipe_vespa_index() + except Exception: + logger.exception("wipe_vespa_index raised an exception.") + return 1 - response_json = response.json() - print(response_json) + if not succeeded: + logger.error("Vespa index wipe failed.") + return 1 - continuation = response_json.get("continuation") - should_continue = bool(continuation) + logger.info("Vespa index wiped successfully.") + return 0 if __name__ == "__main__": - wipe_vespa_index() + sys.exit(main()) diff --git a/backend/shared_configs/configs.py b/backend/shared_configs/configs.py index ea37b031c7a..d4378251aa5 100644 --- a/backend/shared_configs/configs.py +++ b/backend/shared_configs/configs.py @@ -1,6 +1,9 @@ import os +from typing import List from urllib.parse import urlparse +from shared_configs.model_server_models import SupportedEmbeddingModel + # Used for logging SLACK_CHANNEL_ID = "channel_id" @@ -59,6 +62,24 @@ # notset, debug, info, notice, warning, error, or critical LOG_LEVEL = os.environ.get("LOG_LEVEL", "notice") +# Timeout for API-based embedding models +# NOTE: does not apply for Google VertexAI, since the python client doesn't +# allow us to specify a custom timeout +API_BASED_EMBEDDING_TIMEOUT = int(os.environ.get("API_BASED_EMBEDDING_TIMEOUT", "600")) + +# Only used for OpenAI +OPENAI_EMBEDDING_TIMEOUT = int( + os.environ.get("OPENAI_EMBEDDING_TIMEOUT", API_BASED_EMBEDDING_TIMEOUT) +) + +# Whether or not to strictly enforce token limit for chunking. +STRICT_CHUNK_TOKEN_LIMIT = ( + os.environ.get("STRICT_CHUNK_TOKEN_LIMIT", "").lower() == "true" +) + +# Set up Sentry integration (for error logging) +SENTRY_DSN = os.environ.get("SENTRY_DSN") + # Fields which should only be set on new search setting PRESERVED_SEARCH_FIELDS = [ @@ -76,16 +97,111 @@ ] -# CORS def validate_cors_origin(origin: str) -> None: parsed = urlparse(origin) if parsed.scheme not in ["http", "https"] or not parsed.netloc: raise ValueError(f"Invalid CORS origin: '{origin}'") -CORS_ALLOWED_ORIGIN = os.environ.get("CORS_ALLOWED_ORIGIN", "*").split(",") or ["*"] - -# Validate non-wildcard origins -for origin in CORS_ALLOWED_ORIGIN: - if origin != "*" and (stripped_origin := origin.strip()): - validate_cors_origin(stripped_origin) +# Examples of valid values for the environment variable: +# - "" (allow all origins) +# - "http://example.com" (single origin) +# - "http://example.com,https://example.org" (multiple origins) +# - "*" (allow all origins) +CORS_ALLOWED_ORIGIN_ENV = os.environ.get("CORS_ALLOWED_ORIGIN", "") + +# Explicitly declare the type of CORS_ALLOWED_ORIGIN +CORS_ALLOWED_ORIGIN: List[str] + +if CORS_ALLOWED_ORIGIN_ENV: + # Split the environment variable into a list of origins + CORS_ALLOWED_ORIGIN = [ + origin.strip() + for origin in CORS_ALLOWED_ORIGIN_ENV.split(",") + if origin.strip() + ] + # Validate each origin in the list + for origin in CORS_ALLOWED_ORIGIN: + validate_cors_origin(origin) +else: + # If the environment variable is empty, allow all origins + CORS_ALLOWED_ORIGIN = ["*"] + + +# Multi-tenancy configuration +MULTI_TENANT = os.environ.get("MULTI_TENANT", "").lower() == "true" + +POSTGRES_DEFAULT_SCHEMA = os.environ.get("POSTGRES_DEFAULT_SCHEMA") or "public" + +# Prefix used for all tenant ids +TENANT_ID_PREFIX = "tenant_" + +SUPPORTED_EMBEDDING_MODELS = [ + # Cloud-based models + SupportedEmbeddingModel( + name="cohere/embed-english-v3.0", + dim=1024, + index_name="danswer_chunk_cohere_embed_english_v3_0", + ), + SupportedEmbeddingModel( + name="cohere/embed-english-light-v3.0", + dim=384, +
index_name="danswer_chunk_cohere_embed_english_light_v3_0", + ), + SupportedEmbeddingModel( + name="openai/text-embedding-3-large", + dim=3072, + index_name="danswer_chunk_openai_text_embedding_3_large", + ), + SupportedEmbeddingModel( + name="openai/text-embedding-3-small", + dim=1536, + index_name="danswer_chunk_openai_text_embedding_3_small", + ), + SupportedEmbeddingModel( + name="google/text-embedding-004", + dim=768, + index_name="danswer_chunk_google_text_embedding_004", + ), + SupportedEmbeddingModel( + name="google/textembedding-gecko@003", + dim=768, + index_name="danswer_chunk_google_textembedding_gecko_003", + ), + SupportedEmbeddingModel( + name="voyage/voyage-large-2-instruct", + dim=1024, + index_name="danswer_chunk_voyage_large_2_instruct", + ), + SupportedEmbeddingModel( + name="voyage/voyage-light-2-instruct", + dim=384, + index_name="danswer_chunk_voyage_light_2_instruct", + ), + # Self-hosted models + SupportedEmbeddingModel( + name="nomic-ai/nomic-embed-text-v1", + dim=768, + index_name="danswer_chunk_nomic_ai_nomic_embed_text_v1", + ), + SupportedEmbeddingModel( + name="intfloat/e5-base-v2", + dim=768, + index_name="danswer_chunk_intfloat_e5_base_v2", + ), + SupportedEmbeddingModel( + name="intfloat/e5-small-v2", + dim=384, + index_name="danswer_chunk_intfloat_e5_small_v2", + ), + SupportedEmbeddingModel( + name="intfloat/multilingual-e5-base", + dim=768, + index_name="danswer_chunk_intfloat_multilingual_e5_base", + ), + SupportedEmbeddingModel( + name="intfloat/multilingual-e5-small", + dim=384, + index_name="danswer_chunk_intfloat_multilingual_e5_small", + ), +] diff --git a/backend/shared_configs/contextvars.py b/backend/shared_configs/contextvars.py new file mode 100644 index 00000000000..df66b141c6e --- /dev/null +++ b/backend/shared_configs/contextvars.py @@ -0,0 +1,8 @@ +import contextvars + +from shared_configs.configs import POSTGRES_DEFAULT_SCHEMA + +# Context variable for the current tenant id +CURRENT_TENANT_ID_CONTEXTVAR = contextvars.ContextVar( + "current_tenant_id", default=POSTGRES_DEFAULT_SCHEMA +) diff --git a/backend/shared_configs/enums.py b/backend/shared_configs/enums.py index b58ac0a8928..3fe1cd0bd01 100644 --- a/backend/shared_configs/enums.py +++ b/backend/shared_configs/enums.py @@ -7,6 +7,7 @@ class EmbeddingProvider(str, Enum): VOYAGE = "voyage" GOOGLE = "google" LITELLM = "litellm" + AZURE = "azure" class RerankerProvider(str, Enum): diff --git a/backend/shared_configs/model_server_models.py b/backend/shared_configs/model_server_models.py index dd846ed6bad..9f7e853d26a 100644 --- a/backend/shared_configs/model_server_models.py +++ b/backend/shared_configs/model_server_models.py @@ -20,6 +20,7 @@ class EmbedRequest(BaseModel): texts: list[str] # Can be none for cloud embedding model requests, error handling logic exists for other cases model_name: str | None = None + deployment_name: str | None = None max_context_length: int normalize_embeddings: bool api_key: str | None = None @@ -28,7 +29,7 @@ class EmbedRequest(BaseModel): manual_query_prefix: str | None = None manual_passage_prefix: str | None = None api_url: str | None = None - + api_version: str | None = None # This disables the "model_" protected namespace for pydantic model_config = {"protected_namespaces": ()} @@ -64,3 +65,9 @@ class IntentRequest(BaseModel): class IntentResponse(BaseModel): is_keyword: bool keywords: list[str] + + +class SupportedEmbeddingModel(BaseModel): + name: str + dim: int + index_name: str diff --git a/backend/supervisord.conf b/backend/supervisord.conf 
index 3dc2edcc6a5..bb7b2a8ba27 100644 --- a/backend/supervisord.conf +++ b/backend/supervisord.conf @@ -3,35 +3,21 @@ nodaemon=true user=root logfile=/var/log/supervisord.log -# Indexing is the heaviest job, also requires some CPU intensive steps -# Cannot place this in Celery for now because Celery must run as a single process (see note below) -# Indexing uses multi-processing to speed things up -[program:document_indexing] -environment=CURRENT_PROCESS_IS_AN_INDEXING_JOB=true -command=python danswer/background/update.py -stdout_logfile=/var/log/document_indexing.log -stdout_logfile_maxbytes=16MB -redirect_stderr=true -autorestart=true - # Background jobs that must be run async due to long time to completion -# NOTE: due to an issue with Celery + SQLAlchemy +# NOTE: due to an issue with Celery + SQLAlchemy # (https://github.com/celery/celery/issues/7007#issuecomment-1740139367) # we must use the threads pool instead of the default prefork pool for now # in order to avoid intermittent errors like: -# `billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 11 (SIGSEGV)`. +# `billiard.exceptions.WorkerLostError: Worker exited prematurely: signal 11 (SIGSEGV)`. # -# This means workers will not be able take advantage of multiple CPU cores +# This means workers will not be able take advantage of multiple CPU cores # on a system, but this should be okay for now since all our celery tasks are -# relatively compute-light (e.g. they tend to just make a bunch of requests to +# relatively compute-light (e.g. they tend to just make a bunch of requests to # Vespa / Postgres) [program:celery_worker_primary] -command=celery -A danswer.background.celery.celery_run:celery_app worker - --pool=threads - --concurrency=4 - --prefetch-multiplier=1 - --loglevel=INFO - --hostname=primary@%%n +command=celery -A danswer.background.celery.versioned_apps.primary worker + --loglevel=INFO + --hostname=primary@%%n -Q celery stdout_logfile=/var/log/celery_worker_primary.log stdout_logfile_maxbytes=16MB @@ -40,14 +26,14 @@ autorestart=true startsecs=10 stopasgroup=true +# NOTE: only allowing configuration here and not in the other celery workers, +# since this is often the bottleneck for "sync" jobs (e.g. document set syncing, +# user group syncing, deletion, etc.) 
[program:celery_worker_light] -command=bash -c "celery -A danswer.background.celery.celery_run:celery_app worker \ - --pool=threads \ - --concurrency=${CELERY_WORKER_LIGHT_CONCURRENCY:-24} \ - --prefetch-multiplier=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-8} \ - --loglevel=INFO \ - --hostname=light@%%n \ - -Q vespa_metadata_sync,connector_deletion" +command=celery -A danswer.background.celery.versioned_apps.light worker + --loglevel=INFO + --hostname=light@%%n + -Q vespa_metadata_sync,connector_deletion stdout_logfile=/var/log/celery_worker_light.log stdout_logfile_maxbytes=16MB redirect_stderr=true @@ -56,11 +42,8 @@ startsecs=10 stopasgroup=true [program:celery_worker_heavy] -command=celery -A danswer.background.celery.celery_run:celery_app worker - --pool=threads - --concurrency=4 - --prefetch-multiplier=1 - --loglevel=INFO +command=celery -A danswer.background.celery.versioned_apps.heavy worker + --loglevel=INFO --hostname=heavy@%%n -Q connector_pruning stdout_logfile=/var/log/celery_worker_heavy.log @@ -70,9 +53,21 @@ autorestart=true startsecs=10 stopasgroup=true +[program:celery_worker_indexing] +command=celery -A danswer.background.celery.versioned_apps.indexing worker + --loglevel=INFO + --hostname=indexing@%%n + -Q connector_indexing +stdout_logfile=/var/log/celery_worker_indexing.log +stdout_logfile_maxbytes=16MB +redirect_stderr=true +autorestart=true +startsecs=10 +stopasgroup=true + # Job scheduler for periodic tasks [program:celery_beat] -command=celery -A danswer.background.celery.celery_run:celery_app beat +command=celery -A danswer.background.celery.versioned_apps.beat beat stdout_logfile=/var/log/celery_beat.log stdout_logfile_maxbytes=16MB redirect_stderr=true @@ -100,7 +95,7 @@ command=tail -qF /var/log/celery_worker_primary.log /var/log/celery_worker_light.log /var/log/celery_worker_heavy.log - /var/log/document_indexing.log + /var/log/celery_worker_indexing.log /var/log/slack_bot.log stdout_logfile=/dev/stdout stdout_logfile_maxbytes = 0 # must be set to 0 when stdout_logfile=/dev/stdout diff --git a/backend/tests/daily/conftest.py b/backend/tests/daily/conftest.py new file mode 100644 index 00000000000..88a74c7b4ce --- /dev/null +++ b/backend/tests/daily/conftest.py @@ -0,0 +1,24 @@ +import os +from collections.abc import Generator +from typing import Any + +import pytest +from fastapi.testclient import TestClient + +from danswer.main import fetch_versioned_implementation +from danswer.utils.logger import setup_logger + +logger = setup_logger() + + +@pytest.fixture(scope="function") +def client() -> Generator[TestClient, Any, None]: + # Set environment variables + os.environ["ENABLE_PAID_ENTERPRISE_EDITION_FEATURES"] = "True" + + # Initialize TestClient with the FastAPI app + app = fetch_versioned_implementation( + module="danswer.main", attribute="get_application" + )() + client = TestClient(app) + yield client diff --git a/backend/tests/daily/connectors/confluence/test_confluence_basic.py b/backend/tests/daily/connectors/confluence/test_confluence_basic.py index 4eb25207814..d4287298bdb 100644 --- a/backend/tests/daily/connectors/confluence/test_confluence_basic.py +++ b/backend/tests/daily/connectors/confluence/test_confluence_basic.py @@ -1,5 +1,7 @@ import os import time +from unittest.mock import MagicMock +from unittest.mock import patch import pytest @@ -24,25 +26,46 @@ def confluence_connector() -> ConfluenceConnector: return connector -def test_confluence_connector_basic(confluence_connector: ConfluenceConnector) -> None: +@patch( + 
"danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_confluence_connector_basic( + mock_get_api_key: MagicMock, confluence_connector: ConfluenceConnector +) -> None: doc_batch_generator = confluence_connector.poll_source(0, time.time()) doc_batch = next(doc_batch_generator) with pytest.raises(StopIteration): next(doc_batch_generator) - assert len(doc_batch) == 1 + assert len(doc_batch) == 2 - doc = doc_batch[0] - assert doc.semantic_identifier == "DailyConnectorTestSpace Home" - assert doc.metadata["labels"] == ["testlabel"] - assert doc.primary_owners - assert doc.primary_owners[0].email == "chris@danswer.ai" - assert len(doc.sections) == 1 + for doc in doc_batch: + if doc.semantic_identifier == "DailyConnectorTestSpace Home": + page_doc = doc + elif ".txt" in doc.semantic_identifier: + txt_doc = doc - section = doc.sections[0] - assert section.text == "test123small" + assert page_doc.semantic_identifier == "DailyConnectorTestSpace Home" + assert page_doc.metadata["labels"] == ["testlabel"] + assert page_doc.primary_owners + assert page_doc.primary_owners[0].email == "chris@danswer.ai" + assert len(page_doc.sections) == 1 + + section = page_doc.sections[0] + assert section.text == "test123" assert ( section.link == "https://danswerai.atlassian.net/wiki/spaces/DailyConne/overview" ) + + assert txt_doc.semantic_identifier == "small-file.txt" + assert len(txt_doc.sections) == 1 + assert txt_doc.sections[0].text == "small" + assert txt_doc.primary_owners + assert txt_doc.primary_owners[0].email == "chris@danswer.ai" + assert ( + txt_doc.sections[0].link + == "https://danswerai.atlassian.net/wiki/pages/viewpageattachments.action?pageId=52494430&preview=%2F52494430%2F52527123%2Fsmall-file.txt" + ) diff --git a/backend/tests/daily/connectors/gmail/conftest.py b/backend/tests/daily/connectors/gmail/conftest.py new file mode 100644 index 00000000000..5010d0b5133 --- /dev/null +++ b/backend/tests/daily/connectors/gmail/conftest.py @@ -0,0 +1,89 @@ +import json +import os +from collections.abc import Callable + +import pytest + +from danswer.connectors.gmail.connector import GmailConnector +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_TOKEN_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_PRIMARY_ADMIN_KEY, +) +from tests.load_env_vars import load_env_vars + + +# Load environment variables at the module level +load_env_vars() + + +def parse_credentials(env_str: str) -> dict: + """ + Parse a double-escaped JSON string from environment variables into a Python dictionary. 
+ Falls back to stripping extra backslash escaping and surrounding quotes when the raw value is not valid JSON. + + Args: + env_str (str): The double-escaped JSON string from environment variables + + Returns: + dict: Parsed OAuth credentials + """ + # first try normally + try: + return json.loads(env_str) + except Exception: + # First, try removing extra escaping backslashes + unescaped = env_str.replace('\\"', '"') + + # remove leading / trailing quotes + unescaped = unescaped.strip('"') + + # Now parse the JSON + return json.loads(unescaped) + + +@pytest.fixture +def google_gmail_oauth_connector_factory() -> Callable[..., GmailConnector]: + def _connector_factory( + primary_admin_email: str = "admin@onyx-test.com", + ) -> GmailConnector: + print("Creating GmailConnector with OAuth credentials") + connector = GmailConnector() + + json_string = os.environ["GOOGLE_GMAIL_OAUTH_CREDENTIALS_JSON_STR"] + refried_json_string = json.dumps(parse_credentials(json_string)) + + credentials_json = { + DB_CREDENTIALS_DICT_TOKEN_KEY: refried_json_string, + DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email, + } + connector.load_credentials(credentials_json) + return connector + + return _connector_factory + + +@pytest.fixture +def google_gmail_service_acct_connector_factory() -> Callable[..., GmailConnector]: + def _connector_factory( + primary_admin_email: str = "admin@onyx-test.com", + ) -> GmailConnector: + print("Creating GmailConnector with service account credentials") + connector = GmailConnector() + + json_string = os.environ["GOOGLE_GMAIL_SERVICE_ACCOUNT_JSON_STR"] + refried_json_string = json.dumps(parse_credentials(json_string)) + + # Load Service Account Credentials + connector.load_credentials( + { + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: refried_json_string, + DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email, + } + ) + return connector + + return _connector_factory diff --git a/backend/tests/daily/connectors/gmail/test_gmail_connector.py b/backend/tests/daily/connectors/gmail/test_gmail_connector.py new file mode 100644 index 00000000000..73cab1136c7 --- /dev/null +++ b/backend/tests/daily/connectors/gmail/test_gmail_connector.py @@ -0,0 +1,125 @@ +from collections.abc import Callable +from typing import Any +from unittest.mock import MagicMock +from unittest.mock import patch + +from danswer.connectors.gmail.connector import GmailConnector +from danswer.connectors.models import Document +from danswer.connectors.models import SlimDocument + + +_THREAD_1_START_TIME = 1730568700 +_THREAD_1_END_TIME = 1730569000 + +""" +This thread was 4 emails long: + admin@onyx-test.com -> test-group-1@onyx-test.com (containing test_user_1 and test_user_2) + test_user_1@onyx-test.com -> admin@onyx-test.com + admin@onyx-test.com -> test_user_2@onyx-test.com + BCC: test_user_3@onyx-test.com + test_user_3@onyx-test.com -> admin@onyx-test.com +""" +_THREAD_1_BY_ID: dict[str, dict[str, Any]] = { + "192edefb315737c3": { + "email": "admin@onyx-test.com", + "sections_count": 4, + "primary_owners": set( + [ + "admin@onyx-test.com", + "test_user_1@onyx-test.com", + "test_user_3@onyx-test.com", + ] + ), + "secondary_owners": set( + [ + "test-group-1@onyx-test.com", + "admin@onyx-test.com", + "test_user_2@onyx-test.com", + "test_user_3@onyx-test.com", + ] + ), + }, + "192edf020d2f5def": { + "email": "test_user_1@onyx-test.com", + "sections_count": 2, + "primary_owners": set(["admin@onyx-test.com", "test_user_1@onyx-test.com"]), + "secondary_owners": set(["test-group-1@onyx-test.com", "admin@onyx-test.com"]), + }, + "192edf020ae90aab": { + "email": "test_user_2@onyx-test.com", + "sections_count": 2, + "primary_owners":
set(["admin@onyx-test.com"]), + "secondary_owners": set( + ["test-group-1@onyx-test.com", "test_user_2@onyx-test.com"] + ), + }, + "192edf18316015fa": { + "email": "test_user_3@onyx-test.com", + "sections_count": 2, + "primary_owners": set(["admin@onyx-test.com", "test_user_3@onyx-test.com"]), + "secondary_owners": set( + [ + "admin@onyx-test.com", + "test_user_2@onyx-test.com", + "test_user_3@onyx-test.com", + ] + ), + }, +} + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_slim_docs_retrieval( + mock_get_api_key: MagicMock, + google_gmail_service_acct_connector_factory: Callable[..., GmailConnector], +) -> None: + print("\n\nRunning test_slim_docs_retrieval") + connector = google_gmail_service_acct_connector_factory() + retrieved_slim_docs: list[SlimDocument] = [] + for doc_batch in connector.retrieve_all_slim_documents( + _THREAD_1_START_TIME, _THREAD_1_END_TIME + ): + retrieved_slim_docs.extend(doc_batch) + + assert len(retrieved_slim_docs) == 4 + + for doc in retrieved_slim_docs: + permission_info = doc.perm_sync_data + assert isinstance(permission_info, dict) + user_email = permission_info["user_email"] + assert _THREAD_1_BY_ID[doc.id]["email"] == user_email + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_docs_retrieval( + mock_get_api_key: MagicMock, + google_gmail_service_acct_connector_factory: Callable[..., GmailConnector], +) -> None: + print("\n\nRunning test_docs_retrieval") + connector = google_gmail_service_acct_connector_factory() + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(_THREAD_1_START_TIME, _THREAD_1_END_TIME): + retrieved_docs.extend(doc_batch) + + assert len(retrieved_docs) == 4 + + for doc in retrieved_docs: + id = doc.id + if doc.primary_owners: + retrieved_primary_owner_emails = set( + [owner.email for owner in doc.primary_owners] + ) + if doc.secondary_owners: + retrieved_secondary_owner_emails = set( + [owner.email for owner in doc.secondary_owners] + ) + assert _THREAD_1_BY_ID[id]["sections_count"] == len(doc.sections) + assert _THREAD_1_BY_ID[id]["primary_owners"] == retrieved_primary_owner_emails + assert ( + _THREAD_1_BY_ID[id]["secondary_owners"] == retrieved_secondary_owner_emails + ) diff --git a/backend/tests/daily/connectors/google_drive/conftest.py b/backend/tests/daily/connectors/google_drive/conftest.py new file mode 100644 index 00000000000..4b618b28e1d --- /dev/null +++ b/backend/tests/daily/connectors/google_drive/conftest.py @@ -0,0 +1,113 @@ +import json +import os +from collections.abc import Callable + +import pytest + +from danswer.connectors.google_drive.connector import GoogleDriveConnector +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_DICT_TOKEN_KEY, +) +from danswer.connectors.google_utils.shared_constants import ( + DB_CREDENTIALS_PRIMARY_ADMIN_KEY, +) +from tests.load_env_vars import load_env_vars + + +# Load environment variables at the module level +load_env_vars() + + +def parse_credentials(env_str: str) -> dict: + """ + Parse a double-escaped JSON string from environment variables into a Python dictionary. 
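+ Falls back to stripping extra backslash escaping and surrounding quotes when the raw value is not valid JSON.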
+ + Args: + env_str (str): The double-escaped JSON string from environment variables + + Returns: + dict: Parsed OAuth credentials + """ + # first try normally + try: + return json.loads(env_str) + except Exception: + # First, try removing extra escaping backslashes + unescaped = env_str.replace('\\"', '"') + + # remove leading / trailing quotes + unescaped = unescaped.strip('"') + + # Now parse the JSON + return json.loads(unescaped) + + +@pytest.fixture +def google_drive_oauth_connector_factory() -> Callable[..., GoogleDriveConnector]: + def _connector_factory( + primary_admin_email: str = "admin@onyx-test.com", + include_shared_drives: bool = True, + shared_drive_urls: str | None = None, + include_my_drives: bool = True, + my_drive_emails: str | None = None, + shared_folder_urls: str | None = None, + ) -> GoogleDriveConnector: + print("Creating GoogleDriveConnector with OAuth credentials") + connector = GoogleDriveConnector( + include_shared_drives=include_shared_drives, + shared_drive_urls=shared_drive_urls, + include_my_drives=include_my_drives, + my_drive_emails=my_drive_emails, + shared_folder_urls=shared_folder_urls, + ) + + json_string = os.environ["GOOGLE_DRIVE_OAUTH_CREDENTIALS_JSON_STR"] + refried_json_string = json.dumps(parse_credentials(json_string)) + + credentials_json = { + DB_CREDENTIALS_DICT_TOKEN_KEY: refried_json_string, + DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email, + } + connector.load_credentials(credentials_json) + return connector + + return _connector_factory + + +@pytest.fixture +def google_drive_service_acct_connector_factory() -> ( + Callable[..., GoogleDriveConnector] +): + def _connector_factory( + primary_admin_email: str = "admin@onyx-test.com", + include_shared_drives: bool = True, + shared_drive_urls: str | None = None, + include_my_drives: bool = True, + my_drive_emails: str | None = None, + shared_folder_urls: str | None = None, + ) -> GoogleDriveConnector: + print("Creating GoogleDriveConnector with service account credentials") + connector = GoogleDriveConnector( + include_shared_drives=include_shared_drives, + shared_drive_urls=shared_drive_urls, + include_my_drives=include_my_drives, + my_drive_emails=my_drive_emails, + shared_folder_urls=shared_folder_urls, + ) + + json_string = os.environ["GOOGLE_DRIVE_SERVICE_ACCOUNT_JSON_STR"] + refried_json_string = json.dumps(parse_credentials(json_string)) + + # Load Service Account Credentials + connector.load_credentials( + { + DB_CREDENTIALS_DICT_SERVICE_ACCOUNT_KEY: refried_json_string, + DB_CREDENTIALS_PRIMARY_ADMIN_KEY: primary_admin_email, + } + ) + return connector + + return _connector_factory diff --git a/backend/tests/daily/connectors/google_drive/helpers.py b/backend/tests/daily/connectors/google_drive/helpers.py new file mode 100644 index 00000000000..7a120412e9d --- /dev/null +++ b/backend/tests/daily/connectors/google_drive/helpers.py @@ -0,0 +1,192 @@ +from collections.abc import Sequence + +from danswer.connectors.models import Document + +ALL_FILES = list(range(0, 60)) +SHARED_DRIVE_FILES = list(range(20, 25)) + + +_ADMIN_FILE_IDS = list(range(0, 5)) +_TEST_USER_1_FILE_IDS = list(range(5, 10)) +_TEST_USER_2_FILE_IDS = list(range(10, 15)) +_TEST_USER_3_FILE_IDS = list(range(15, 20)) +_SHARED_DRIVE_1_FILE_IDS = list(range(20, 25)) +_FOLDER_1_FILE_IDS = list(range(25, 30)) +_FOLDER_1_1_FILE_IDS = list(range(30, 35)) +_FOLDER_1_2_FILE_IDS = list(range(35, 40)) +_SHARED_DRIVE_2_FILE_IDS = list(range(40, 45)) +_FOLDER_2_FILE_IDS = list(range(45, 50)) +_FOLDER_2_1_FILE_IDS =
list(range(50, 55)) +_FOLDER_2_2_FILE_IDS = list(range(55, 60)) +_SECTIONS_FILE_IDS = [61] + +_PUBLIC_FOLDER_RANGE = _FOLDER_1_2_FILE_IDS +_PUBLIC_FILE_IDS = list(range(55, 57)) +PUBLIC_RANGE = _PUBLIC_FOLDER_RANGE + _PUBLIC_FILE_IDS + +_SHARED_DRIVE_1_URL = "https://drive.google.com/drive/folders/0AC_OJ4BkMd4kUk9PVA" +# Group 1 is given access to this folder +_FOLDER_1_URL = ( + "https://drive.google.com/drive/folders/1d3I7U3vUZMDziF1OQqYRkB8Jp2s_GWUn" +) +_FOLDER_1_1_URL = ( + "https://drive.google.com/drive/folders/1aR33-zwzl_mnRAwH55GgtWTE-4A4yWWI" +) +_FOLDER_1_2_URL = ( + "https://drive.google.com/drive/folders/1IO0X55VhvLXf4mdxzHxuKf4wxrDBB6jq" +) +_SHARED_DRIVE_2_URL = "https://drive.google.com/drive/folders/0ABKspIh7P4f4Uk9PVA" +_FOLDER_2_URL = ( + "https://drive.google.com/drive/folders/1lNpCJ1teu8Se0louwL0oOHK9nEalskof" +) +_FOLDER_2_1_URL = ( + "https://drive.google.com/drive/folders/1XeDOMWwxTDiVr9Ig2gKum3Zq_Wivv6zY" +) +_FOLDER_2_2_URL = ( + "https://drive.google.com/drive/folders/1RKlsexA8h7NHvBAWRbU27MJotic7KXe3" +) + +_ADMIN_EMAIL = "admin@onyx-test.com" +_TEST_USER_1_EMAIL = "test_user_1@onyx-test.com" +_TEST_USER_2_EMAIL = "test_user_2@onyx-test.com" +_TEST_USER_3_EMAIL = "test_user_3@onyx-test.com" + +# Dictionary for ranges +DRIVE_ID_MAPPING: dict[str, list[int]] = { + "ADMIN": _ADMIN_FILE_IDS, + "TEST_USER_1": _TEST_USER_1_FILE_IDS, + "TEST_USER_2": _TEST_USER_2_FILE_IDS, + "TEST_USER_3": _TEST_USER_3_FILE_IDS, + "SHARED_DRIVE_1": _SHARED_DRIVE_1_FILE_IDS, + "FOLDER_1": _FOLDER_1_FILE_IDS, + "FOLDER_1_1": _FOLDER_1_1_FILE_IDS, + "FOLDER_1_2": _FOLDER_1_2_FILE_IDS, + "SHARED_DRIVE_2": _SHARED_DRIVE_2_FILE_IDS, + "FOLDER_2": _FOLDER_2_FILE_IDS, + "FOLDER_2_1": _FOLDER_2_1_FILE_IDS, + "FOLDER_2_2": _FOLDER_2_2_FILE_IDS, + "SECTIONS": _SECTIONS_FILE_IDS, +} + +# Dictionary for emails +EMAIL_MAPPING: dict[str, str] = { + "ADMIN": _ADMIN_EMAIL, + "TEST_USER_1": _TEST_USER_1_EMAIL, + "TEST_USER_2": _TEST_USER_2_EMAIL, + "TEST_USER_3": _TEST_USER_3_EMAIL, +} + +# Dictionary for URLs +URL_MAPPING: dict[str, str] = { + "SHARED_DRIVE_1": _SHARED_DRIVE_1_URL, + "FOLDER_1": _FOLDER_1_URL, + "FOLDER_1_1": _FOLDER_1_1_URL, + "FOLDER_1_2": _FOLDER_1_2_URL, + "SHARED_DRIVE_2": _SHARED_DRIVE_2_URL, + "FOLDER_2": _FOLDER_2_URL, + "FOLDER_2_1": _FOLDER_2_1_URL, + "FOLDER_2_2": _FOLDER_2_2_URL, +} + +# Dictionary for access permissions +# All users have access to their own My Drive as well as public files +ACCESS_MAPPING: dict[str, list[int]] = { + # Admin has access to everything in shared + "ADMIN": ( + _ADMIN_FILE_IDS + + _SHARED_DRIVE_1_FILE_IDS + + _FOLDER_1_FILE_IDS + + _FOLDER_1_1_FILE_IDS + + _FOLDER_1_2_FILE_IDS + + _SHARED_DRIVE_2_FILE_IDS + + _FOLDER_2_FILE_IDS + + _FOLDER_2_1_FILE_IDS + + _FOLDER_2_2_FILE_IDS + + _SECTIONS_FILE_IDS + ), + # This user has access to drive 1 + # This user has redundant access to folder 1 because of group access + # This user has been given individual access to files in Admin's My Drive + "TEST_USER_1": ( + _TEST_USER_1_FILE_IDS + + _SHARED_DRIVE_1_FILE_IDS + + _FOLDER_1_FILE_IDS + + _FOLDER_1_1_FILE_IDS + + _FOLDER_1_2_FILE_IDS + + list(range(0, 2)) + ), + # Group 1 includes this user, giving access to folder 1 + # This user has also been given access to folder 2-1 + # This user has also been given individual access to files in folder 2 + "TEST_USER_2": ( + _TEST_USER_2_FILE_IDS + + _FOLDER_1_FILE_IDS + + _FOLDER_1_1_FILE_IDS + + _FOLDER_1_2_FILE_IDS + + _FOLDER_2_1_FILE_IDS + + list(range(45, 47)) + ), + # This user can only see his own files 
and public files + "TEST_USER_3": _TEST_USER_3_FILE_IDS, +} + +SPECIAL_FILE_ID_TO_CONTENT_MAP: dict[int, str] = { + 61: ( + "Title\n\n" + "This is a Google Doc with sections - " + "Section 1\n\n" + "Section 1 content - " + "Sub-Section 1-1\n\n" + "Sub-Section 1-1 content - " + "Sub-Section 1-2\n\n" + "Sub-Section 1-2 content - " + "Section 2\n\n" + "Section 2 content" + ), +} + + +file_name_template = "file_{}.txt" +file_text_template = "This is file {}" + + +def print_discrepencies(expected: set[str], retrieved: set[str]) -> None: + if expected != retrieved: + print(expected) + print(retrieved) + print("Extra:") + print(retrieved - expected) + print("Missing:") + print(expected - retrieved) + + +def get_file_content(file_id: int) -> str: + if file_id in SPECIAL_FILE_ID_TO_CONTENT_MAP: + return SPECIAL_FILE_ID_TO_CONTENT_MAP[file_id] + + return file_text_template.format(file_id) + + +def assert_retrieved_docs_match_expected( + retrieved_docs: list[Document], expected_file_ids: Sequence[int] +) -> None: + expected_file_names = { + file_name_template.format(file_id) for file_id in expected_file_ids + } + expected_file_texts = {get_file_content(file_id) for file_id in expected_file_ids} + + retrieved_file_names = set([doc.semantic_identifier for doc in retrieved_docs]) + retrieved_texts = set( + [ + " - ".join([section.text for section in doc.sections]) + for doc in retrieved_docs + ] + ) + + # Check file names + print_discrepencies(expected_file_names, retrieved_file_names) + assert expected_file_names == retrieved_file_names + + # Check file texts + print_discrepencies(expected_file_texts, retrieved_texts) + assert expected_file_texts == retrieved_texts diff --git a/backend/tests/daily/connectors/google_drive/test_google_drive_oauth.py b/backend/tests/daily/connectors/google_drive/test_google_drive_oauth.py new file mode 100644 index 00000000000..9e788a7ffa2 --- /dev/null +++ b/backend/tests/daily/connectors/google_drive/test_google_drive_oauth.py @@ -0,0 +1,235 @@ +import time +from collections.abc import Callable +from unittest.mock import MagicMock +from unittest.mock import patch + +from danswer.connectors.google_drive.connector import GoogleDriveConnector +from danswer.connectors.models import Document +from tests.daily.connectors.google_drive.helpers import ( + assert_retrieved_docs_match_expected, +) +from tests.daily.connectors.google_drive.helpers import DRIVE_ID_MAPPING +from tests.daily.connectors.google_drive.helpers import EMAIL_MAPPING +from tests.daily.connectors.google_drive.helpers import URL_MAPPING + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_include_all( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_include_all") + connector = google_drive_oauth_connector_factory( + include_shared_drives=True, + include_my_drives=True, + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Should get everything in shared and admin's My Drive with oauth + expected_file_ids = ( + DRIVE_ID_MAPPING["ADMIN"] + + DRIVE_ID_MAPPING["SHARED_DRIVE_1"] + + DRIVE_ID_MAPPING["FOLDER_1"] + + DRIVE_ID_MAPPING["FOLDER_1_1"] + + DRIVE_ID_MAPPING["FOLDER_1_2"] + + DRIVE_ID_MAPPING["SHARED_DRIVE_2"] + + DRIVE_ID_MAPPING["FOLDER_2"] + + DRIVE_ID_MAPPING["FOLDER_2_1"] + + DRIVE_ID_MAPPING["FOLDER_2_2"] + + DRIVE_ID_MAPPING["SECTIONS"] + ) + 
assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_include_shared_drives_only( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_include_shared_drives_only") + connector = google_drive_oauth_connector_factory( + include_shared_drives=True, + include_my_drives=False, + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Should only get shared drives + expected_file_ids = ( + DRIVE_ID_MAPPING["SHARED_DRIVE_1"] + + DRIVE_ID_MAPPING["FOLDER_1"] + + DRIVE_ID_MAPPING["FOLDER_1_1"] + + DRIVE_ID_MAPPING["FOLDER_1_2"] + + DRIVE_ID_MAPPING["SHARED_DRIVE_2"] + + DRIVE_ID_MAPPING["FOLDER_2"] + + DRIVE_ID_MAPPING["FOLDER_2_1"] + + DRIVE_ID_MAPPING["FOLDER_2_2"] + + DRIVE_ID_MAPPING["SECTIONS"] + ) + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_include_my_drives_only( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_include_my_drives_only") + connector = google_drive_oauth_connector_factory( + include_shared_drives=False, + include_my_drives=True, + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # With OAuth, should only get the admin's My Drive + expected_file_ids = list(range(0, 5)) # Admin's My Drive only + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_drive_one_only( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_drive_one_only") + drive_urls = [ + URL_MAPPING["SHARED_DRIVE_1"], + ] + connector = google_drive_oauth_connector_factory( + include_shared_drives=True, + include_my_drives=False, + shared_drive_urls=",".join([str(url) for url in drive_urls]), + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # We ignore shared_drive_urls if include_shared_drives is False + expected_file_ids = list(range(20, 40)) # Shared Drive 1 and its folders + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_folder_and_shared_drive( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_folder_and_shared_drive") + drive_urls = [URL_MAPPING["SHARED_DRIVE_1"]] + folder_urls = [URL_MAPPING["FOLDER_2"]] + connector = google_drive_oauth_connector_factory( + include_shared_drives=True, + include_my_drives=True, + shared_drive_urls=",".join([str(url) for url in drive_urls]), + shared_folder_urls=",".join([str(url) for url in folder_urls]), + ) + retrieved_docs: list[Document] 
= [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Should get the admin's My Drive, Shared Drive 1 with its folders, and Folder 2 with its subfolders + expected_file_ids = ( + list(range(0, 5)) # Admin's My Drive + + list(range(20, 40)) # Shared Drive 1 and its folders + + list(range(45, 60)) # Folder 2 and its subfolders + ) + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_folders_only( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_folders_only") + folder_urls = [ + URL_MAPPING["FOLDER_1_1"], + URL_MAPPING["FOLDER_1_2"], + URL_MAPPING["FOLDER_2_1"], + URL_MAPPING["FOLDER_2_2"], + ] + connector = google_drive_oauth_connector_factory( + include_shared_drives=False, + include_my_drives=False, + shared_folder_urls=",".join([str(url) for url in folder_urls]), + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + expected_file_ids = list(range(30, 40)) + list( # Folders 1_1 and 1_2 + range(50, 60) + ) # Folders 2_1 and 2_2 + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_specific_emails( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_specific_emails") + my_drive_emails = [ + EMAIL_MAPPING["TEST_USER_1"], + EMAIL_MAPPING["TEST_USER_3"], + ] + connector = google_drive_oauth_connector_factory( + include_shared_drives=False, + include_my_drives=True, + my_drive_emails=",".join([str(email) for email in my_drive_emails]), + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # No matter who is specified, when using oauth, if include_my_drives is True, + # we will get all the files from the admin's My Drive + expected_file_ids = DRIVE_ID_MAPPING["ADMIN"] + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) diff --git a/backend/tests/daily/connectors/google_drive/test_google_drive_sections.py b/backend/tests/daily/connectors/google_drive/test_google_drive_sections.py new file mode 100644 index 00000000000..4f4556a06d6 --- /dev/null +++ b/backend/tests/daily/connectors/google_drive/test_google_drive_sections.py @@ -0,0 +1,71 @@ +import time +from collections.abc import Callable +from unittest.mock import MagicMock +from unittest.mock import patch + +from danswer.connectors.google_drive.connector import GoogleDriveConnector +from danswer.connectors.models import Document + + +SECTIONS_FOLDER_URL = ( + "https://drive.google.com/drive/u/5/folders/1loe6XJ-pJxu9YYPv7cF3Hmz296VNzA33" +) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_google_drive_sections( + mock_get_api_key: MagicMock, + google_drive_oauth_connector_factory: Callable[..., GoogleDriveConnector], + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + oauth_connector = google_drive_oauth_connector_factory( + include_shared_drives=False, + include_my_drives=False, + 
shared_folder_urls=SECTIONS_FOLDER_URL, + ) + service_acct_connector = google_drive_service_acct_connector_factory( + include_shared_drives=False, + include_my_drives=False, + shared_folder_urls=SECTIONS_FOLDER_URL, + ) + for connector in [oauth_connector, service_acct_connector]: + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Verify we got the 1 doc with sections + assert len(retrieved_docs) == 1 + + # Verify each section has the expected structure + doc = retrieved_docs[0] + assert len(doc.sections) == 5 + + header_section = doc.sections[0] + assert header_section.text == "Title\n\nThis is a Google Doc with sections" + assert header_section.link is not None + assert header_section.link.endswith( + "?tab=t.0#heading=h.hfjc17k6qwzt" + ) + + section_1 = doc.sections[1] + assert section_1.text == "Section 1\n\nSection 1 content" + assert section_1.link is not None + assert section_1.link.endswith("?tab=t.0#heading=h.8slfx752a3g5") + + section_2 = doc.sections[2] + assert section_2.text == "Sub-Section 1-1\n\nSub-Section 1-1 content" + assert section_2.link is not None + assert section_2.link.endswith("?tab=t.0#heading=h.4kj3ayade1bp") + + section_3 = doc.sections[3] + assert section_3.text == "Sub-Section 1-2\n\nSub-Section 1-2 content" + assert section_3.link is not None + assert section_3.link.endswith("?tab=t.0#heading=h.pm6wrpzgk69l") + + section_4 = doc.sections[4] + assert section_4.text == "Section 2\n\nSection 2 content" + assert section_4.link is not None + assert section_4.link.endswith("?tab=t.0#heading=h.2m0s9youe2k9") diff --git a/backend/tests/daily/connectors/google_drive/test_google_drive_service_acct.py b/backend/tests/daily/connectors/google_drive/test_google_drive_service_acct.py new file mode 100644 index 00000000000..3ea2c6ddf49 --- /dev/null +++ b/backend/tests/daily/connectors/google_drive/test_google_drive_service_acct.py @@ -0,0 +1,238 @@ +import time +from collections.abc import Callable +from unittest.mock import MagicMock +from unittest.mock import patch + +from danswer.connectors.google_drive.connector import GoogleDriveConnector +from danswer.connectors.models import Document +from tests.daily.connectors.google_drive.helpers import ( + assert_retrieved_docs_match_expected, +) +from tests.daily.connectors.google_drive.helpers import DRIVE_ID_MAPPING +from tests.daily.connectors.google_drive.helpers import EMAIL_MAPPING +from tests.daily.connectors.google_drive.helpers import URL_MAPPING + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_include_all( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_include_all") + connector = google_drive_service_acct_connector_factory( + include_shared_drives=True, + include_my_drives=True, + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Should get everything + expected_file_ids = ( + DRIVE_ID_MAPPING["ADMIN"] + + DRIVE_ID_MAPPING["TEST_USER_1"] + + DRIVE_ID_MAPPING["TEST_USER_2"] + + DRIVE_ID_MAPPING["TEST_USER_3"] + + DRIVE_ID_MAPPING["SHARED_DRIVE_1"] + + DRIVE_ID_MAPPING["FOLDER_1"] + + DRIVE_ID_MAPPING["FOLDER_1_1"] + + DRIVE_ID_MAPPING["FOLDER_1_2"] + + DRIVE_ID_MAPPING["SHARED_DRIVE_2"] + + 
DRIVE_ID_MAPPING["FOLDER_2"] + + DRIVE_ID_MAPPING["FOLDER_2_1"] + + DRIVE_ID_MAPPING["FOLDER_2_2"] + + DRIVE_ID_MAPPING["SECTIONS"] + ) + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_include_shared_drives_only( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_include_shared_drives_only") + connector = google_drive_service_acct_connector_factory( + include_shared_drives=True, + include_my_drives=False, + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Should only get shared drives + expected_file_ids = ( + DRIVE_ID_MAPPING["SHARED_DRIVE_1"] + + DRIVE_ID_MAPPING["FOLDER_1"] + + DRIVE_ID_MAPPING["FOLDER_1_1"] + + DRIVE_ID_MAPPING["FOLDER_1_2"] + + DRIVE_ID_MAPPING["SHARED_DRIVE_2"] + + DRIVE_ID_MAPPING["FOLDER_2"] + + DRIVE_ID_MAPPING["FOLDER_2_1"] + + DRIVE_ID_MAPPING["FOLDER_2_2"] + + DRIVE_ID_MAPPING["SECTIONS"] + ) + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_include_my_drives_only( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_include_my_drives_only") + connector = google_drive_service_acct_connector_factory( + include_shared_drives=False, + include_my_drives=True, + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Should only get everyone's My Drives + expected_file_ids = list(range(0, 20)) # All My Drives + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_drive_one_only( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_drive_one_only") + urls = [URL_MAPPING["SHARED_DRIVE_1"]] + connector = google_drive_service_acct_connector_factory( + include_shared_drives=True, + include_my_drives=False, + shared_drive_urls=",".join([str(url) for url in urls]), + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # We ignore shared_drive_urls if include_shared_drives is False + expected_file_ids = list(range(20, 40)) # Shared Drive 1 and its folders + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_folder_and_shared_drive( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_folder_and_shared_drive") + drive_urls = [ + URL_MAPPING["SHARED_DRIVE_1"], + ] + folder_urls = [URL_MAPPING["FOLDER_2"]] + connector = google_drive_service_acct_connector_factory( + include_shared_drives=True, + 
include_my_drives=True, + shared_drive_urls=",".join([str(url) for url in drive_urls]), + shared_folder_urls=",".join([str(url) for url in folder_urls]), + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + # Should get all My Drives, Shared Drive 1 with its folders, and Folder 2 with its subfolders + expected_file_ids = ( + list(range(0, 20)) # All My Drives + + list(range(20, 40)) # Shared Drive 1 and its folders + + list(range(45, 60)) # Folder 2 and its subfolders + ) + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_folders_only( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_folders_only") + folder_urls = [ + URL_MAPPING["FOLDER_1_1"], + URL_MAPPING["FOLDER_1_2"], + URL_MAPPING["FOLDER_2_1"], + URL_MAPPING["FOLDER_2_2"], + ] + connector = google_drive_service_acct_connector_factory( + include_shared_drives=False, + include_my_drives=False, + shared_folder_urls=",".join([str(url) for url in folder_urls]), + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + expected_file_ids = list(range(30, 40)) + list( # Folders 1_1 and 1_2 + range(50, 60) + ) # Folders 2_1 and 2_2 + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_specific_emails( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + print("\n\nRunning test_specific_emails") + my_drive_emails = [ + EMAIL_MAPPING["TEST_USER_1"], + EMAIL_MAPPING["TEST_USER_3"], + ] + connector = google_drive_service_acct_connector_factory( + include_shared_drives=False, + include_my_drives=True, + my_drive_emails=",".join([str(email) for email in my_drive_emails]), + ) + retrieved_docs: list[Document] = [] + for doc_batch in connector.poll_source(0, time.time()): + retrieved_docs.extend(doc_batch) + + expected_file_ids = list(range(5, 10)) + list( + range(15, 20) + ) # TEST_USER_1 and TEST_USER_3 My Drives + assert_retrieved_docs_match_expected( + retrieved_docs=retrieved_docs, + expected_file_ids=expected_file_ids, + ) diff --git a/backend/tests/daily/connectors/google_drive/test_google_drive_slim_docs.py b/backend/tests/daily/connectors/google_drive/test_google_drive_slim_docs.py new file mode 100644 index 00000000000..1361c198849 --- /dev/null +++ b/backend/tests/daily/connectors/google_drive/test_google_drive_slim_docs.py @@ -0,0 +1,184 @@ +import time +from collections.abc import Callable +from unittest.mock import MagicMock +from unittest.mock import patch + +from danswer.access.models import ExternalAccess +from danswer.connectors.google_drive.connector import GoogleDriveConnector +from danswer.connectors.google_utils.google_utils import execute_paginated_retrieval +from danswer.connectors.google_utils.resources import get_admin_service +from ee.danswer.external_permissions.google_drive.doc_sync import ( + _get_permissions_from_slim_doc, +) +from tests.daily.connectors.google_drive.helpers import EMAIL_MAPPING +from tests.daily.connectors.google_drive.helpers import file_name_template +from 
tests.daily.connectors.google_drive.helpers import print_discrepencies +from tests.daily.connectors.google_drive.helpers import PUBLIC_RANGE + + +def get_keys_available_to_user_from_access_map( + user_email: str, + group_map: dict[str, list[str]], + access_map: dict[str, ExternalAccess], +) -> list[str]: + """ + Extracts the names of the files available to the user from the access map + through their own email or group memberships or public access + """ + group_emails_for_user = [] + for group_email, user_in_group_email_list in group_map.items(): + if user_email in user_in_group_email_list: + group_emails_for_user.append(group_email) + + accessible_file_names_for_user = [] + for file_name, external_access in access_map.items(): + if external_access.is_public: + accessible_file_names_for_user.append(file_name) + elif user_email in external_access.external_user_emails: + accessible_file_names_for_user.append(file_name) + elif any( + group_email in external_access.external_user_group_ids + for group_email in group_emails_for_user + ): + accessible_file_names_for_user.append(file_name) + return accessible_file_names_for_user + + +def assert_correct_access_for_user( + user_email: str, + expected_access_ids: list[int], + group_map: dict[str, list[str]], + retrieved_access_map: dict[str, ExternalAccess], +) -> None: + """ + compares the expected access range of the user to the keys available to the user + retrieved from the source + """ + retrieved_keys_available_to_user = get_keys_available_to_user_from_access_map( + user_email, group_map, retrieved_access_map + ) + retrieved_file_names = set(retrieved_keys_available_to_user) + + # Combine public and user-specific access IDs + all_accessible_ids = expected_access_ids + PUBLIC_RANGE + expected_file_names = {file_name_template.format(i) for i in all_accessible_ids} + + print_discrepencies(expected_file_names, retrieved_file_names) + + assert expected_file_names == retrieved_file_names + + +# This function is supposed to map to the group_sync.py file for the google drive connector +# TODO: Call it directly +def get_group_map(google_drive_connector: GoogleDriveConnector) -> dict[str, list[str]]: + admin_service = get_admin_service( + creds=google_drive_connector.creds, + user_email=google_drive_connector.primary_admin_email, + ) + + group_map: dict[str, list[str]] = {} + for group in execute_paginated_retrieval( + admin_service.groups().list, + list_key="groups", + domain=google_drive_connector.google_domain, + fields="groups(email)", + ): + # The id is the group email + group_email = group["email"] + + # Gather group member emails + group_member_emails: list[str] = [] + for member in execute_paginated_retrieval( + admin_service.members().list, + list_key="members", + groupKey=group_email, + fields="members(email)", + ): + group_member_emails.append(member["email"]) + group_map[group_email] = group_member_emails + return group_map + + +@patch( + "danswer.file_processing.extract_file_text.get_unstructured_api_key", + return_value=None, +) +def test_all_permissions( + mock_get_api_key: MagicMock, + google_drive_service_acct_connector_factory: Callable[..., GoogleDriveConnector], +) -> None: + google_drive_connector = google_drive_service_acct_connector_factory( + include_shared_drives=True, + include_my_drives=True, + ) + + access_map: dict[str, ExternalAccess] = {} + for slim_doc_batch in google_drive_connector.retrieve_all_slim_documents( + 0, time.time() + ): + for slim_doc in slim_doc_batch: + access_map[ + (slim_doc.perm_sync_data or {})["name"] 
+ ] = _get_permissions_from_slim_doc( + google_drive_connector=google_drive_connector, + slim_doc=slim_doc, + ) + + for file_name, external_access in access_map.items(): + print(file_name, external_access) + + expected_file_range = ( + list(range(0, 5)) # Admin's My Drive + + list(range(5, 10)) # TEST_USER_1's My Drive + + list(range(10, 15)) # TEST_USER_2's My Drive + + list(range(15, 20)) # TEST_USER_3's My Drive + + list(range(20, 25)) # Shared Drive 1 + + list(range(25, 30)) # Folder 1 + + list(range(30, 35)) # Folder 1_1 + + list(range(35, 40)) # Folder 1_2 + + list(range(40, 45)) # Shared Drive 2 + + list(range(45, 50)) # Folder 2 + + list(range(50, 55)) # Folder 2_1 + + list(range(55, 60)) # Folder 2_2 + + [61] # Sections + ) + + # Should get everything + assert len(access_map) == len(expected_file_range) + + group_map = get_group_map(google_drive_connector) + + print("groups:\n", group_map) + + assert_correct_access_for_user( + user_email=EMAIL_MAPPING["ADMIN"], + expected_access_ids=list(range(0, 5)) # Admin's My Drive + + list(range(20, 60)) # All shared drive content + + [61], # Sections + group_map=group_map, + retrieved_access_map=access_map, + ) + assert_correct_access_for_user( + user_email=EMAIL_MAPPING["TEST_USER_1"], + expected_access_ids=list(range(5, 10)) # TEST_USER_1's My Drive + + list(range(20, 40)) # Shared Drive 1 and its folders + + list(range(0, 2)), # Access to some of Admin's files + group_map=group_map, + retrieved_access_map=access_map, + ) + + assert_correct_access_for_user( + user_email=EMAIL_MAPPING["TEST_USER_2"], + expected_access_ids=list(range(10, 15)) # TEST_USER_2's My Drive + + list(range(25, 40)) # Folder 1 and its subfolders + + list(range(50, 55)) # Folder 2_1 + + list(range(45, 47)), # Some files in Folder 2 + group_map=group_map, + retrieved_access_map=access_map, + ) + assert_correct_access_for_user( + user_email=EMAIL_MAPPING["TEST_USER_3"], + expected_access_ids=list(range(15, 20)), # TEST_USER_3's My Drive only + group_map=group_map, + retrieved_access_map=access_map, + ) diff --git a/backend/tests/daily/embedding/test_embeddings.py b/backend/tests/daily/embedding/test_embeddings.py index b736f374741..10a1dd850f6 100644 --- a/backend/tests/daily/embedding/test_embeddings.py +++ b/backend/tests/daily/embedding/test_embeddings.py @@ -61,6 +61,26 @@ def test_cohere_embedding(cohere_embedding_model: EmbeddingModel) -> None: _run_embeddings(TOO_LONG_SAMPLE, cohere_embedding_model, 384) +@pytest.fixture +def litellm_embedding_model() -> EmbeddingModel: + return EmbeddingModel( + server_host="localhost", + server_port=9000, + model_name="text-embedding-3-small", + normalize=True, + query_prefix=None, + passage_prefix=None, + api_key=os.getenv("LITE_LLM_API_KEY"), + provider_type=EmbeddingProvider.LITELLM, + api_url=os.getenv("LITE_LLM_API_URL"), + ) + + +def test_litellm_embedding(litellm_embedding_model: EmbeddingModel) -> None: + _run_embeddings(VALID_SAMPLE, litellm_embedding_model, 1536) + _run_embeddings(TOO_LONG_SAMPLE, litellm_embedding_model, 1536) + + @pytest.fixture def local_nomic_embedding_model() -> EmbeddingModel: return EmbeddingModel( diff --git a/backend/tests/daily/llm/test_bedrock.py b/backend/tests/daily/llm/test_bedrock.py new file mode 100644 index 00000000000..1d5022abf99 --- /dev/null +++ b/backend/tests/daily/llm/test_bedrock.py @@ -0,0 +1,81 @@ +import os +from typing import Any + +import pytest +from fastapi.testclient import TestClient + +from danswer.llm.llm_provider_options import BEDROCK_PROVIDER_NAME +from 
danswer.llm.llm_provider_options import fetch_available_well_known_llms +from danswer.llm.llm_provider_options import WellKnownLLMProviderDescriptor + + +@pytest.fixture +def bedrock_provider() -> WellKnownLLMProviderDescriptor: + provider = next( + ( + provider + for provider in fetch_available_well_known_llms() + if provider.name == BEDROCK_PROVIDER_NAME + ), + None, + ) + assert provider is not None, "Bedrock provider not found" + return provider + + +def test_bedrock_llm_configuration( + client: TestClient, bedrock_provider: WellKnownLLMProviderDescriptor +) -> None: + # Prepare the test request payload + test_request: dict[str, Any] = { + "provider": BEDROCK_PROVIDER_NAME, + "default_model_name": bedrock_provider.default_model, + "fast_default_model_name": bedrock_provider.default_fast_model, + "api_key": None, + "api_base": None, + "api_version": None, + "custom_config": { + "AWS_REGION_NAME": os.environ.get("AWS_REGION_NAME", "us-east-1"), + "AWS_ACCESS_KEY_ID": os.environ.get("AWS_ACCESS_KEY_ID"), + "AWS_SECRET_ACCESS_KEY": os.environ.get("AWS_SECRET_ACCESS_KEY"), + }, + } + + # Send the test request + response = client.post("/admin/llm/test", json=test_request) + + # Assert the response + assert ( + response.status_code == 200 + ), f"Expected status code 200, but got {response.status_code}. Response: {response.text}" + + +def test_bedrock_llm_configuration_invalid_key( + client: TestClient, bedrock_provider: WellKnownLLMProviderDescriptor +) -> None: + # Prepare the test request payload with invalid credentials + test_request: dict[str, Any] = { + "provider": BEDROCK_PROVIDER_NAME, + "default_model_name": bedrock_provider.default_model, + "fast_default_model_name": bedrock_provider.default_fast_model, + "api_key": None, + "api_base": None, + "api_version": None, + "custom_config": { + "AWS_REGION_NAME": "us-east-1", + "AWS_ACCESS_KEY_ID": "invalid_access_key_id", + "AWS_SECRET_ACCESS_KEY": "invalid_secret_access_key", + }, + } + + # Send the test request + response = client.post("/admin/llm/test", json=test_request) + + # Assert the response + assert ( + response.status_code == 400 + ), f"Expected status code 400, but got {response.status_code}. 
Response: {response.text}" + assert ( + "Invalid credentials" in response.text + or "Invalid Authentication" in response.text + ), f"Expected error message about invalid credentials, but got: {response.text}" diff --git a/backend/tests/integration/Dockerfile b/backend/tests/integration/Dockerfile index 02cdcad0b44..3eecb0d5683 100644 --- a/backend/tests/integration/Dockerfile +++ b/backend/tests/integration/Dockerfile @@ -83,4 +83,5 @@ COPY ./tests/integration /app/tests/integration ENV PYTHONPATH=/app -CMD ["pytest", "-s", "/app/tests/integration"] +ENTRYPOINT ["pytest", "-s"] +CMD ["/app/tests/integration", "--ignore=/app/tests/integration/multitenant_tests"] \ No newline at end of file diff --git a/backend/tests/integration/common_utils/constants.py b/backend/tests/integration/common_utils/constants.py index 7d729191cf6..57db1ad9a32 100644 --- a/backend/tests/integration/common_utils/constants.py +++ b/backend/tests/integration/common_utils/constants.py @@ -4,7 +4,7 @@ API_SERVER_HOST = os.getenv("API_SERVER_HOST") or "localhost" API_SERVER_PORT = os.getenv("API_SERVER_PORT") or "8080" API_SERVER_URL = f"{API_SERVER_PROTOCOL}://{API_SERVER_HOST}:{API_SERVER_PORT}" -MAX_DELAY = 30 +MAX_DELAY = 45 GENERAL_HEADERS = {"Content-Type": "application/json"} diff --git a/backend/tests/integration/common_utils/managers/cc_pair.py b/backend/tests/integration/common_utils/managers/cc_pair.py index 000bbac59d0..68469d144b9 100644 --- a/backend/tests/integration/common_utils/managers/cc_pair.py +++ b/backend/tests/integration/common_utils/managers/cc_pair.py @@ -9,7 +9,7 @@ from danswer.db.enums import AccessType from danswer.db.enums import ConnectorCredentialPairStatus from danswer.db.enums import TaskStatus -from danswer.server.documents.models import CCPairPruningTask +from danswer.server.documents.models import CeleryTaskStatus from danswer.server.documents.models import ConnectorCredentialPairIdentifier from danswer.server.documents.models import ConnectorIndexingStatus from danswer.server.documents.models import DocumentSource @@ -85,7 +85,7 @@ def create_from_scratch( groups=groups, user_performing_action=user_performing_action, ) - return _cc_pair_creator( + cc_pair = _cc_pair_creator( connector_id=connector.id, credential_id=credential.id, name=name, @@ -93,6 +93,7 @@ def create_from_scratch( groups=groups, user_performing_action=user_performing_action, ) + return cc_pair @staticmethod def create( @@ -103,7 +104,7 @@ def create( groups: list[int] | None = None, user_performing_action: DATestUser | None = None, ) -> DATestCCPair: - return _cc_pair_creator( + cc_pair = _cc_pair_creator( connector_id=connector_id, credential_id=credential_id, name=name, @@ -111,6 +112,7 @@ def create( groups=groups, user_performing_action=user_performing_action, ) + return cc_pair @staticmethod def pause_cc_pair( @@ -203,9 +205,28 @@ def verify( if not verify_deleted: raise ValueError(f"CC pair {cc_pair.id} not found") + @staticmethod + def run_once( + cc_pair: DATestCCPair, + user_performing_action: DATestUser | None = None, + ) -> None: + body = { + "connector_id": cc_pair.connector_id, + "credential_ids": [cc_pair.credential_id], + "from_beginning": True, + } + result = requests.post( + url=f"{API_SERVER_URL}/manage/admin/connector/run-once", + json=body, + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + result.raise_for_status() + @staticmethod def wait_for_indexing( - cc_pair_test: DATestCCPair, + cc_pair: DATestCCPair, after: datetime, timeout: float = 
MAX_DELAY, user_performing_action: DATestUser | None = None, @@ -213,23 +234,29 @@ def wait_for_indexing( """after: Wait for an indexing success time after this time""" start = time.monotonic() while True: - cc_pairs = CCPairManager.get_all(user_performing_action) - for cc_pair in cc_pairs: - if cc_pair.cc_pair_id != cc_pair_test.id: + fetched_cc_pairs = CCPairManager.get_all(user_performing_action) + for fetched_cc_pair in fetched_cc_pairs: + if fetched_cc_pair.cc_pair_id != cc_pair.id: continue - if cc_pair.last_success and cc_pair.last_success > after: - print(f"cc_pair {cc_pair_test.id} indexing complete.") + if fetched_cc_pair.in_progress: + continue + + if ( + fetched_cc_pair.last_success + and fetched_cc_pair.last_success > after + ): + print(f"Indexing complete: cc_pair={cc_pair.id}") return elapsed = time.monotonic() - start if elapsed > timeout: raise TimeoutError( - f"CC pair indexing was not completed within {timeout} seconds" + f"Indexing wait timed out: cc_pair={cc_pair.id} timeout={timeout}s" ) print( - f"Waiting for CC indexing to complete. elapsed={elapsed:.2f} timeout={timeout}" + f"Indexing wait for completion: cc_pair={cc_pair.id} elapsed={elapsed:.2f} timeout={timeout}s" ) time.sleep(5) @@ -247,22 +274,31 @@ def prune( result.raise_for_status() @staticmethod - def get_prune_task( + def last_pruned( cc_pair: DATestCCPair, user_performing_action: DATestUser | None = None, - ) -> CCPairPruningTask: + ) -> datetime | None: response = requests.get( - url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/prune", + url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/last_pruned", headers=user_performing_action.headers if user_performing_action else GENERAL_HEADERS, ) response.raise_for_status() - return CCPairPruningTask(**response.json()) + response_str = response.json() + + # If the response itself is a datetime string, parse it + if not isinstance(response_str, str): + return None + + try: + return datetime.fromisoformat(response_str) + except ValueError: + return None @staticmethod def wait_for_prune( - cc_pair_test: DATestCCPair, + cc_pair: DATestCCPair, after: datetime, timeout: float = MAX_DELAY, user_performing_action: DATestUser | None = None, @@ -270,40 +306,108 @@ def wait_for_prune( """after: The task register time must be after this time.""" start = time.monotonic() while True: - task = CCPairManager.get_prune_task(cc_pair_test, user_performing_action) + last_pruned = CCPairManager.last_pruned(cc_pair, user_performing_action) + if last_pruned and last_pruned > after: + print(f"Pruning complete: cc_pair={cc_pair.id}") + break + + elapsed = time.monotonic() - start + if elapsed > timeout: + raise TimeoutError( + f"CC pair pruning was not completed within {timeout} seconds" + ) + + print( + f"Waiting for CC pruning to complete. 
elapsed={elapsed:.2f} timeout={timeout}" + ) + time.sleep(5) + + @staticmethod + def sync( + cc_pair: DATestCCPair, + user_performing_action: DATestUser | None = None, + ) -> None: + result = requests.post( + url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + result.raise_for_status() + + @staticmethod + def get_sync_task( + cc_pair: DATestCCPair, + user_performing_action: DATestUser | None = None, + ) -> CeleryTaskStatus: + response = requests.get( + url=f"{API_SERVER_URL}/manage/admin/cc-pair/{cc_pair.id}/sync", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + return CeleryTaskStatus(**response.json()) + + @staticmethod + def wait_for_sync( + cc_pair: DATestCCPair, + after: datetime, + timeout: float = MAX_DELAY, + user_performing_action: DATestUser | None = None, + ) -> None: + """after: The task register time must be after this time.""" + start = time.monotonic() + while True: + task = CCPairManager.get_sync_task(cc_pair, user_performing_action) if not task: - raise ValueError("Prune task not found.") + raise ValueError("Sync task not found.") if not task.register_time or task.register_time < after: - raise ValueError("Prune task register time is too early.") + raise ValueError("Sync task register time is too early.") if task.status == TaskStatus.SUCCESS: - # Pruning succeeded + # Sync succeeded return elapsed = time.monotonic() - start if elapsed > timeout: raise TimeoutError( - f"CC pair pruning was not completed within {timeout} seconds" + f"CC pair syncing was not completed within {timeout} seconds" ) print( - f"Waiting for CC pruning to complete. elapsed={elapsed:.2f} timeout={timeout}" + f"Waiting for CC syncing to complete. elapsed={elapsed:.2f} timeout={timeout}" ) time.sleep(5) @staticmethod def wait_for_deletion_completion( + cc_pair_id: int | None = None, user_performing_action: DATestUser | None = None, ) -> None: + """if cc_pair_id is not specified, just waits until no connectors are in the deleting state. + if cc_pair_id is specified, checks to ensure the specific cc_pair_id is gone. 
+ We had a bug where the connector was paused in the middle of deleting, so specifying the + cc_pair_id is good to do.""" start = time.monotonic() while True: cc_pairs = CCPairManager.get_all(user_performing_action) - if all( - cc_pair.cc_pair_status != ConnectorCredentialPairStatus.DELETING - for cc_pair in cc_pairs - ): - return + if cc_pair_id: + found = False + for cc_pair in cc_pairs: + if cc_pair.cc_pair_id == cc_pair_id: + found = True + break + + if not found: + return + else: + if all( + cc_pair.cc_pair_status != ConnectorCredentialPairStatus.DELETING + for cc_pair in cc_pairs + ): + return if time.monotonic() - start > MAX_DELAY: raise TimeoutError( diff --git a/backend/tests/integration/common_utils/managers/chat.py b/backend/tests/integration/common_utils/managers/chat.py index 696baa2ad8b..a2edb32caec 100644 --- a/backend/tests/integration/common_utils/managers/chat.py +++ b/backend/tests/integration/common_utils/managers/chat.py @@ -1,4 +1,5 @@ import json +from uuid import UUID import requests from requests.models import Response @@ -22,7 +23,7 @@ class ChatSessionManager: @staticmethod def create( - persona_id: int = -1, + persona_id: int = 0, description: str = "Test chat session", user_performing_action: DATestUser | None = None, ) -> DATestChatSession: @@ -44,7 +45,7 @@ def create( @staticmethod def send_message( - chat_session_id: int, + chat_session_id: UUID, message: str, parent_message_id: int | None = None, user_performing_action: DATestUser | None = None, diff --git a/backend/tests/integration/common_utils/managers/credential.py b/backend/tests/integration/common_utils/managers/credential.py index 8f729e4b06c..8c8a59d4856 100644 --- a/backend/tests/integration/common_utils/managers/credential.py +++ b/backend/tests/integration/common_utils/managers/credential.py @@ -32,6 +32,7 @@ def create( "curator_public": curator_public, "groups": groups or [], } + response = requests.post( url=f"{API_SERVER_URL}/manage/credential", json=credential_request, diff --git a/backend/tests/integration/common_utils/managers/document_search.py b/backend/tests/integration/common_utils/managers/document_search.py new file mode 100644 index 00000000000..4fe2442b69a --- /dev/null +++ b/backend/tests/integration/common_utils/managers/document_search.py @@ -0,0 +1,39 @@ +import requests + +from danswer.search.enums import LLMEvaluationType +from danswer.search.enums import SearchType +from danswer.search.models import RetrievalDetails +from danswer.search.models import SavedSearchDocWithContent +from ee.danswer.server.query_and_chat.models import DocumentSearchRequest +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestUser + + +class DocumentSearchManager: + @staticmethod + def search_documents( + query: str, + search_type: SearchType = SearchType.KEYWORD, + user_performing_action: DATestUser | None = None, + ) -> list[str]: + search_request = DocumentSearchRequest( + message=query, + search_type=search_type, + retrieval_options=RetrievalDetails(), + evaluation_type=LLMEvaluationType.SKIP, + ) + result = requests.post( + url=f"{API_SERVER_URL}/query/document-search", + json=search_request.model_dump(), + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + result.raise_for_status() + result_json = result.json() + top_documents: list[SavedSearchDocWithContent] = [ + SavedSearchDocWithContent(**doc) for 
doc in result_json["top_documents"] + ] + document_content_list: list[str] = [doc.content for doc in top_documents] + return document_content_list diff --git a/backend/tests/integration/common_utils/managers/document_set.py b/backend/tests/integration/common_utils/managers/document_set.py index cd6936602ea..7670f42fa3c 100644 --- a/backend/tests/integration/common_utils/managers/document_set.py +++ b/backend/tests/integration/common_utils/managers/document_set.py @@ -135,6 +135,7 @@ def wait_for_sync( all_up_to_date = all(doc_set.is_up_to_date for doc_set in doc_sets) if all_up_to_date: + print("Document sets synced successfully.") break if time.time() - start > MAX_DELAY: diff --git a/backend/tests/integration/common_utils/managers/llm_provider.py b/backend/tests/integration/common_utils/managers/llm_provider.py index cde75284ca8..6ac4693496d 100644 --- a/backend/tests/integration/common_utils/managers/llm_provider.py +++ b/backend/tests/integration/common_utils/managers/llm_provider.py @@ -50,9 +50,7 @@ def create( ) llm_response.raise_for_status() response_data = llm_response.json() - import json - print(json.dumps(response_data, indent=4)) result_llm = DATestLLMProvider( id=response_data["id"], name=response_data["name"], diff --git a/backend/tests/integration/common_utils/managers/tenant.py b/backend/tests/integration/common_utils/managers/tenant.py new file mode 100644 index 00000000000..76fd16471f8 --- /dev/null +++ b/backend/tests/integration/common_utils/managers/tenant.py @@ -0,0 +1,82 @@ +from datetime import datetime +from datetime import timedelta + +import jwt +import requests + +from danswer.server.manage.models import AllUsersResponse +from danswer.server.models import FullUserSnapshot +from danswer.server.models import InvitedUserSnapshot +from tests.integration.common_utils.constants import API_SERVER_URL +from tests.integration.common_utils.constants import GENERAL_HEADERS +from tests.integration.common_utils.test_models import DATestUser + + +def generate_auth_token() -> str: + payload = { + "iss": "control_plane", + "exp": datetime.utcnow() + timedelta(minutes=5), + "iat": datetime.utcnow(), + "scope": "tenant:create", + } + token = jwt.encode(payload, "", algorithm="HS256") + return token + + +class TenantManager: + @staticmethod + def create( + tenant_id: str | None = None, + initial_admin_email: str | None = None, + ) -> dict[str, str]: + body = { + "tenant_id": tenant_id, + "initial_admin_email": initial_admin_email, + } + + token = generate_auth_token() + headers = { + "Authorization": f"Bearer {token}", + "X-API-KEY": "", + "Content-Type": "application/json", + } + + response = requests.post( + url=f"{API_SERVER_URL}/tenants/create", + json=body, + headers=headers, + ) + + response.raise_for_status() + + return response.json() + + @staticmethod + def get_all_users( + user_performing_action: DATestUser | None = None, + ) -> AllUsersResponse: + response = requests.get( + url=f"{API_SERVER_URL}/manage/users", + headers=user_performing_action.headers + if user_performing_action + else GENERAL_HEADERS, + ) + response.raise_for_status() + + data = response.json() + return AllUsersResponse( + accepted=[FullUserSnapshot(**user) for user in data["accepted"]], + invited=[InvitedUserSnapshot(**user) for user in data["invited"]], + accepted_pages=data["accepted_pages"], + invited_pages=data["invited_pages"], + ) + + @staticmethod + def verify_user_in_tenant( + user: DATestUser, user_performing_action: DATestUser | None = None + ) -> None: + all_users = 
TenantManager.get_all_users(user_performing_action) + for accepted_user in all_users.accepted: + if accepted_user.email == user.email and accepted_user.id == user.id: + return + raise ValueError(f"User {user.email} not found in tenant") diff --git a/backend/tests/integration/common_utils/managers/user.py b/backend/tests/integration/common_utils/managers/user.py index c299a5eb38a..2b9aa6e189d 100644 --- a/backend/tests/integration/common_utils/managers/user.py +++ b/backend/tests/integration/common_utils/managers/user.py @@ -17,11 +17,14 @@ class UserManager: @staticmethod def create( name: str | None = None, + email: str | None = None, ) -> DATestUser: if name is None: name = f"test{str(uuid4())}" - email = f"{name}@test.com" + if email is None: + email = f"{name}@test.com" + password = "test" body = { @@ -44,12 +47,10 @@ def create( ) print(f"Created user {test_user.email}") - test_user.headers["Cookie"] = UserManager.login_as_user(test_user) - - return test_user + return UserManager.login_as_user(test_user) @staticmethod - def login_as_user(test_user: DATestUser) -> str: + def login_as_user(test_user: DATestUser) -> DATestUser: data = urlencode( { "username": test_user.email, @@ -64,14 +65,20 @@ def login_as_user(test_user: DATestUser) -> str: data=data, headers=headers, ) + response.raise_for_status() - result_cookie = next(iter(response.cookies), None) - if not result_cookie: + cookies = response.cookies.get_dict() + session_cookie = cookies.get("fastapiusersauth") + + if not session_cookie: raise Exception("Failed to login") print(f"Logged in as {test_user.email}") - return f"{result_cookie.name}={result_cookie.value}" + + # Set cookies in the headers + test_user.headers["Cookie"] = f"fastapiusersauth={session_cookie}; " + return test_user @staticmethod def verify_role(user_to_verify: DATestUser, target_role: UserRole) -> bool: diff --git a/backend/tests/integration/common_utils/managers/user_group.py b/backend/tests/integration/common_utils/managers/user_group.py index baf2008b965..e8a26fa34a7 100644 --- a/backend/tests/integration/common_utils/managers/user_group.py +++ b/backend/tests/integration/common_utils/managers/user_group.py @@ -146,6 +146,7 @@ def wait_for_sync( if user_group.id in check_ids ] if all(ug.is_up_to_date for ug in user_groups): + print("User groups synced successfully.") return if time.time() - start > MAX_DELAY: diff --git a/backend/tests/integration/common_utils/reset.py b/backend/tests/integration/common_utils/reset.py index 95b3f734ed4..1792af9dbf9 100644 --- a/backend/tests/integration/common_utils/reset.py +++ b/backend/tests/integration/common_utils/reset.py @@ -1,5 +1,6 @@ import logging import time +from types import SimpleNamespace import psycopg2 import requests @@ -11,22 +12,28 @@ from danswer.configs.app_configs import POSTGRES_PORT from danswer.configs.app_configs import POSTGRES_USER from danswer.db.engine import build_connection_string +from danswer.db.engine import get_all_tenant_ids from danswer.db.engine import get_session_context_manager +from danswer.db.engine import get_session_with_tenant from danswer.db.engine import SYNC_DB_API from danswer.db.search_settings import get_current_search_settings from danswer.db.swap_index import check_index_swap from danswer.document_index.vespa.index import DOCUMENT_ID_ENDPOINT from danswer.document_index.vespa.index import VespaIndex from danswer.indexing.models import IndexingSetting -from danswer.main import setup_postgres -from danswer.main import setup_vespa +from danswer.setup import setup_postgres 
+from danswer.setup import setup_vespa from danswer.utils.logger import setup_logger logger = setup_logger() def _run_migrations( - database_url: str, direction: str = "upgrade", revision: str = "head" + database_url: str, + config_name: str, + direction: str = "upgrade", + revision: str = "head", + schema: str = "public", ) -> None: # hide info logs emitted during migration logging.getLogger("alembic").setLevel(logging.CRITICAL) @@ -35,6 +42,10 @@ def _run_migrations( alembic_cfg = Config("alembic.ini") alembic_cfg.set_section_option("logger_alembic", "level", "WARN") alembic_cfg.attributes["configure_logger"] = False + alembic_cfg.config_ini_section = config_name + + alembic_cfg.cmd_opts = SimpleNamespace() # type: ignore + alembic_cfg.cmd_opts.x = [f"schema={schema}"] # type: ignore # Set the SQLAlchemy URL in the Alembic configuration alembic_cfg.set_main_option("sqlalchemy.url", database_url) @@ -52,7 +63,9 @@ def _run_migrations( logging.getLogger("alembic").setLevel(logging.INFO) -def reset_postgres(database: str = "postgres") -> None: +def reset_postgres( + database: str = "postgres", config_name: str = "alembic", setup_danswer: bool = True +) -> None: """Reset the Postgres database.""" # NOTE: need to delete all rows to allow migrations to be rolled back @@ -111,14 +124,18 @@ def reset_postgres(database: str = "postgres") -> None: ) _run_migrations( conn_str, + config_name, direction="downgrade", revision="base", ) _run_migrations( conn_str, + config_name, direction="upgrade", revision="head", ) + if not setup_danswer: + return # do the same thing as we do on API server startup with get_session_context_manager() as db_session: @@ -127,6 +144,7 @@ def reset_postgres(database: str = "postgres") -> None: def reset_vespa() -> None: """Wipe all data from the Vespa index.""" + with get_session_context_manager() as db_session: # swap to the correct default model check_index_swap(db_session) @@ -166,10 +184,98 @@ def reset_vespa() -> None: time.sleep(5) +def reset_postgres_multitenant() -> None: + """Reset the Postgres database for all tenants in a multitenant setup.""" + + conn = psycopg2.connect( + dbname="postgres", + user=POSTGRES_USER, + password=POSTGRES_PASSWORD, + host=POSTGRES_HOST, + port=POSTGRES_PORT, + ) + conn.autocommit = True + cur = conn.cursor() + + # Get all tenant schemas + cur.execute( + """ + SELECT schema_name + FROM information_schema.schemata + WHERE schema_name LIKE 'tenant_%' + """ + ) + tenant_schemas = cur.fetchall() + + # Drop all tenant schemas + for schema in tenant_schemas: + schema_name = schema[0] + cur.execute(f'DROP SCHEMA "{schema_name}" CASCADE') + + cur.close() + conn.close() + + reset_postgres(config_name="schema_private", setup_danswer=False) + + +def reset_vespa_multitenant() -> None: + """Wipe all data from the Vespa index for all tenants.""" + + for tenant_id in get_all_tenant_ids(): + with get_session_with_tenant(tenant_id=tenant_id) as db_session: + # swap to the correct default model for each tenant + check_index_swap(db_session) + + search_settings = get_current_search_settings(db_session) + index_name = search_settings.index_name + + success = setup_vespa( + document_index=VespaIndex(index_name=index_name, secondary_index_name=None), + index_setting=IndexingSetting.from_db_model(search_settings), + secondary_index_setting=None, + ) + + if not success: + raise RuntimeError( + f"Could not connect to Vespa for tenant {tenant_id} within the specified timeout." 
+ ) + + for _ in range(5): + try: + continuation = None + should_continue = True + while should_continue: + params = {"selection": "true", "cluster": "danswer_index"} + if continuation: + params = {**params, "continuation": continuation} + response = requests.delete( + DOCUMENT_ID_ENDPOINT.format(index_name=index_name), + params=params, + ) + response.raise_for_status() + + response_json = response.json() + + continuation = response_json.get("continuation") + should_continue = bool(continuation) + + break + except Exception as e: + print(f"Error deleting documents for tenant {tenant_id}: {e}") + time.sleep(5) + + def reset_all() -> None: - """Reset both Postgres and Vespa.""" logger.info("Resetting Postgres...") reset_postgres() logger.info("Resetting Vespa...") reset_vespa() + + +def reset_all_multitenant() -> None: + """Reset both Postgres and Vespa for all tenants.""" + logger.info("Resetting Postgres for all tenants...") + reset_postgres_multitenant() + logger.info("Resetting Vespa for all tenants...") + reset_vespa_multitenant() logger.info("Finished resetting all.") diff --git a/backend/tests/integration/common_utils/test_models.py b/backend/tests/integration/common_utils/test_models.py index ca573663e72..af7cd882a68 100644 --- a/backend/tests/integration/common_utils/test_models.py +++ b/backend/tests/integration/common_utils/test_models.py @@ -123,14 +123,14 @@ class DATestPersona(BaseModel): # class DATestChatSession(BaseModel): - id: int + id: UUID persona_id: int description: str class DATestChatMessage(BaseModel): id: str | None = None - chat_session_id: int + chat_session_id: UUID parent_message_id: str | None message: str response: str diff --git a/backend/tests/integration/conftest.py b/backend/tests/integration/conftest.py index 77d9e0e7022..91e61966643 100644 --- a/backend/tests/integration/conftest.py +++ b/backend/tests/integration/conftest.py @@ -6,7 +6,10 @@ from danswer.db.engine import get_session_context_manager from danswer.db.search_settings import get_current_search_settings +from tests.integration.common_utils.managers.user import UserManager from tests.integration.common_utils.reset import reset_all +from tests.integration.common_utils.reset import reset_all_multitenant +from tests.integration.common_utils.test_models import DATestUser from tests.integration.common_utils.vespa import vespa_fixture @@ -44,3 +47,16 @@ def vespa_client(db_session: Session) -> vespa_fixture: @pytest.fixture def reset() -> None: reset_all() + + +@pytest.fixture +def new_admin_user(reset: None) -> DATestUser | None: + try: + return UserManager.create(name="admin_user") + except Exception: + return None + + +@pytest.fixture +def reset_multitenant() -> None: + reset_all_multitenant() diff --git a/backend/tests/integration/connector_job_tests/slack/conftest.py b/backend/tests/integration/connector_job_tests/slack/conftest.py new file mode 100644 index 00000000000..03d99737ce7 --- /dev/null +++ b/backend/tests/integration/connector_job_tests/slack/conftest.py @@ -0,0 +1,28 @@ +import os +from collections.abc import Generator +from typing import Any + +import pytest + +from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager + + +@pytest.fixture() +def slack_test_setup() -> Generator[tuple[dict[str, Any], dict[str, Any]], None, None]: + slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"]) + admin_user_id = SlackManager.build_slack_user_email_id_map(slack_client)[ + "admin@onyx-test.com" + ] + + ( + public_channel, + private_channel, + 
run_id, + ) = SlackManager.get_and_provision_available_slack_channels( + slack_client=slack_client, admin_user_id=admin_user_id + ) + + yield public_channel, private_channel + + # This part will always run after the test, even if it fails + SlackManager.cleanup_after_test(slack_client=slack_client, test_id=run_id) diff --git a/backend/tests/integration/connector_job_tests/slack/slack_api_utils.py b/backend/tests/integration/connector_job_tests/slack/slack_api_utils.py new file mode 100644 index 00000000000..f17c4211066 --- /dev/null +++ b/backend/tests/integration/connector_job_tests/slack/slack_api_utils.py @@ -0,0 +1,291 @@ +""" +Assumptions: +- The test users have already been created +- General is empty of messages +- In addition to the normal slack oauth permissions, the following scopes are needed: + - channels:manage + - groups:write + - chat:write + - chat:write.public +""" +from typing import Any +from uuid import uuid4 + +from slack_sdk import WebClient +from slack_sdk.errors import SlackApiError + +from danswer.connectors.slack.connector import default_msg_filter +from danswer.connectors.slack.connector import get_channel_messages +from danswer.connectors.slack.utils import make_paginated_slack_api_call_w_retries +from danswer.connectors.slack.utils import make_slack_api_call_w_retries + + +def _get_slack_channel_id(channel: dict[str, Any]) -> str: + if not (channel_id := channel.get("id")): + raise ValueError("Channel ID is missing") + return channel_id + + +def _get_non_general_channels( + slack_client: WebClient, + get_private: bool, + get_public: bool, + only_get_done: bool = False, +) -> list[dict[str, Any]]: + channel_types = [] + if get_private: + channel_types.append("private_channel") + if get_public: + channel_types.append("public_channel") + + conversations: list[dict[str, Any]] = [] + for result in make_paginated_slack_api_call_w_retries( + slack_client.conversations_list, + exclude_archived=False, + types=channel_types, + ): + conversations.extend(result["channels"]) + + filtered_conversations = [] + for conversation in conversations: + if conversation.get("is_general", False): + continue + if only_get_done and "done" not in conversation.get("name", ""): + continue + filtered_conversations.append(conversation) + return filtered_conversations + + +def _clear_slack_conversation_members( + slack_client: WebClient, + admin_user_id: str, + channel: dict[str, Any], +) -> None: + channel_id = _get_slack_channel_id(channel) + member_ids: list[str] = [] + for result in make_paginated_slack_api_call_w_retries( + slack_client.conversations_members, + channel=channel_id, + ): + member_ids.extend(result["members"]) + + for member_id in member_ids: + if member_id == admin_user_id: + continue + try: + slack_client.conversations_kick(channel=channel_id, user=member_id) + print(f"Kicked member: {member_id}") + except Exception as e: + if "cant_kick_self" in str(e): + continue + print(f"Error kicking member: {e}") + print(member_id) + try: + slack_client.conversations_unarchive(channel=channel_id) + channel["is_archived"] = False + except Exception: + # Channel is already unarchived + pass + + +def _add_slack_conversation_members( + slack_client: WebClient, channel: dict[str, Any], member_ids: list[str] +) -> None: + channel_id = _get_slack_channel_id(channel) + for user_id in member_ids: + try: + slack_client.conversations_invite(channel=channel_id, users=user_id) + except Exception as e: + if "already_in_channel" in str(e): + continue + print(f"Error inviting member: {e}") + 
print(user_id) + + +def _delete_slack_conversation_messages( + slack_client: WebClient, + channel: dict[str, Any], + message_to_delete: str | None = None, +) -> None: + """deletes all messages from a channel if message_to_delete is None""" + channel_id = _get_slack_channel_id(channel) + for message_batch in get_channel_messages(slack_client, channel): + for message in message_batch: + if default_msg_filter(message): + continue + + if message_to_delete and message.get("text") != message_to_delete: + continue + print(" removing message: ", message.get("text")) + + try: + if not (ts := message.get("ts")): + raise ValueError("Message timestamp is missing") + slack_client.chat_delete(channel=channel_id, ts=ts) + except Exception as e: + print(f"Error deleting message: {e}") + print(message) + + +def _build_slack_channel_from_name( + slack_client: WebClient, + admin_user_id: str, + suffix: str, + is_private: bool, + channel: dict[str, Any] | None, +) -> dict[str, Any]: + base = "public_channel" if not is_private else "private_channel" + channel_name = f"{base}-{suffix}" + if channel: + # If channel is provided, we rename it + channel_id = _get_slack_channel_id(channel) + channel_response = make_slack_api_call_w_retries( + slack_client.conversations_rename, + channel=channel_id, + name=channel_name, + ) + else: + # Otherwise, we create a new channel + channel_response = make_slack_api_call_w_retries( + slack_client.conversations_create, + name=channel_name, + is_private=is_private, + ) + + try: + slack_client.conversations_unarchive(channel=channel_response["channel"]["id"]) + except Exception: + # Channel is already unarchived + pass + try: + slack_client.conversations_invite( + channel=channel_response["channel"]["id"], + users=[admin_user_id], + ) + except Exception: + pass + + final_channel = channel_response["channel"] if channel_response else {} + return final_channel + + +class SlackManager: + @staticmethod + def get_slack_client(token: str) -> WebClient: + return WebClient(token=token) + + @staticmethod + def get_and_provision_available_slack_channels( + slack_client: WebClient, admin_user_id: str + ) -> tuple[dict[str, Any], dict[str, Any], str]: + run_id = str(uuid4()) + public_channels = _get_non_general_channels( + slack_client, get_private=False, get_public=True, only_get_done=True + ) + + first_available_channel = ( + None if len(public_channels) < 1 else public_channels[0] + ) + public_channel = _build_slack_channel_from_name( + slack_client=slack_client, + admin_user_id=admin_user_id, + suffix=run_id, + is_private=False, + channel=first_available_channel, + ) + _delete_slack_conversation_messages( + slack_client=slack_client, channel=public_channel + ) + + private_channels = _get_non_general_channels( + slack_client, get_private=True, get_public=False, only_get_done=True + ) + second_available_channel = ( + None if len(private_channels) < 1 else private_channels[0] + ) + private_channel = _build_slack_channel_from_name( + slack_client=slack_client, + admin_user_id=admin_user_id, + suffix=run_id, + is_private=True, + channel=second_available_channel, + ) + _delete_slack_conversation_messages( + slack_client=slack_client, channel=private_channel + ) + + return public_channel, private_channel, run_id + + @staticmethod + def build_slack_user_email_id_map(slack_client: WebClient) -> dict[str, str]: + users_results = make_slack_api_call_w_retries( + slack_client.users_list, + ) + users: list[dict[str, Any]] = users_results.get("members", []) + user_email_id_map = {} + for user in users: 
+ if not (email := user.get("profile", {}).get("email")): + continue + if not (user_id := user.get("id")): + raise ValueError("User ID is missing") + user_email_id_map[email] = user_id + return user_email_id_map + + @staticmethod + def set_channel_members( + slack_client: WebClient, + admin_user_id: str, + channel: dict[str, Any], + user_ids: list[str], + ) -> None: + _clear_slack_conversation_members( + slack_client=slack_client, + channel=channel, + admin_user_id=admin_user_id, + ) + _add_slack_conversation_members( + slack_client=slack_client, channel=channel, member_ids=user_ids + ) + + @staticmethod + def add_message_to_channel( + slack_client: WebClient, channel: dict[str, Any], message: str + ) -> None: + channel_id = _get_slack_channel_id(channel) + make_slack_api_call_w_retries( + slack_client.chat_postMessage, + channel=channel_id, + text=message, + ) + + @staticmethod + def remove_message_from_channel( + slack_client: WebClient, channel: dict[str, Any], message: str + ) -> None: + _delete_slack_conversation_messages( + slack_client=slack_client, channel=channel, message_to_delete=message + ) + + @staticmethod + def cleanup_after_test( + slack_client: WebClient, + test_id: str, + ) -> None: + channel_types = ["private_channel", "public_channel"] + channels: list[dict[str, Any]] = [] + for result in make_paginated_slack_api_call_w_retries( + slack_client.conversations_list, + exclude_archived=False, + types=channel_types, + ): + channels.extend(result["channels"]) + + for channel in channels: + if test_id not in channel.get("name", ""): + continue + # "done" in the channel name indicates that this channel is free to be used for a new test + new_name = f"done_{str(uuid4())}" + try: + slack_client.conversations_rename(channel=channel["id"], name=new_name) + except SlackApiError as e: + print(f"Error renaming channel {channel['id']}: {e}") diff --git a/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py b/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py new file mode 100644 index 00000000000..d64986ea826 --- /dev/null +++ b/backend/tests/integration/connector_job_tests/slack/test_permission_sync.py @@ -0,0 +1,205 @@ +import os +from datetime import datetime +from datetime import timezone +from typing import Any + +from danswer.connectors.models import InputType +from danswer.db.enums import AccessType +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.credential import CredentialManager +from tests.integration.common_utils.managers.document_search import ( + DocumentSearchManager, +) +from tests.integration.common_utils.managers.llm_provider import LLMProviderManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestCCPair +from tests.integration.common_utils.test_models import DATestConnector +from tests.integration.common_utils.test_models import DATestCredential +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.vespa import vespa_fixture +from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager + + +# @pytest.mark.xfail(reason="flaky - see DAN-789 for example", strict=False) +def test_slack_permission_sync( + reset: None, + vespa_client: vespa_fixture, + 
slack_test_setup: tuple[dict[str, Any], dict[str, Any]], +) -> None: + public_channel, private_channel = slack_test_setup + + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create( + email="admin@onyx-test.com", + ) + + # Creating a non-admin user + test_user_1: DATestUser = UserManager.create( + email="test_user_1@onyx-test.com", + ) + + # Creating a non-admin user + test_user_2: DATestUser = UserManager.create( + email="test_user_2@onyx-test.com", + ) + + slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"]) + email_id_map = SlackManager.build_slack_user_email_id_map(slack_client) + admin_user_id = email_id_map[admin_user.email] + + LLMProviderManager.create(user_performing_action=admin_user) + + before = datetime.now(timezone.utc) + credential: DATestCredential = CredentialManager.create( + source=DocumentSource.SLACK, + credential_json={ + "slack_bot_token": os.environ["SLACK_BOT_TOKEN"], + }, + user_performing_action=admin_user, + ) + connector: DATestConnector = ConnectorManager.create( + name="Slack", + input_type=InputType.POLL, + source=DocumentSource.SLACK, + connector_specific_config={ + "workspace": "onyx-test-workspace", + "channels": [public_channel["name"], private_channel["name"]], + }, + is_public=True, + groups=[], + user_performing_action=admin_user, + ) + cc_pair: DATestCCPair = CCPairManager.create( + credential_id=credential.id, + connector_id=connector.id, + access_type=AccessType.SYNC, + user_performing_action=admin_user, + ) + CCPairManager.wait_for_indexing( + cc_pair=cc_pair, + after=before, + user_performing_action=admin_user, + ) + + # Add test_user_1 and admin_user to the private channel + desired_channel_members = [admin_user, test_user_1] + SlackManager.set_channel_members( + slack_client=slack_client, + admin_user_id=admin_user_id, + channel=private_channel, + user_ids=[email_id_map[user.email] for user in desired_channel_members], + ) + + public_message = "Steve's favorite number is 809752" + private_message = "Sara's favorite number is 346794" + + # Add messages to channels + SlackManager.add_message_to_channel( + slack_client=slack_client, + channel=public_channel, + message=public_message, + ) + SlackManager.add_message_to_channel( + slack_client=slack_client, + channel=private_channel, + message=private_message, + ) + + # Run indexing + before = datetime.now(timezone.utc) + CCPairManager.run_once(cc_pair, admin_user) + CCPairManager.wait_for_indexing( + cc_pair=cc_pair, + after=before, + user_performing_action=admin_user, + ) + + # Run permission sync + before = datetime.now(timezone.utc) + CCPairManager.sync( + cc_pair=cc_pair, + user_performing_action=admin_user, + ) + CCPairManager.wait_for_sync( + cc_pair=cc_pair, + after=before, + user_performing_action=admin_user, + ) + + # Search as admin with access to both channels + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=admin_user, + ) + + # Ensure admin user can see messages from both channels + assert public_message in danswer_doc_message_strings + assert private_message in danswer_doc_message_strings + + # Search as test_user_2 with access to only the public channel + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=test_user_2, + ) + print( + "\ntop_documents content before removing from private channel for test_user_2: ", + danswer_doc_message_strings, + ) + + # 
Ensure test_user_2 can only see messages from the public channel + assert public_message in danswer_doc_message_strings + assert private_message not in danswer_doc_message_strings + + # Search as test_user_1 with access to both channels + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=test_user_1, + ) + print( + "\ntop_documents content before removing from private channel for test_user_1: ", + danswer_doc_message_strings, + ) + + # Ensure test_user_1 can see messages from both channels + assert public_message in danswer_doc_message_strings + assert private_message in danswer_doc_message_strings + + # ----------------------MAKE THE CHANGES-------------------------- + print("\nRemoving test_user_1 from the private channel") + # Remove test_user_1 from the private channel + desired_channel_members = [admin_user] + SlackManager.set_channel_members( + slack_client=slack_client, + admin_user_id=admin_user_id, + channel=private_channel, + user_ids=[email_id_map[user.email] for user in desired_channel_members], + ) + + # Run permission sync + CCPairManager.sync( + cc_pair=cc_pair, + user_performing_action=admin_user, + ) + CCPairManager.wait_for_sync( + cc_pair=cc_pair, + after=before, + user_performing_action=admin_user, + ) + + # ----------------------------VERIFY THE CHANGES--------------------------- + # Ensure test_user_1 can no longer see messages from the private channel + # Search as test_user_1 with access to only the public channel + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=test_user_1, + ) + print( + "\ntop_documents content after removing from private channel for test_user_1: ", + danswer_doc_message_strings, + ) + + # Ensure test_user_1 can only see messages from the public channel + assert public_message in danswer_doc_message_strings + assert private_message not in danswer_doc_message_strings diff --git a/backend/tests/integration/connector_job_tests/slack/test_prune.py b/backend/tests/integration/connector_job_tests/slack/test_prune.py new file mode 100644 index 00000000000..bcef148a2a0 --- /dev/null +++ b/backend/tests/integration/connector_job_tests/slack/test_prune.py @@ -0,0 +1,208 @@ +import os +from datetime import datetime +from datetime import timezone +from typing import Any + +from danswer.connectors.models import InputType +from danswer.db.enums import AccessType +from danswer.server.documents.models import DocumentSource +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.credential import CredentialManager +from tests.integration.common_utils.managers.document_search import ( + DocumentSearchManager, +) +from tests.integration.common_utils.managers.llm_provider import LLMProviderManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestCCPair +from tests.integration.common_utils.test_models import DATestConnector +from tests.integration.common_utils.test_models import DATestCredential +from tests.integration.common_utils.test_models import DATestUser +from tests.integration.common_utils.vespa import vespa_fixture +from tests.integration.connector_job_tests.slack.slack_api_utils import SlackManager + + +# @pytest.mark.xfail(reason="flaky - see DAN-835 for example", strict=False) +def 
test_slack_prune( + reset: None, + vespa_client: vespa_fixture, + slack_test_setup: tuple[dict[str, Any], dict[str, Any]], +) -> None: + public_channel, private_channel = slack_test_setup + + # Creating an admin user (first user created is automatically an admin) + admin_user: DATestUser = UserManager.create( + email="admin@onyx-test.com", + ) + + # Creating a non-admin user + test_user_1: DATestUser = UserManager.create( + email="test_user_1@onyx-test.com", + ) + + slack_client = SlackManager.get_slack_client(os.environ["SLACK_BOT_TOKEN"]) + email_id_map = SlackManager.build_slack_user_email_id_map(slack_client) + admin_user_id = email_id_map[admin_user.email] + + LLMProviderManager.create(user_performing_action=admin_user) + + before = datetime.now(timezone.utc) + credential: DATestCredential = CredentialManager.create( + source=DocumentSource.SLACK, + credential_json={ + "slack_bot_token": os.environ["SLACK_BOT_TOKEN"], + }, + user_performing_action=admin_user, + ) + connector: DATestConnector = ConnectorManager.create( + name="Slack", + input_type=InputType.POLL, + source=DocumentSource.SLACK, + connector_specific_config={ + "workspace": "onyx-test-workspace", + "channels": [public_channel["name"], private_channel["name"]], + }, + is_public=True, + groups=[], + user_performing_action=admin_user, + ) + cc_pair: DATestCCPair = CCPairManager.create( + credential_id=credential.id, + connector_id=connector.id, + access_type=AccessType.SYNC, + user_performing_action=admin_user, + ) + CCPairManager.wait_for_indexing( + cc_pair=cc_pair, + after=before, + user_performing_action=admin_user, + ) + + # ----------------------SETUP INITIAL SLACK STATE-------------------------- + # Add test_user_1 and admin_user to the private channel + desired_channel_members = [admin_user, test_user_1] + SlackManager.set_channel_members( + slack_client=slack_client, + admin_user_id=admin_user_id, + channel=private_channel, + user_ids=[email_id_map[user.email] for user in desired_channel_members], + ) + + public_message = "Steve's favorite number is 809752" + private_message = "Sara's favorite number is 346794" + message_to_delete = "Rebecca's favorite number is 753468" + + SlackManager.add_message_to_channel( + slack_client=slack_client, + channel=public_channel, + message=public_message, + ) + SlackManager.add_message_to_channel( + slack_client=slack_client, + channel=private_channel, + message=private_message, + ) + SlackManager.add_message_to_channel( + slack_client=slack_client, + channel=private_channel, + message=message_to_delete, + ) + + # Run indexing + before = datetime.now(timezone.utc) + CCPairManager.run_once(cc_pair, admin_user) + CCPairManager.wait_for_indexing( + cc_pair=cc_pair, + after=before, + user_performing_action=admin_user, + ) + + # Run permission sync + before = datetime.now(timezone.utc) + CCPairManager.sync( + cc_pair=cc_pair, + user_performing_action=admin_user, + ) + CCPairManager.wait_for_sync( + cc_pair=cc_pair, + after=before, + user_performing_action=admin_user, + ) + + # ----------------------TEST THE SETUP-------------------------- + # Search as admin with access to both channels + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=admin_user, + ) + print( + "\ntop_documents content before deleting for admin: ", + danswer_doc_message_strings, + ) + + # Ensure admin user can see all messages + assert public_message in danswer_doc_message_strings + assert private_message in danswer_doc_message_strings + assert 
message_to_delete in danswer_doc_message_strings + + # Search as test_user_1 with access to both channels + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=test_user_1, + ) + print( + "\ntop_documents content before deleting for test_user_1: ", + danswer_doc_message_strings, + ) + + # Ensure test_user_1 can see all messages + assert public_message in danswer_doc_message_strings + assert private_message in danswer_doc_message_strings + assert message_to_delete in danswer_doc_message_strings + + # ----------------------MAKE THE CHANGES-------------------------- + # Delete messages + print("\nDeleting message: ", message_to_delete) + SlackManager.remove_message_from_channel( + slack_client=slack_client, + channel=private_channel, + message=message_to_delete, + ) + + # Prune the cc_pair + now = datetime.now(timezone.utc) + CCPairManager.prune(cc_pair, user_performing_action=admin_user) + CCPairManager.wait_for_prune(cc_pair, now, user_performing_action=admin_user) + + # ----------------------------VERIFY THE CHANGES--------------------------- + # Ensure admin user can't see deleted messages + # Search as admin user with access to only the public channel + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=admin_user, + ) + print( + "\ntop_documents content after deleting for admin: ", + danswer_doc_message_strings, + ) + + # Ensure admin can't see deleted messages + assert public_message in danswer_doc_message_strings + assert private_message in danswer_doc_message_strings + assert message_to_delete not in danswer_doc_message_strings + + # Ensure test_user_1 can't see deleted messages + # Search as test_user_1 with access to only the public channel + danswer_doc_message_strings = DocumentSearchManager.search_documents( + query="favorite number", + user_performing_action=test_user_1, + ) + print( + "\ntop_documents content after prune for test_user_1: ", + danswer_doc_message_strings, + ) + + # Ensure test_user_1 can't see deleted messages + assert public_message in danswer_doc_message_strings + assert private_message in danswer_doc_message_strings + assert message_to_delete not in danswer_doc_message_strings diff --git a/backend/tests/integration/multitenant_tests/cc_Pair b/backend/tests/integration/multitenant_tests/cc_Pair new file mode 100644 index 00000000000..e69de29bb2d diff --git a/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py new file mode 100644 index 00000000000..454b02412d4 --- /dev/null +++ b/backend/tests/integration/multitenant_tests/syncing/test_search_permissions.py @@ -0,0 +1,150 @@ +from danswer.db.models import UserRole +from tests.integration.common_utils.managers.api_key import APIKeyManager +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.chat import ChatSessionManager +from tests.integration.common_utils.managers.document import DocumentManager +from tests.integration.common_utils.managers.llm_provider import LLMProviderManager +from tests.integration.common_utils.managers.tenant import TenantManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestAPIKey +from tests.integration.common_utils.test_models import DATestCCPair +from 
tests.integration.common_utils.test_models import DATestChatSession +from tests.integration.common_utils.test_models import DATestUser + + +def test_multi_tenant_access_control(reset_multitenant: None) -> None: + # Create Tenant 1 and its Admin User + TenantManager.create("tenant_dev1", "test1@test.com") + test_user1: DATestUser = UserManager.create(name="test1", email="test1@test.com") + assert UserManager.verify_role(test_user1, UserRole.ADMIN) + + # Create Tenant 2 and its Admin User + TenantManager.create("tenant_dev2", "test2@test.com") + test_user2: DATestUser = UserManager.create(name="test2", email="test2@test.com") + assert UserManager.verify_role(test_user2, UserRole.ADMIN) + + # Create connectors for Tenant 1 + cc_pair_1: DATestCCPair = CCPairManager.create_from_scratch( + user_performing_action=test_user1, + ) + api_key_1: DATestAPIKey = APIKeyManager.create( + user_performing_action=test_user1, + ) + api_key_1.headers.update(test_user1.headers) + LLMProviderManager.create(user_performing_action=test_user1) + + # Seed documents for Tenant 1 + cc_pair_1.documents = [] + doc1_tenant1 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Tenant 1 Document Content", + api_key=api_key_1, + ) + doc2_tenant1 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_1, + content="Tenant 1 Document Content", + api_key=api_key_1, + ) + cc_pair_1.documents.extend([doc1_tenant1, doc2_tenant1]) + + # Create connectors for Tenant 2 + cc_pair_2: DATestCCPair = CCPairManager.create_from_scratch( + user_performing_action=test_user2, + ) + api_key_2: DATestAPIKey = APIKeyManager.create( + user_performing_action=test_user2, + ) + api_key_2.headers.update(test_user2.headers) + LLMProviderManager.create(user_performing_action=test_user2) + + # Seed documents for Tenant 2 + cc_pair_2.documents = [] + doc1_tenant2 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_2, + content="Tenant 2 Document Content", + api_key=api_key_2, + ) + doc2_tenant2 = DocumentManager.seed_doc_with_content( + cc_pair=cc_pair_2, + content="Tenant 2 Document Content", + api_key=api_key_2, + ) + cc_pair_2.documents.extend([doc1_tenant2, doc2_tenant2]) + + tenant1_doc_ids = {doc1_tenant1.id, doc2_tenant1.id} + tenant2_doc_ids = {doc1_tenant2.id, doc2_tenant2.id} + + # Create chat sessions for each user + chat_session1: DATestChatSession = ChatSessionManager.create( + user_performing_action=test_user1 + ) + chat_session2: DATestChatSession = ChatSessionManager.create( + user_performing_action=test_user2 + ) + + # User 1 sends a message and gets a response + response1 = ChatSessionManager.send_message( + chat_session_id=chat_session1.id, + message="What is in Tenant 1's documents?", + user_performing_action=test_user1, + ) + # Assert that the search tool was used + assert response1.tool_name == "run_search" + + response_doc_ids = {doc["document_id"] for doc in response1.tool_result or []} + assert tenant1_doc_ids.issubset( + response_doc_ids + ), "Not all Tenant 1 document IDs are in the response" + assert not response_doc_ids.intersection( + tenant2_doc_ids + ), "Tenant 2 document IDs should not be in the response" + + # Assert that the contents are correct + for doc in response1.tool_result or []: + assert doc["content"] == "Tenant 1 Document Content" + + # User 2 sends a message and gets a response + response2 = ChatSessionManager.send_message( + chat_session_id=chat_session2.id, + message="What is in Tenant 2's documents?", + user_performing_action=test_user2, + ) + # Assert that the search tool 
was used + assert response2.tool_name == "run_search" + # Assert that the tool_result contains Tenant 2's documents + response_doc_ids = {doc["document_id"] for doc in response2.tool_result or []} + assert tenant2_doc_ids.issubset( + response_doc_ids + ), "Not all Tenant 2 document IDs are in the response" + assert not response_doc_ids.intersection( + tenant1_doc_ids + ), "Tenant 1 document IDs should not be in the response" + # Assert that the contents are correct + for doc in response2.tool_result or []: + assert doc["content"] == "Tenant 2 Document Content" + + # User 1 tries to access Tenant 2's documents + response_cross = ChatSessionManager.send_message( + chat_session_id=chat_session1.id, + message="What is in Tenant 2's documents?", + user_performing_action=test_user1, + ) + # Assert that the search tool was used + assert response_cross.tool_name == "run_search" + # Assert that the tool_result is empty or does not contain Tenant 2's documents + response_doc_ids = {doc["document_id"] for doc in response_cross.tool_result or []} + # Ensure none of Tenant 2's document IDs are in the response + assert not response_doc_ids.intersection(tenant2_doc_ids) + + # User 2 tries to access Tenant 1's documents + response_cross2 = ChatSessionManager.send_message( + chat_session_id=chat_session2.id, + message="What is in Tenant 1's documents?", + user_performing_action=test_user2, + ) + # Assert that the search tool was used + assert response_cross2.tool_name == "run_search" + # Assert that the tool_result is empty or does not contain Tenant 1's documents + response_doc_ids = {doc["document_id"] for doc in response_cross2.tool_result or []} + # Ensure none of Tenant 1's document IDs are in the response + assert not response_doc_ids.intersection(tenant1_doc_ids) diff --git a/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py b/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py new file mode 100644 index 00000000000..6088743e317 --- /dev/null +++ b/backend/tests/integration/multitenant_tests/tenants/test_tenant_creation.py @@ -0,0 +1,41 @@ +from danswer.configs.constants import DocumentSource +from danswer.db.enums import AccessType +from danswer.db.models import UserRole +from tests.integration.common_utils.managers.cc_pair import CCPairManager +from tests.integration.common_utils.managers.connector import ConnectorManager +from tests.integration.common_utils.managers.credential import CredentialManager +from tests.integration.common_utils.managers.tenant import TenantManager +from tests.integration.common_utils.managers.user import UserManager +from tests.integration.common_utils.test_models import DATestUser + + +# Test flow from creating tenant to registering as a user +def test_tenant_creation(reset_multitenant: None) -> None: + TenantManager.create("tenant_dev", "test@test.com") + test_user: DATestUser = UserManager.create(name="test", email="test@test.com") + + assert UserManager.verify_role(test_user, UserRole.ADMIN) + + test_credential = CredentialManager.create( + name="admin_test_credential", + source=DocumentSource.FILE, + curator_public=False, + user_performing_action=test_user, + ) + + test_connector = ConnectorManager.create( + name="admin_test_connector", + source=DocumentSource.FILE, + is_public=False, + user_performing_action=test_user, + ) + + test_cc_pair = CCPairManager.create( + connector_id=test_connector.id, + credential_id=test_credential.id, + name="admin_test_cc_pair", + access_type=AccessType.PRIVATE, + 
user_performing_action=test_user, + ) + + CCPairManager.verify(cc_pair=test_cc_pair, user_performing_action=test_user) diff --git a/backend/tests/integration/tests/connector/test_connector_deletion.py b/backend/tests/integration/tests/connector/test_connector_deletion.py index 46a65f768a9..676ee4d9f4b 100644 --- a/backend/tests/integration/tests/connector/test_connector_deletion.py +++ b/backend/tests/integration/tests/connector/test_connector_deletion.py @@ -11,6 +11,7 @@ from danswer.db.engine import get_sqlalchemy_engine from danswer.db.enums import IndexingStatus +from danswer.db.index_attempt import create_index_attempt from danswer.db.index_attempt import create_index_attempt_error from danswer.db.models import IndexAttempt from danswer.db.search_settings import get_current_search_settings @@ -117,6 +118,22 @@ def test_connector_deletion(reset: None, vespa_client: vespa_fixture) -> None: user_performing_action=admin_user, ) + # inject an index attempt and index attempt error (exercises foreign key errors) + with Session(get_sqlalchemy_engine()) as db_session: + attempt_id = create_index_attempt( + connector_credential_pair_id=cc_pair_1.id, + search_settings_id=1, + db_session=db_session, + ) + create_index_attempt_error( + index_attempt_id=attempt_id, + batch=1, + docs=[], + exception_msg="", + exception_traceback="", + db_session=db_session, + ) + # Update local records to match the database for later comparison user_group_1.cc_pair_ids = [] user_group_2.cc_pair_ids = [cc_pair_2.id] @@ -125,7 +142,9 @@ def test_connector_deletion(reset: None, vespa_client: vespa_fixture) -> None: cc_pair_1.groups = [] cc_pair_2.groups = [user_group_2.id] - CCPairManager.wait_for_deletion_completion(user_performing_action=admin_user) + CCPairManager.wait_for_deletion_completion( + cc_pair_id=cc_pair_1.id, user_performing_action=admin_user + ) # validate vespa documents DocumentManager.verify( @@ -303,7 +322,9 @@ def test_connector_deletion_for_overlapping_connectors( ) # wait for deletion to finish - CCPairManager.wait_for_deletion_completion(user_performing_action=admin_user) + CCPairManager.wait_for_deletion_completion( + cc_pair_id=cc_pair_1.id, user_performing_action=admin_user + ) print("Connector 1 deleted") diff --git a/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py b/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py index 2cf6fd399ea..475085c6777 100644 --- a/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py +++ b/backend/tests/integration/tests/dev_apis/test_knowledge_chat.py @@ -71,8 +71,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None: answer_1 = response_json["answer"] assert "blue" in answer_1.lower() - # check that the llm selected a document - assert 0 in response_json["llm_selected_doc_indices"] + # FLAKY - check that the llm selected a document + # assert 0 in response_json["llm_selected_doc_indices"] # check that the final context documents are correct # (it should contain all documents because there arent enough to exclude any) @@ -80,11 +80,12 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None: assert 1 in response_json["final_context_doc_indices"] assert 2 in response_json["final_context_doc_indices"] - # check that the cited documents are correct - assert cc_pair_1.documents[0].id in response_json["cited_documents"].values() + # FLAKY - check that the cited documents are correct + # assert cc_pair_1.documents[0].id in response_json["cited_documents"].values() - # check that the top 
documents are correct - assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[0].id + # flakiness likely due to non-deterministic rephrasing + # FLAKY - check that the top documents are correct + # assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[0].id print("response 1/3 passed") # TESTING RESPONSE FOR QUESTION 2 @@ -117,8 +118,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None: answer_2 = response_json["answer"] assert "red" in answer_2.lower() - # check that the llm selected a document - assert 0 in response_json["llm_selected_doc_indices"] + # FLAKY - check that the llm selected a document + # assert 0 in response_json["llm_selected_doc_indices"] # check that the final context documents are correct # (it should contain all documents because there arent enough to exclude any) @@ -126,11 +127,12 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None: assert 1 in response_json["final_context_doc_indices"] assert 2 in response_json["final_context_doc_indices"] - # check that the cited documents are correct - assert cc_pair_1.documents[1].id in response_json["cited_documents"].values() + # FLAKY - check that the cited documents are correct + # assert cc_pair_1.documents[1].id in response_json["cited_documents"].values() - # check that the top documents are correct - assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[1].id + # flakiness likely due to non-deterministic rephrasing + # FLAKY - check that the top documents are correct + # assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[1].id print("response 2/3 passed") # TESTING RESPONSE FOR QUESTION 3 @@ -171,8 +173,8 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None: answer_3 = response_json["answer"] assert "green" in answer_3.lower() - # check that the llm selected a document - assert 0 in response_json["llm_selected_doc_indices"] + # FLAKY - check that the llm selected a document + # assert 0 in response_json["llm_selected_doc_indices"] # check that the final context documents are correct # (it should contain all documents because there arent enough to exclude any) @@ -180,9 +182,10 @@ def test_all_stream_chat_message_objects_outputs(reset: None) -> None: assert 1 in response_json["final_context_doc_indices"] assert 2 in response_json["final_context_doc_indices"] - # check that the cited documents are correct - assert cc_pair_1.documents[2].id in response_json["cited_documents"].values() + # FLAKY - check that the cited documents are correct + # assert cc_pair_1.documents[2].id in response_json["cited_documents"].values() - # check that the top documents are correct - assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[2].id + # flakiness likely due to non-deterministic rephrasing + # FLAKY - check that the top documents are correct + # assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[2].id print("response 3/3 passed") diff --git a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py index 0a4e7b40b57..0ed40c758d0 100644 --- a/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py +++ b/backend/tests/integration/tests/dev_apis/test_simple_chat_api.py @@ -1,7 +1,10 @@ +import json + import requests from danswer.configs.constants import MessageType from tests.integration.common_utils.constants import API_SERVER_URL +from 
tests.integration.common_utils.constants import GENERAL_HEADERS from tests.integration.common_utils.constants import NUM_DOCS from tests.integration.common_utils.managers.api_key import APIKeyManager from tests.integration.common_utils.managers.cc_pair import CCPairManager @@ -116,6 +119,7 @@ def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> ) assert response.status_code == 200 response_json = response.json() + # get the db_doc_id of the top document to use as a search doc id for second message first_db_doc_id = response_json["top_documents"][0]["db_doc_id"] @@ -138,6 +142,9 @@ def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> assert response.status_code == 200 response_json = response.json() + # make sure there is an answer + assert response_json["answer"] + # since we only gave it one search doc, all responses should only contain that doc assert response_json["final_context_doc_indices"] == [0] assert response_json["llm_selected_doc_indices"] == [0] @@ -145,3 +152,85 @@ def test_using_reference_docs_with_simple_with_history_api_flow(reset: None) -> # This ensures the the document we think we are referencing when we send the search_doc_ids in the second # message is the document that we expect it to be assert response_json["top_documents"][0]["document_id"] == cc_pair_1.documents[2].id + + +def test_send_message_simple_with_history_strict_json( + new_admin_user: DATestUser | None, +) -> None: + # create connectors + LLMProviderManager.create(user_performing_action=new_admin_user) + + response = requests.post( + f"{API_SERVER_URL}/chat/send-message-simple-with-history", + json={ + # intentionally not relevant prompt to ensure that the + # structured response format is actually used + "messages": [ + { + "message": "What is green?", + "role": MessageType.USER.value, + } + ], + "persona_id": 0, + "prompt_id": 0, + "structured_response_format": { + "type": "json_schema", + "json_schema": { + "name": "presidents", + "schema": { + "type": "object", + "properties": { + "presidents": { + "type": "array", + "items": {"type": "string"}, + "description": "List of the first three US presidents", + } + }, + "required": ["presidents"], + "additionalProperties": False, + }, + "strict": True, + }, + }, + }, + headers=new_admin_user.headers if new_admin_user else GENERAL_HEADERS, + ) + assert response.status_code == 200 + + response_json = response.json() + + # Check that the answer is present + assert "answer" in response_json + assert response_json["answer"] is not None + + # helper + def clean_json_string(json_string: str) -> str: + return json_string.strip().removeprefix("```json").removesuffix("```").strip() + + # Attempt to parse the answer as JSON + try: + clean_answer = clean_json_string(response_json["answer"]) + parsed_answer = json.loads(clean_answer) + + # NOTE: do not check content, just the structure + assert isinstance(parsed_answer, dict) + assert "presidents" in parsed_answer + assert isinstance(parsed_answer["presidents"], list) + for president in parsed_answer["presidents"]: + assert isinstance(president, str) + except json.JSONDecodeError: + assert ( + False + ), f"The answer is not a valid JSON object - '{response_json['answer']}'" + + # Check that the answer_citationless is also valid JSON + assert "answer_citationless" in response_json + assert response_json["answer_citationless"] is not None + try: + clean_answer_citationless = clean_json_string( + response_json["answer_citationless"] + ) + parsed_answer_citationless = 
json.loads(clean_answer_citationless) + assert isinstance(parsed_answer_citationless, dict) + except json.JSONDecodeError: + assert False, "The answer_citationless is not a valid JSON object" diff --git a/backend/tests/integration/tests/permissions/test_cc_pair_permissions.py b/backend/tests/integration/tests/permissions/test_cc_pair_permissions.py index 5fba8ff64fc..001daacd0c2 100644 --- a/backend/tests/integration/tests/permissions/test_cc_pair_permissions.py +++ b/backend/tests/integration/tests/permissions/test_cc_pair_permissions.py @@ -171,7 +171,9 @@ def test_cc_pair_permissions(reset: None) -> None: # Test deleting the cc pair CCPairManager.delete(valid_cc_pair, user_performing_action=curator) - CCPairManager.wait_for_deletion_completion(user_performing_action=curator) + CCPairManager.wait_for_deletion_completion( + cc_pair_id=valid_cc_pair.id, user_performing_action=curator + ) CCPairManager.verify( cc_pair=valid_cc_pair, diff --git a/backend/tests/integration/tests/permissions/test_whole_curator_flow.py b/backend/tests/integration/tests/permissions/test_whole_curator_flow.py index 1ce9052c108..751f41413d4 100644 --- a/backend/tests/integration/tests/permissions/test_whole_curator_flow.py +++ b/backend/tests/integration/tests/permissions/test_whole_curator_flow.py @@ -77,7 +77,9 @@ def test_whole_curator_flow(reset: None) -> None: # Verify that the curator can delete the CC pair CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=curator) - CCPairManager.wait_for_deletion_completion(user_performing_action=curator) + CCPairManager.wait_for_deletion_completion( + cc_pair_id=test_cc_pair.id, user_performing_action=curator + ) # Verify that the CC pair has been deleted CCPairManager.verify( @@ -158,7 +160,9 @@ def test_global_curator_flow(reset: None) -> None: # Verify that the curator can delete the CC pair CCPairManager.delete(cc_pair=test_cc_pair, user_performing_action=global_curator) - CCPairManager.wait_for_deletion_completion(user_performing_action=global_curator) + CCPairManager.wait_for_deletion_completion( + cc_pair_id=test_cc_pair.id, user_performing_action=global_curator + ) # Verify that the CC pair has been deleted CCPairManager.verify( diff --git a/backend/tests/integration/tests/pruning/test_pruning.py b/backend/tests/integration/tests/pruning/test_pruning.py index 084ad80b357..cd8a7bde4d0 100644 --- a/backend/tests/integration/tests/pruning/test_pruning.py +++ b/backend/tests/integration/tests/pruning/test_pruning.py @@ -10,6 +10,10 @@ from time import sleep from typing import Any +import uvicorn +from fastapi import FastAPI +from fastapi.staticfiles import StaticFiles + from danswer.server.documents.models import DocumentSource from danswer.utils.logger import setup_logger from tests.integration.common_utils.managers.api_key import APIKeyManager @@ -21,10 +25,50 @@ logger = setup_logger() +# FastAPI server for serving files +def create_fastapi_app(directory: str) -> FastAPI: + app = FastAPI() + + # Mount the directory to serve static files + app.mount("/", StaticFiles(directory=directory, html=True), name="static") + + return app + + +# as far as we know, this doesn't hang when crawled. This is good. 
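+#
+# A minimal usage sketch (hypothetical directory, port, and URL, shown only for
+# orientation; `requests` is assumed to be importable in this test module):
+#
+#     with fastapi_server_context("/tmp/website", port=8889):
+#         # while the block is active, files under /tmp/website are served by a
+#         # background uvicorn thread at http://localhost:8889/
+#         response = requests.get("http://localhost:8889/index.html")
+#         assert response.status_code == 200
+#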
+@contextmanager +def fastapi_server_context( + directory: str, port: int = 8000 +) -> Generator[None, None, None]: + app = create_fastapi_app(directory) + + config = uvicorn.Config(app=app, host="0.0.0.0", port=port, log_level="info") + server = uvicorn.Server(config) + + # Create a thread to run the FastAPI server + server_thread = threading.Thread(target=server.run) + server_thread.daemon = ( + True # Ensures the thread will exit when the main program exits + ) + + try: + # Start the server in the background + server_thread.start() + sleep(5) # Give it a few seconds to start + yield # Yield control back to the calling function (context manager in use) + finally: + # Shutdown the server + server.should_exit = True + server_thread.join() + + +# Leaving this here for posterity and experimentation, but the reason we're +# not using this is python's web servers hang frequently when crawled +# this is obviously not good for a unit test @contextmanager def http_server_context( directory: str, port: int = 8000 -) -> Generator[http.server.HTTPServer, None, None]: +) -> Generator[http.server.ThreadingHTTPServer, None, None]: # Create a handler that serves files from the specified directory def handler_class( *args: Any, **kwargs: Any @@ -34,7 +78,7 @@ def handler_class( ) # Create an HTTPServer instance - httpd = http.server.HTTPServer(("0.0.0.0", port), handler_class) + httpd = http.server.ThreadingHTTPServer(("0.0.0.0", port), handler_class) # Define a thread that runs the server in the background server_thread = threading.Thread(target=httpd.serve_forever) @@ -45,6 +89,7 @@ def handler_class( try: # Start the server in the background server_thread.start() + sleep(5) # give it a few seconds to start yield httpd finally: # Shutdown the server and wait for the thread to finish @@ -65,12 +110,12 @@ def test_web_pruning(reset: None, vespa_client: vespa_fixture) -> None: test_filename = os.path.realpath(__file__) test_directory = os.path.dirname(test_filename) with tempfile.TemporaryDirectory() as temp_dir: - port = 8888 + port = 8889 website_src = os.path.join(test_directory, "website") website_tgt = os.path.join(temp_dir, "website") shutil.copytree(website_src, website_tgt) - with http_server_context(os.path.join(temp_dir, "website"), port): + with fastapi_server_context(os.path.join(temp_dir, "website"), port): sleep(1) # sleep a tiny bit before starting everything hostname = os.getenv("TEST_WEB_HOSTNAME", "localhost") @@ -105,9 +150,7 @@ def test_web_pruning(reset: None, vespa_client: vespa_fixture) -> None: logger.info("Removing courses.html.") os.remove(os.path.join(website_tgt, "courses.html")) - # store the time again as a reference for the pruning timestamps now = datetime.now(timezone.utc) - CCPairManager.prune(cc_pair_1, user_performing_action=admin_user) CCPairManager.wait_for_prune( cc_pair_1, now, timeout=60, user_performing_action=admin_user diff --git a/backend/tests/integration/tests/pruning/website/css/flexslider.css b/backend/tests/integration/tests/pruning/website/css/flexslider.css deleted file mode 100644 index 6088235631c..00000000000 --- a/backend/tests/integration/tests/pruning/website/css/flexslider.css +++ /dev/null @@ -1,226 +0,0 @@ -/* - * jQuery FlexSlider v2.0 - * http://www.woothemes.com/flexslider/ - * - * Copyright 2012 WooThemes - * Free to use under the GPLv2 license. 
- * http://www.gnu.org/licenses/gpl-2.0.html - * - * Contributing author: Tyler Smith (@mbmufffin) - */ - -/* Browser Resets */ -.flex-container a:active, -.flexslider a:active, -.flex-container a:focus, -.flexslider a:focus { - outline: none; -} -.slides, -.flex-control-nav, -.flex-direction-nav { - margin: 0; - padding: 0; - list-style: none; -} - -/* FlexSlider Necessary Styles -*********************************/ -.flexslider { - margin: 0; - padding: 0; -} -.flexslider .slides > li { - display: none; - -webkit-backface-visibility: hidden; -} /* Hide the slides before the JS is loaded. Avoids image jumping */ -.flexslider .slides img { - width: 100%; - display: block; -} -.flex-pauseplay span { - text-transform: capitalize; -} - -/* Clearfix for the .slides element */ -.slides:after { - content: "."; - display: block; - clear: both; - visibility: hidden; - line-height: 0; - height: 0; -} -html[xmlns] .slides { - display: block; -} -* html .slides { - height: 1%; -} - -/* No JavaScript Fallback */ -/* If you are not using another script, such as Modernizr, make sure you - * include js that eliminates this class on page load */ -.no-js .slides > li:first-child { - display: block; -} - -/* FlexSlider Default Theme -*********************************/ -.flexslider { - background: none; - position: relative; - zoom: 1; -} -.flex-viewport { - max-height: 2000px; - -webkit-transition: all 1s ease; - -moz-transition: all 1s ease; - transition: all 1s ease; -} -.loading .flex-viewport { - max-height: 300px; -} -.flexslider .slides { - zoom: 1; -} - -.carousel li { - margin-right: 5px; -} - -/* Caption style */ - -.flex-caption { - background: rgba(0, 0, 0, 0.8); - margin-left: 5px; - bottom: 5px; - position: absolute; - padding: 20px; - z-index: 99; -} -.flex-caption p { - font-size: 14px !important; - line-height: 22px; - font-weight: 300; - color: #fff; -} -.flex-caption h2, -.flex-caption h4 { - color: #fff; -} - -/* Direction Nav */ -.flex-direction-nav { - *height: 0; -} -.flex-direction-nav a { - width: 30px; - height: 40px; - margin: 0; - display: block; - background: url(../img/bg_direction_nav.png) no-repeat 0 0; - position: absolute; - top: 45%; - z-index: 10; - cursor: pointer; - text-indent: -9999px; - opacity: 0; - -webkit-transition: all 0.3s ease; -} -.flex-direction-nav .flex-next { - background-position: 100% 0; - right: -36px; -} -.flex-direction-nav .flex-prev { - left: -36px; -} -.flexslider:hover .flex-next { - opacity: 0.8; - right: 5px; -} -.flexslider:hover .flex-prev { - opacity: 0.8; - left: 5px; -} -.flexslider:hover .flex-next:hover, -.flexslider:hover .flex-prev:hover { - opacity: 1; -} -.flex-direction-nav .flex-disabled { - opacity: 0.3 !important; - filter: alpha(opacity=30); - cursor: default; -} - -/* Control Nav */ -.flex-control-nav { - width: 100%; - position: absolute; - bottom: 0; - text-align: center; -} -.flex-control-nav li { - margin: 0 6px; - display: inline-block; - zoom: 1; - *display: inline; -} -.flex-control-paging li a { - width: 11px; - height: 11px; - display: block; - background: #666; - background: rgba(0, 0, 0, 0.5); - cursor: pointer; - text-indent: -9999px; - -webkit-border-radius: 20px; - -moz-border-radius: 20px; - -o-border-radius: 20px; - border-radius: 20px; - box-shadow: inset 0 0 3px rgba(0, 0, 0, 0.3); -} -.flex-control-paging li a:hover { - background: #333; - background: rgba(0, 0, 0, 0.7); -} -.flex-control-paging li a.flex-active { - background: #000; - background: rgba(0, 0, 0, 0.9); - cursor: default; -} - 
-.flex-control-thumbs { - margin: 5px 0 0; - position: static; - overflow: hidden; -} -.flex-control-thumbs li { - width: 25%; - float: left; - margin: 0; -} -.flex-control-thumbs img { - width: 100%; - display: block; - opacity: 0.7; - cursor: pointer; -} -.flex-control-thumbs img:hover { - opacity: 1; -} -.flex-control-thumbs .flex-active { - opacity: 1; - cursor: default; -} - -@media screen and (max-width: 860px) { - .flex-direction-nav .flex-prev { - opacity: 1; - left: 0; - } - .flex-direction-nav .flex-next { - opacity: 1; - right: 0; - } -} diff --git a/backend/tests/load_env_vars.py b/backend/tests/load_env_vars.py new file mode 100644 index 00000000000..2911ad4c598 --- /dev/null +++ b/backend/tests/load_env_vars.py @@ -0,0 +1,16 @@ +import os + + +def load_env_vars(env_file: str = ".env") -> None: + current_dir = os.path.dirname(os.path.abspath(__file__)) + env_path = os.path.join(current_dir, env_file) + try: + with open(env_path, "r") as f: + for line in f: + line = line.strip() + if line and not line.startswith("#"): + key, value = line.split("=", 1) + os.environ[key] = value.strip() + print("Successfully loaded environment variables") + except FileNotFoundError: + print(f"File {env_file} not found") diff --git a/backend/tests/regression/answer_quality/run_qa.py b/backend/tests/regression/answer_quality/run_qa.py index 5de034b3740..f6dd0e0b558 100644 --- a/backend/tests/regression/answer_quality/run_qa.py +++ b/backend/tests/regression/answer_quality/run_qa.py @@ -77,14 +77,15 @@ def _initialize_files(config: dict) -> tuple[str, list[dict]]: "number_of_questions_in_dataset": len(questions), } - env_vars = get_docker_container_env_vars(config["env_name"]) - if env_vars["ENV_SEED_CONFIGURATION"]: - del env_vars["ENV_SEED_CONFIGURATION"] - if env_vars["GPG_KEY"]: - del env_vars["GPG_KEY"] - if metadata["test_config"]["llm"]["api_key"]: - del metadata["test_config"]["llm"]["api_key"] - metadata.update(env_vars) + if config["env_name"]: + env_vars = get_docker_container_env_vars(config["env_name"]) + if env_vars["ENV_SEED_CONFIGURATION"]: + del env_vars["ENV_SEED_CONFIGURATION"] + if env_vars["GPG_KEY"]: + del env_vars["GPG_KEY"] + if metadata["test_config"]["llm"]["api_key"]: + del metadata["test_config"]["llm"]["api_key"] + metadata.update(env_vars) metadata_path = os.path.join(test_output_folder, METADATA_FILENAME) print("saving metadata to:", metadata_path) with open(metadata_path, "w", encoding="utf-8") as yaml_file: @@ -95,17 +96,18 @@ def _initialize_files(config: dict) -> tuple[str, list[dict]]: ) shutil.copy2(questions_file_path, copied_questions_file_path) - zipped_files_path = config["zipped_documents_file"] - copied_zipped_documents_path = os.path.join( - test_output_folder, os.path.basename(zipped_files_path) - ) - shutil.copy2(zipped_files_path, copied_zipped_documents_path) + if config["zipped_documents_file"]: + zipped_files_path = config["zipped_documents_file"] + copied_zipped_documents_path = os.path.join( + test_output_folder, os.path.basename(zipped_files_path) + ) + shutil.copy2(zipped_files_path, copied_zipped_documents_path) - zipped_files_folder = os.path.dirname(zipped_files_path) - jsonl_file_path = os.path.join(zipped_files_folder, "target_docs.jsonl") - if os.path.exists(jsonl_file_path): - copied_jsonl_path = os.path.join(test_output_folder, "target_docs.jsonl") - shutil.copy2(jsonl_file_path, copied_jsonl_path) + zipped_files_folder = os.path.dirname(zipped_files_path) + jsonl_file_path = os.path.join(zipped_files_folder, "target_docs.jsonl") + if 
os.path.exists(jsonl_file_path): + copied_jsonl_path = os.path.join(test_output_folder, "target_docs.jsonl") + shutil.copy2(jsonl_file_path, copied_jsonl_path) return test_output_folder, questions diff --git a/backend/tests/unit/danswer/connectors/confluence/test_rate_limit_handler.py b/backend/tests/unit/danswer/connectors/confluence/test_rate_limit_handler.py index 92bccaa050d..d1f263a7793 100644 --- a/backend/tests/unit/danswer/connectors/confluence/test_rate_limit_handler.py +++ b/backend/tests/unit/danswer/connectors/confluence/test_rate_limit_handler.py @@ -1,11 +1,10 @@ from unittest.mock import Mock -from unittest.mock import patch import pytest from requests import HTTPError -from danswer.connectors.confluence.rate_limit_handler import ( - make_confluence_call_handle_rate_limit, +from danswer.connectors.confluence.onyx_confluence import ( + handle_confluence_rate_limit, ) @@ -14,36 +13,41 @@ def mock_confluence_call() -> Mock: return Mock() -@pytest.mark.parametrize( - "status_code,text,retry_after", - [ - (429, "Rate limit exceeded", "5"), - (200, "Rate limit exceeded", None), - (429, "Some other error", "5"), - ], -) -def test_rate_limit_handling( - mock_confluence_call: Mock, status_code: int, text: str, retry_after: str | None -) -> None: - with patch("time.sleep") as mock_sleep: - mock_confluence_call.side_effect = [ - HTTPError( - response=Mock( - status_code=status_code, - text=text, - headers={"Retry-After": retry_after} if retry_after else {}, - ) - ), - ] * 2 + ["Success"] - - handled_call = make_confluence_call_handle_rate_limit(mock_confluence_call) - result = handled_call() - - assert result == "Success" - assert mock_confluence_call.call_count == 3 - assert mock_sleep.call_count == 2 - if retry_after: - mock_sleep.assert_called_with(int(retry_after)) +# ***** Checking call count to sleep() won't correctly reflect test correctness +# especially since we really need to sleep multiple times and check for +# abort signals moving forward. Disabling this test for now until we come up with +# a better way forward. 
+ +# @pytest.mark.parametrize( +# "status_code,text,retry_after", +# [ +# (429, "Rate limit exceeded", "5"), +# (200, "Rate limit exceeded", None), +# (429, "Some other error", "5"), +# ], +# ) +# def test_rate_limit_handling( +# mock_confluence_call: Mock, status_code: int, text: str, retry_after: str | None +# ) -> None: +# with patch("time.sleep") as mock_sleep: +# mock_confluence_call.side_effect = [ +# HTTPError( +# response=Mock( +# status_code=status_code, +# text=text, +# headers={"Retry-After": retry_after} if retry_after else {}, +# ) +# ), +# ] * 2 + ["Success"] + +# handled_call = make_confluence_call_handle_rate_limit(mock_confluence_call) +# result = handled_call() + +# assert result == "Success" +# assert mock_confluence_call.call_count == 3 +# assert mock_sleep.call_count == 2 +# if retry_after: +# mock_sleep.assert_called_with(int(retry_after)) def test_non_rate_limit_error(mock_confluence_call: Mock) -> None: @@ -51,7 +55,7 @@ def test_non_rate_limit_error(mock_confluence_call: Mock) -> None: response=Mock(status_code=500, text="Internal Server Error") ) - handled_call = make_confluence_call_handle_rate_limit(mock_confluence_call) + handled_call = handle_confluence_rate_limit(mock_confluence_call) with pytest.raises(HTTPError): handled_call() diff --git a/backend/tests/unit/danswer/connectors/gmail/test_connector.py b/backend/tests/unit/danswer/connectors/gmail/test_connector.py index 2689e2a2751..31661cbcc06 100644 --- a/backend/tests/unit/danswer/connectors/gmail/test_connector.py +++ b/backend/tests/unit/danswer/connectors/gmail/test_connector.py @@ -1,205 +1,42 @@ import datetime - -import pytest -from pytest_mock import MockFixture +import json +import os from danswer.configs.constants import DocumentSource from danswer.connectors.cross_connector_utils.miscellaneous_utils import time_str_to_utc -from danswer.connectors.gmail.connector import GmailConnector +from danswer.connectors.gmail.connector import _build_time_range_query +from danswer.connectors.gmail.connector import thread_to_document from danswer.connectors.models import Document -def test_email_to_document() -> None: - connector = GmailConnector() - email_id = "18cabedb1ea46b03" - email_subject = "Danswer Test Subject" - email_sender = "Google " - email_recipient = "test.mail@gmail.com" - email_date = "Wed, 27 Dec 2023 15:38:49 GMT" - email_labels = ["UNREAD", "IMPORTANT", "CATEGORY_UPDATES", "STARRED", "INBOX"] - full_email = { - "id": email_id, - "threadId": email_id, - "labelIds": email_labels, - "snippet": "A new sign-in. We noticed a new sign-in to your Google Account. 
If this was you, you don't need to do", - "payload": { - "partId": "", - "mimeType": "multipart/alternative", - "filename": "", - "headers": [ - {"name": "Delivered-To", "value": email_recipient}, - {"name": "Date", "value": email_date}, - { - "name": "Message-ID", - "value": "", - }, - {"name": "Subject", "value": email_subject}, - {"name": "From", "value": email_sender}, - {"name": "To", "value": email_recipient}, - ], - "body": {"size": 0}, - "parts": [ - { - "partId": "0", - "mimeType": "text/plain", - "filename": "", - "headers": [ - { - "name": "Content-Type", - "value": 'text/plain; charset="UTF-8"; format=flowed; delsp=yes', - }, - {"name": "Content-Transfer-Encoding", "value": "base64"}, - ], - "body": { - "size": 9, - "data": "dGVzdCBkYXRh", - }, - }, - { - "partId": "1", - "mimeType": "text/html", - "filename": "", - "headers": [ - {"name": "Content-Type", "value": 'text/html; charset="UTF-8"'}, - { - "name": "Content-Transfer-Encoding", - "value": "quoted-printable", - }, - ], - "body": { - "size": 9, - "data": "dGVzdCBkYXRh", - }, - }, - ], - }, - "sizeEstimate": 12048, - "historyId": "697762", - "internalDate": "1703691529000", - } - doc = connector._email_to_document(full_email) +def test_thread_to_document() -> None: + json_path = os.path.join(os.path.dirname(__file__), "thread.json") + with open(json_path, "r") as f: + full_email_thread = json.load(f) + + doc = thread_to_document(full_email_thread) assert type(doc) == Document assert doc.source == DocumentSource.GMAIL - assert doc.title == "Danswer Test Subject" + assert doc.semantic_identifier == "Email Chain 1" assert doc.doc_updated_at == datetime.datetime( - 2023, 12, 27, 15, 38, 49, tzinfo=datetime.timezone.utc + 2024, 11, 2, 17, 34, 55, tzinfo=datetime.timezone.utc ) - assert doc.metadata == { - "labels": email_labels, - "from": email_sender, - "to": email_recipient, - "date": email_date, - "subject": email_subject, - } - - -def test_fetch_mails_from_gmail_empty(mocker: MockFixture) -> None: - mock_discovery = mocker.patch("danswer.connectors.gmail.connector.discovery") - mock_discovery.build.return_value.users.return_value.messages.return_value.list.return_value.execute.return_value = { - "messages": [] - } - connector = GmailConnector() - connector.creds = mocker.Mock() - with pytest.raises(StopIteration): - next(connector.load_from_state()) - - -def test_fetch_mails_from_gmail(mocker: MockFixture) -> None: - mock_discovery = mocker.patch("danswer.connectors.gmail.connector.discovery") - email_id = "18cabedb1ea46b03" - email_subject = "Danswer Test Subject" - email_sender = "Google " - email_recipient = "test.mail@gmail.com" - mock_discovery.build.return_value.users.return_value.messages.return_value.list.return_value.execute.return_value = { - "messages": [{"id": email_id, "threadId": email_id}], - "nextPageToken": "14473313008248105741", - "resultSizeEstimate": 201, - } - mock_discovery.build.return_value.users.return_value.messages.return_value.get.return_value.execute.return_value = { - "id": email_id, - "threadId": email_id, - "labelIds": ["UNREAD", "IMPORTANT", "CATEGORY_UPDATES", "STARRED", "INBOX"], - "snippet": "A new sign-in. We noticed a new sign-in to your Google Account. 
If this was you, you don't need to do", - "payload": { - "partId": "", - "mimeType": "multipart/alternative", - "filename": "", - "headers": [ - {"name": "Delivered-To", "value": email_recipient}, - {"name": "Date", "value": "Wed, 27 Dec 2023 15:38:49 GMT"}, - { - "name": "Message-ID", - "value": "", - }, - {"name": "Subject", "value": email_subject}, - {"name": "From", "value": email_sender}, - {"name": "To", "value": email_recipient}, - ], - "body": {"size": 0}, - "parts": [ - { - "partId": "0", - "mimeType": "text/plain", - "filename": "", - "headers": [ - { - "name": "Content-Type", - "value": 'text/plain; charset="UTF-8"; format=flowed; delsp=yes', - }, - {"name": "Content-Transfer-Encoding", "value": "base64"}, - ], - "body": { - "size": 9, - "data": "dGVzdCBkYXRh", - }, - }, - { - "partId": "1", - "mimeType": "text/html", - "filename": "", - "headers": [ - {"name": "Content-Type", "value": 'text/html; charset="UTF-8"'}, - { - "name": "Content-Transfer-Encoding", - "value": "quoted-printable", - }, - ], - "body": { - "size": 9, - "data": "dGVzdCBkYXRh", - }, - }, - ], - }, - "sizeEstimate": 12048, - "historyId": "697762", - "internalDate": "1703691529000", - } - - connector = GmailConnector() - connector.creds = mocker.Mock() - docs = next(connector.load_from_state()) - assert len(docs) == 1 - doc: Document = docs[0] - assert type(doc) == Document - assert doc.id == email_id - assert doc.title == email_subject - assert email_recipient in doc.sections[0].text - assert email_sender in doc.sections[0].text + assert len(doc.sections) == 4 + assert doc.metadata == {} def test_build_time_range_query() -> None: time_range_start = 1703066296.159339 time_range_end = 1704984791.657404 - query = GmailConnector._build_time_range_query(time_range_start, time_range_end) + query = _build_time_range_query(time_range_start, time_range_end) assert query == "after:1703066296 before:1704984791" - query = GmailConnector._build_time_range_query(time_range_start, None) + query = _build_time_range_query(time_range_start, None) assert query == "after:1703066296" - query = GmailConnector._build_time_range_query(None, time_range_end) + query = _build_time_range_query(None, time_range_end) assert query == "before:1704984791" - query = GmailConnector._build_time_range_query(0.0, time_range_end) + query = _build_time_range_query(0.0, time_range_end) assert query == "before:1704984791" - query = GmailConnector._build_time_range_query(None, None) + query = _build_time_range_query(None, None) assert query is None diff --git a/backend/tests/unit/danswer/connectors/gmail/thread.json b/backend/tests/unit/danswer/connectors/gmail/thread.json new file mode 100644 index 00000000000..53f0b83ce3a --- /dev/null +++ b/backend/tests/unit/danswer/connectors/gmail/thread.json @@ -0,0 +1,349 @@ +{ + "id": "192edefb315737c3", + "messages": [ + { + "id": "192edeff0dc743cf", + "payload": { + "headers": [ + { + "name": "MIME-Version", + "value": "1.0" + }, + { + "name": "Date", + "value": "Sat, 2 Nov 2024 10:32:57 -0700" + }, + { + "name": "Message-ID", + "value": "" + }, + { + "name": "Subject", + "value": "Email Chain 1" + }, + { + "name": "From", + "value": "Test Admin Admin " + }, + { + "name": "To", + "value": "test-group-1@onyx-test.com" + }, + { + "name": "Content-Type", + "value": "multipart/alternative; boundary=\"0000000000004480480625f17117\"" + } + ], + "parts": [ + { + "mimeType": "text/plain", + "body": { + "data": "VGhpcyBpcyBlbWFpbCAxIGluIGNoYWluIDENCg==" + } + }, + { + "mimeType": "text/html", + "body": { + "data": 
"PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAxIGluIGNoYWluIDE8L2Rpdj4NCg==" + } + } + ] + } + }, + { + "id": "192edf07fbcc8b2c", + "payload": { + "headers": [ + { + "name": "Delivered-To", + "value": "admin@onyx-test.com" + }, + { + "name": "Received", + "value": "by 2002:a59:b3cc:0:b0:491:1bbc:5e54 with SMTP id g12csp1873533vqt; Sat, 2 Nov 2024 10:33:34 -0700 (PDT)" + }, + { + "name": "X-Received", + "value": "by 2002:a05:6102:1284:b0:4a9:555b:fb50 with SMTP id ada2fe7eead31-4a9555bfd21mr8428882137.20.1730568814436; Sat, 02 Nov 2024 10:33:34 -0700 (PDT)" + }, + { + "name": "ARC-Seal", + "value": "i=1; a=rsa-sha256; t=1730568814; cv=none; d=google.com; s=arc-20240605; b=A75GBczY/LN8OhNdpZ1VM3opx5VWU3HWYnwCIL9TLBqEpNz2X74TXNkCevJkImB3VF BkFY7gHg7d8oGdsQvUp2EEdRBXKoYT8P4PTc3ZSD2W8LYU2XCudIbA5xtGObELmI0h0f bCXT8dE7m6hGJPTg0WPSlkvGs2bY52bmSbCbrnrA/Mx/oyxYPzwv5cMw3CLMXo/8nOLO FAzrnMTKRqYtn/QvYjUne7PpVSYPk0Edg5261/jn9qatyyL8VePU4FriQTffjAC85Ayc jikVA5QnsYO79aXJE0SIw4xBHwtOgmyWhU9TPw2NfuQHZWrm39JudUYlmZb8MV4VpX6p otxw==" + }, + { + "name": "ARC-Message-Signature", + "value": "i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605; h=to:subject:message-id:date:from:in-reply-to:references:mime-version :dkim-signature; bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=; fh=/JhVJcrFVXWWzpGRY8HXA/cCDTQzCntn8VCeyDmjzic=; b=bkhR3iHOUD64TOG3Mqfd9BMT/2IF9gHEjHZWR/tet5J05UKFhk2d4k69wuSLNJcxlF dB6zzgt1vvEnCbSV+XBCEG1zW76T/sN6Ldn7+5xomsGFYvTZsW4E7OJqxkedfdpFeWwc eBlgX765wnBs4ztktDhK6gO8igWx3CaYH5wbX72DV4wqcQpDNpMqNHK7sHrlOG2YJGzV 7i3tli4dJqu1zgQK+lo1or1QQyadFzhbwX2iFdSLTNSNR3s70kqqBOT69lDMv84dfKCp +hXE0uwjOY/9lGG9rO1/e5WWEDC2BSZ7wzjvvyBRjDG+lavBqTggUizd8W+MlRYXONAX t7Kg==; dara=google.com" + }, + { + "name": "ARC-Authentication-Results", + "value": "i=1; mx.google.com; dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=Z57TqzI7; spf=none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_1@onyx-test.com; dara=pass header.i=@onyx-test.com" + }, + { + "name": "Return-Path", + "value": "" + }, + { + "name": "Received", + "value": "from mail-sor-f41.google.com (mail-sor-f41.google.com. 
[209.85.220.41]) by mx.google.com with SMTPS id a1e0cc1a2514c-855dae589a1sor1192309241.6.2024.11.02.10.33.34 for (Google Transport Security); Sat, 02 Nov 2024 10:33:34 -0700 (PDT)" + }, + { + "name": "Received-SPF", + "value": "none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) client-ip=209.85.220.41;" + }, + { + "name": "Authentication-Results", + "value": "mx.google.com; dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=Z57TqzI7; spf=none (google.com: test_user_1@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_1@onyx-test.com; dara=pass header.i=@onyx-test.com" + }, + { + "name": "DKIM-Signature", + "value": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=onyx-test-com.20230601.gappssmtp.com; s=20230601; t=1730568814; x=1731173614; darn=onyx-test.com; h=to:subject:message-id:date:from:in-reply-to:references:mime-version :from:to:cc:subject:date:message-id:reply-to; bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=; b=Z57TqzI7sEwwOumQx0z6YhibC1x2CHlNmBjwyQT1mNOUScZbzo6nmH8Ydo7slsTfgZ rgwKEEYkf/CYlFWGUEzGzc22jVUCSMjNMFB0nEtfj+GPJaNjDR9FxjFLTUfSq64H/RCI eO9+oEAJHaa5QmceX2yiSJFXNqmVEMJNT+K6CnlbN5gW6CUD2tBt46vW83PVJgxKMc76 A7/eaDxdZDLUvpjHes4SvM7x0eBM9t7w9wb/jEjGqA54HI2YHVcxM4HJxrbCChYn8UoG 7+UOpfOmHTZLdLYgMtSqYanJ3BTENEdyVp2LIOZOhlUT7Hbr9esyeVyy765XTuRAWxmo DGPQ==" + }, + { + "name": "X-Google-DKIM-Signature", + "value": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1730568814; x=1731173614; h=to:subject:message-id:date:from:in-reply-to:references:mime-version :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=9Eo5wYdnqXP3axXBCAwTODK4DvptOqG5RNct/xfBak4=; b=fxuobWT2rW8kvQ14LUHbJEJOdCM4uBP+Obo7jL4w0BvwLrBNNbMPqMUc8d8u17dnS7 gczFCprOr5PZnVNmOZMQvmRTJ6poTkWOGQhsOyDOSLNI0IzuaN2wh9qjmFez6Z9nTx3f Lo0I0uahwzNkExywHC9x0H3NOZlS4074qkyLJObgnOHa5vml8SEcChMuzOQuCSU9wNjO t26urEoct8LArf0K/xztjxpEpDCgnf4Cr/KmZfi4/2Sjv4jwQzkLVuiwADraHIJbLv1m UMNs92dakWYK0cBbuwOx/sYpUWWyhVmv6Q0LqXzJjtpY4Z0zsnpI2UCrkAdAOSh7geEJ LCnw==" + }, + { + "name": "X-Gm-Message-State", + "value": "AOJu0YyCYZOHIzoRHgMd7foUCpX2JYDwPS2XsTjWiMkkR364/mhFKFsQ vixTj7QM6pDecoDxn8pS0btM7b8z+cwo/8hFiYNgp26wK5L0aGymu+M8OuEk/73fuEthWVV0eko B9LvS5+qixa/oNO/HkRJpVTQmAH7OTT25KeZJj0Dd3x1JqsrfiNE=" + }, + { + "name": "X-Google-Smtp-Source", + "value": "AGHT+IHCMrQhOT9sgPUOQJL1oVfxMruiLg3BZ5DXqKMdQ7PYF2puka6Ovabv3BPg08CeyS1ovKydIdwHT2uleZkkAaU=" + }, + { + "name": "X-Received", + "value": "by 2002:a05:6102:5092:b0:4a3:e05e:f6a3 with SMTP id ada2fe7eead31-4a900e11589mr14462681137.3.1730568813787; Sat, 02 Nov 2024 10:33:33 -0700 (PDT)" + }, + { + "name": "MIME-Version", + "value": "1.0" + }, + { + "name": "References", + "value": "" + }, + { + "name": "In-Reply-To", + "value": "" + }, + { + "name": "From", + "value": "test_user_1 1 " + }, + { + "name": "Date", + "value": "Sat, 2 Nov 2024 10:33:22 -0700" + }, + { + "name": "Message-ID", + "value": "" + }, + { + "name": "Subject", + "value": "Re: Email Chain 1" + }, + { + "name": "To", + "value": "Test Admin Admin " + }, + { + "name": "Content-Type", + "value": "multipart/alternative; boundary=\"00000000000067dbf70625f1730f\"" + } + ], + "parts": [ + { + "mimeType": "text/plain", + "body": { + "data": "VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDENCg0KT24gU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozM-KAr0FNIFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQp3cm90ZToNCg0KPiBUaGlzIGlzIGVtYWlsIDEgaW4gY2hhaW4gMQ0KPg0K" + } + }, + { + "mimeType": "text/html", + "body": { + "data": 
"PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj5PbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iPmFkbWluQG9ueXgtdGVzdC5jb208L2E-Jmd0OyB3cm90ZTo8YnI-PC9kaXY-PGJsb2NrcXVvdGUgY2xhc3M9ImdtYWlsX3F1b3RlIiBzdHlsZT0ibWFyZ2luOjBweCAwcHggMHB4IDAuOGV4O2JvcmRlci1sZWZ0OjFweCBzb2xpZCByZ2IoMjA0LDIwNCwyMDQpO3BhZGRpbmctbGVmdDoxZXgiPjxkaXYgZGlyPSJsdHIiPlRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxPC9kaXY-DQo8L2Jsb2NrcXVvdGU-PC9kaXY-DQo=" + } + } + ] + } + }, + { + "id": "192edf157175fcec", + "payload": { + "headers": [ + { + "name": "MIME-Version", + "value": "1.0" + }, + { + "name": "Date", + "value": "Sat, 2 Nov 2024 10:34:29 -0700" + }, + { + "name": "References", + "value": " " + }, + { + "name": "In-Reply-To", + "value": "" + }, + { + "name": "Bcc", + "value": "test_user_3@onyx-test.com" + }, + { + "name": "Message-ID", + "value": "" + }, + { + "name": "Subject", + "value": "Fwd: Email Chain 1" + }, + { + "name": "From", + "value": "Test Admin Admin " + }, + { + "name": "To", + "value": "test_user_2 2 " + }, + { + "name": "Content-Type", + "value": "multipart/alternative; boundary=\"000000000000bf7afd0625f1764f\"" + } + ], + "parts": [ + { + "mimeType": "text/plain", + "body": { + "data": "VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDENCg0KLS0tLS0tLS0tLSBGb3J3YXJkZWQgbWVzc2FnZSAtLS0tLS0tLS0NCkZyb206IHRlc3RfdXNlcl8xIDEgPHRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20-DQpEYXRlOiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0NClN1YmplY3Q6IFJlOiBFbWFpbCBDaGFpbiAxDQpUbzogVGVzdCBBZG1pbiBBZG1pbiA8YWRtaW5Ab255eC10ZXN0LmNvbT4NCg0KDQpUaGlzIGlzIGVtYWlsIDIgaW4gY2hhaW4gMQ0KDQpPbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiA8YWRtaW5Ab255eC10ZXN0LmNvbT4NCndyb3RlOg0KDQo-IFRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxDQo-DQo=" + } + }, + { + "mimeType": "text/html", + "body": { + "data": "PGRpdiBkaXI9Imx0ciI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj4tLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLTxicj5Gcm9tOiA8c3Ryb25nIGNsYXNzPSJnbWFpbF9zZW5kZXJuYW1lIiBkaXI9ImF1dG8iPnRlc3RfdXNlcl8xIDE8L3N0cm9uZz4gPHNwYW4gZGlyPSJhdXRvIj4mbHQ7PGEgaHJlZj0ibWFpbHRvOnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20iPnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb208L2E-Jmd0Ozwvc3Bhbj48YnI-RGF0ZTogU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozM-KAr0FNPGJyPlN1YmplY3Q6IFJlOiBFbWFpbCBDaGFpbiAxPGJyPlRvOiBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSI-YWRtaW5Ab255eC10ZXN0LmNvbTwvYT4mZ3Q7PGJyPjwvZGl2Pjxicj48YnI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAyIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj5PbiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0gVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj5hZG1pbkBvbnl4LXRlc3QuY29tPC9hPiZndDsgd3JvdGU6PGJyPjwvZGl2PjxibG9ja3F1b3RlIGNsYXNzPSJnbWFpbF9xdW90ZSIgc3R5bGU9Im1hcmdpbjowcHggMHB4IDBweCAwLjhleDtib3JkZXItbGVmdDoxcHggc29saWQgcmdiKDIwNCwyMDQsMjA0KTtwYWRkaW5nLWxlZnQ6MWV4Ij48ZGl2IGRpcj0ibHRyIj5UaGlzIGlzIGVtYWlsIDEgaW4gY2hhaW4gMTwvZGl2Pg0KPC9ibG9ja3F1b3RlPjwvZGl2Pg0KPC9kaXY-PC9kaXY-DQo=" + } + } + ] + } + }, + { + "id": "192edf1e8f7ecbb4", + "payload": { + "headers": [ + { + "name": "Delivered-To", + "value": "admin@onyx-test.com" + }, + { + "name": "Received", + "value": "by 2002:a59:b3cc:0:b0:491:1bbc:5e54 with SMTP id g12csp1874156vqt; Sat, 2 Nov 2024 10:35:07 -0700 (PDT)" + 
}, + { + "name": "X-Received", + "value": "by 2002:a05:6122:319c:b0:50d:81f9:5210 with SMTP id 71dfb90a1353d-5105d128958mr15853812e0c.13.1730568906834; Sat, 02 Nov 2024 10:35:06 -0700 (PDT)" + }, + { + "name": "ARC-Seal", + "value": "i=1; a=rsa-sha256; t=1730568906; cv=none; d=google.com; s=arc-20240605; b=JUd7S6ql1poKM5ox92op2g2Z67AS8sEkp5f/S+Mr5+7KSichsjAwixWg/YhhRhvaY/ UcykrbdaAeWfCuGtJgSq1nr1z5hB3iAltv/D2XCdJdOXzVDpVvaV9lT/YU6266VKtsnq gFVKfjyMe/MnNKvDITQL67A2gRvhiR3XWxwEVvrMArMpUb9bbudlF/5L3MQY4BCIvWLL 9uBv1ZnclghscsxspoG3CkULkGqHGUTKq6bPoUn/hOljiVdsVVagoOwhbDEcyMRKUDnm 2t3H7iiujhlBIDbRoLJR/6C+A6AMyNKPAFA3axM6EXrTOADMZ8a0JqFj8O4rktYpRV+d zHxQ==" + }, + { + "name": "ARC-Message-Signature", + "value": "i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20240605; h=to:subject:message-id:date:from:in-reply-to:references:mime-version :dkim-signature; bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=; fh=/JhVJcrFVXWWzpGRY8HXA/cCDTQzCntn8VCeyDmjzic=; b=IarHhl5g5tjBhlMRRXo6WwTzaFOI4Q3w4ebNunftDUHwzV7Qu1hY0y7r3SRNaBb+qD ZncYUI6PF/Oo7eMG65IloXfu+kHUI8NJMaoERUWgEk21Tj6cOSRO4x/W6V5PSX7a4lWZ K1cNdAlaiWI09Esv07Vel975Bgrd+XiCwoVgJAAslHOJ2bZwSYWzvwLqdkCRVrAGJQ9/ I80kvOnNVesIFdIR6SGrhdz8xNIIoe60k8PjJRzkmzy/tEeKCYBz6W+NW4xoIaAVmKUw RvjI8JozUVkGzh+LLyx64MakPCZPWM+ft+D35JodarYh+KesF+HV/Oe7rjaw7JXZ1WoE OdJQ==; dara=google.com" + }, + { + "name": "ARC-Authentication-Results", + "value": "i=1; mx.google.com; dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=1U8JkCbL; spf=none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_3@onyx-test.com; dara=pass header.i=@onyx-test.com" + }, + { + "name": "Return-Path", + "value": "" + }, + { + "name": "Received", + "value": "from mail-sor-f41.google.com (mail-sor-f41.google.com. 
[209.85.220.41]) by mx.google.com with SMTPS id 71dfb90a1353d-5106f3f9037sor1051490e0c.7.2024.11.02.10.35.06 for (Google Transport Security); Sat, 02 Nov 2024 10:35:06 -0700 (PDT)" + }, + { + "name": "Received-SPF", + "value": "none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) client-ip=209.85.220.41;" + }, + { + "name": "Authentication-Results", + "value": "mx.google.com; dkim=pass header.i=@onyx-test-com.20230601.gappssmtp.com header.s=20230601 header.b=1U8JkCbL; spf=none (google.com: test_user_3@onyx-test.com does not designate permitted sender hosts) smtp.mailfrom=test_user_3@onyx-test.com; dara=pass header.i=@onyx-test.com" + }, + { + "name": "DKIM-Signature", + "value": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=onyx-test-com.20230601.gappssmtp.com; s=20230601; t=1730568906; x=1731173706; darn=onyx-test.com; h=to:subject:message-id:date:from:in-reply-to:references:mime-version :from:to:cc:subject:date:message-id:reply-to; bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=; b=1U8JkCbLjicGtH7otVX3QjKv/XK5fGnmOIVMTD/b9cO1w8ai2GwCuJbBo+z1IuGqto aRuNCcEqUIaFvVFiezvhL9xg7scIwHHvLOrSpmc0h0JMSx8q4kKaUGKEJpewsYvkStmr DYv/cUIeaPTIChSuUDV7FVMhf7jIyIaYry3i9/EIlw+on18nD30C9kXwds5yWW8XGvtR /OUuSdgJzuoNmypUt8v9Ebqd+LP23YTs+78/G1Ag+JjugxxF+C9cm7SxmooWueukRkm8 o8nQO5QVx/y/xsCZdM2XXcKCLcZIntuY48amlfFyIqrhG1/DEM6htD64meMGctNTptQf jHrw==" + }, + { + "name": "X-Google-DKIM-Signature", + "value": "v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20230601; t=1730568906; x=1731173706; h=to:subject:message-id:date:from:in-reply-to:references:mime-version :x-gm-message-state:from:to:cc:subject:date:message-id:reply-to; bh=K0g0X/4URFSC1nuXjI7ZESJA66WnWcqwgfHOUDQ/kQo=; b=J4+ozlusGGM1Hn95EZkDeYbExgkyOlAdcY6LcV4Wx1zeI78HtEXGgvqcZ5sP7HzS1X /A3i7WkgmjpC9bU2/zKLrfXDvYQ7udQwTJtKsKaUo4O65Al7Wtgz8e8rBDYikhqEEAZQ GbEwqp+qa+v0T4rPhkQKd4zpIE3AUd3eh5u5iF/UEYc1NcyV35uMGWRP4jOK6F67MwS7 73MgObcGqmBH48I4K+ITYAkNEMGOBpY6fheGxCxyDpcG5gbf8swlWX2Dd0EM9H72o+Xb jvAslOq1lZzPZUgyyZJ2wVEASxF8S7depiOLcTPKwsw+pgXIMAUBExBvu0u4PhO0qG+z pftQ==" + }, + { + "name": "X-Gm-Message-State", + "value": "AOJu0Yy2r0aT3w7HBU7t0JGla+x3AddG9WdnQT06r6T/HGZwZ9Wp9TUs Orb/HMtgvXivtYFkG14NJkMTBO4EqSynmzaxAvEheDXB1uYE2LS21XoqrvycvYQh3GUHBwUdS8L lE6BUjm4TJfXlZWAqKRxg4C0j1UFSuVdkXf6P1GCsdyKKTeS6A9eohw==" + }, + { + "name": "X-Google-Smtp-Source", + "value": "AGHT+IHXTB7Ar9w/Q3G3gCT19SVELYvWl30pNGuNiTmkYZgMWFS7YUWTkG/DS4/mrjMRXpYuclOLHv8BeOmw9Jovkr4=" + }, + { + "name": "X-Received", + "value": "by 2002:a05:6102:3a10:b0:4a9:49:26d2 with SMTP id ada2fe7eead31-4a90109fb68mr15589362137.29.1730568906301; Sat, 02 Nov 2024 10:35:06 -0700 (PDT)" + }, + { + "name": "MIME-Version", + "value": "1.0" + }, + { + "name": "References", + "value": " " + }, + { + "name": "In-Reply-To", + "value": "" + }, + { + "name": "From", + "value": "test_user_3 3 " + }, + { + "name": "Date", + "value": "Sat, 2 Nov 2024 10:34:55 -0700" + }, + { + "name": "Message-ID", + "value": "" + }, + { + "name": "Subject", + "value": "Re: Email Chain 1" + }, + { + "name": "To", + "value": "Test Admin Admin " + }, + { + "name": "Content-Type", + "value": "multipart/alternative; boundary=\"000000000000eb82a70625f178cf\"" + } + ], + "parts": [ + { + "mimeType": "text/plain", + "body": { + "data": 
"VGhpcyBpcyBlbWFpbCA0IGluIGNoYWluIDENCg0KT24gU2F0LCBOb3YgMiwgMjAyNCBhdCAxMDozNOKAr0FNIFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQp3cm90ZToNCg0KPiBUaGlzIGlzIGVtYWlsIDMgaW4gY2hhaW4gMQ0KPg0KPiAtLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLQ0KPiBGcm9tOiB0ZXN0X3VzZXJfMSAxIDx0ZXN0X3VzZXJfMUBvbnl4LXRlc3QuY29tPg0KPiBEYXRlOiBTYXQsIE5vdiAyLCAyMDI0IGF0IDEwOjMz4oCvQU0NCj4gU3ViamVjdDogUmU6IEVtYWlsIENoYWluIDENCj4gVG86IFRlc3QgQWRtaW4gQWRtaW4gPGFkbWluQG9ueXgtdGVzdC5jb20-DQo-DQo-DQo-IFRoaXMgaXMgZW1haWwgMiBpbiBjaGFpbiAxDQo-DQo-IE9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTSBUZXN0IEFkbWluIEFkbWluIDxhZG1pbkBvbnl4LXRlc3QuY29tPg0KPiB3cm90ZToNCj4NCj4-IFRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxDQo-Pg0KPg0K" + } + }, + { + "mimeType": "text/html", + "body": { + "data": "PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCA0IGluIGNoYWluIDE8YnIgY2xhc3M9ImdtYWlsLUFwcGxlLWludGVyY2hhbmdlLW5ld2xpbmUiPjwvZGl2Pjxicj48ZGl2IGNsYXNzPSJnbWFpbF9xdW90ZSI-PGRpdiBkaXI9Imx0ciIgY2xhc3M9ImdtYWlsX2F0dHIiPk9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzTigK9BTSBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSI-YWRtaW5Ab255eC10ZXN0LmNvbTwvYT4mZ3Q7IHdyb3RlOjxicj48L2Rpdj48YmxvY2txdW90ZSBjbGFzcz0iZ21haWxfcXVvdGUiIHN0eWxlPSJtYXJnaW46MHB4IDBweCAwcHggMC44ZXg7Ym9yZGVyLWxlZnQ6MXB4IHNvbGlkIHJnYigyMDQsMjA0LDIwNCk7cGFkZGluZy1sZWZ0OjFleCI-PGRpdiBkaXI9Imx0ciI-PGRpdiBkaXI9Imx0ciI-VGhpcyBpcyBlbWFpbCAzIGluIGNoYWluIDE8L2Rpdj48YnI-PGRpdiBjbGFzcz0iZ21haWxfcXVvdGUiPjxkaXYgZGlyPSJsdHIiIGNsYXNzPSJnbWFpbF9hdHRyIj4tLS0tLS0tLS0tIEZvcndhcmRlZCBtZXNzYWdlIC0tLS0tLS0tLTxicj5Gcm9tOiA8c3Ryb25nIGNsYXNzPSJnbWFpbF9zZW5kZXJuYW1lIiBkaXI9ImF1dG8iPnRlc3RfdXNlcl8xIDE8L3N0cm9uZz4gPHNwYW4gZGlyPSJhdXRvIj4mbHQ7PGEgaHJlZj0ibWFpbHRvOnRlc3RfdXNlcl8xQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj50ZXN0X3VzZXJfMUBvbnl4LXRlc3QuY29tPC9hPiZndDs8L3NwYW4-PGJyPkRhdGU6IFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTTxicj5TdWJqZWN0OiBSZTogRW1haWwgQ2hhaW4gMTxicj5UbzogVGVzdCBBZG1pbiBBZG1pbiAmbHQ7PGEgaHJlZj0ibWFpbHRvOmFkbWluQG9ueXgtdGVzdC5jb20iIHRhcmdldD0iX2JsYW5rIj5hZG1pbkBvbnl4LXRlc3QuY29tPC9hPiZndDs8YnI-PC9kaXY-PGJyPjxicj48ZGl2IGRpcj0ibHRyIj5UaGlzIGlzIGVtYWlsIDIgaW4gY2hhaW4gMTwvZGl2Pjxicj48ZGl2IGNsYXNzPSJnbWFpbF9xdW90ZSI-PGRpdiBkaXI9Imx0ciIgY2xhc3M9ImdtYWlsX2F0dHIiPk9uIFNhdCwgTm92IDIsIDIwMjQgYXQgMTA6MzPigK9BTSBUZXN0IEFkbWluIEFkbWluICZsdDs8YSBocmVmPSJtYWlsdG86YWRtaW5Ab255eC10ZXN0LmNvbSIgdGFyZ2V0PSJfYmxhbmsiPmFkbWluQG9ueXgtdGVzdC5jb208L2E-Jmd0OyB3cm90ZTo8YnI-PC9kaXY-PGJsb2NrcXVvdGUgY2xhc3M9ImdtYWlsX3F1b3RlIiBzdHlsZT0ibWFyZ2luOjBweCAwcHggMHB4IDAuOGV4O2JvcmRlci1sZWZ0OjFweCBzb2xpZCByZ2IoMjA0LDIwNCwyMDQpO3BhZGRpbmctbGVmdDoxZXgiPjxkaXYgZGlyPSJsdHIiPlRoaXMgaXMgZW1haWwgMSBpbiBjaGFpbiAxPC9kaXY-DQo8L2Jsb2NrcXVvdGU-PC9kaXY-DQo8L2Rpdj48L2Rpdj4NCjwvYmxvY2txdW90ZT48L2Rpdj4NCg==" + } + } + ] + } + } + ] +} diff --git a/backend/tests/unit/danswer/connectors/mediawiki/test_mediawiki_family.py b/backend/tests/unit/danswer/connectors/mediawiki/test_mediawiki_family.py index 35a189f6dd4..95e8a0cd902 100644 --- a/backend/tests/unit/danswer/connectors/mediawiki/test_mediawiki_family.py +++ b/backend/tests/unit/danswer/connectors/mediawiki/test_mediawiki_family.py @@ -7,10 +7,14 @@ from danswer.connectors.mediawiki import family + +# Disabling these tests as they are flaky and rely on external wikis that are maintained by just fan communities + + NON_BUILTIN_WIKIS: Final[list[tuple[str, str]]] = [ ("https://fallout.fandom.com", "falloutwiki"), ("https://harrypotter.fandom.com/wiki/", "harrypotterwiki"), - ("https://artofproblemsolving.com/wiki", "artofproblemsolving"), + # ("https://artofproblemsolving.com/wiki", 
"artofproblemsolving"), # FLAKY ("https://www.bogleheads.org/wiki/Main_Page", "bogleheadswiki"), ("https://bogleheads.org/wiki/Main_Page", "bogleheadswiki"), ("https://www.dandwiki.com/wiki/", "dungeonsanddragons"), @@ -19,6 +23,7 @@ # TODO: Add support for more builtin family types from `pywikibot.families`. +@pytest.mark.skip(reason="Temporarily skipped") @pytest.mark.parametrize( "url, name, expected", [ @@ -48,6 +53,7 @@ def test_family_class_dispatch_builtins( assert family.family_class_dispatch(url, name) == expected +@pytest.mark.skip(reason="Temporarily skipped") @pytest.mark.parametrize("url, name", NON_BUILTIN_WIKIS) def test_family_class_dispatch_on_non_builtins_generates_new_class_fast( url: str, name: str, mocker: MockFixture @@ -58,6 +64,7 @@ def test_family_class_dispatch_on_non_builtins_generates_new_class_fast( mock_generate_family_class.assert_called_once_with(url, name) +@pytest.mark.skip(reason="Temporarily skipped") @pytest.mark.slow @pytest.mark.parametrize("url, name", NON_BUILTIN_WIKIS) def test_family_class_dispatch_on_non_builtins_generates_new_class_slow( diff --git a/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py b/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py index 2a2c841a466..9659c0afc0f 100644 --- a/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py +++ b/backend/tests/unit/danswer/connectors/mediawiki/test_wiki.py @@ -1,6 +1,7 @@ from __future__ import annotations import datetime +import tempfile from collections.abc import Iterable import pytest @@ -9,6 +10,10 @@ from danswer.connectors.mediawiki import wiki +# Some of these tests are disabled for now due to flakiness with wikipedia as the backend + +pywikibot.config.base_dir = tempfile.TemporaryDirectory().name + @pytest.fixture def site() -> pywikibot.Site: @@ -80,6 +85,7 @@ def latest_revision(self) -> pywikibot.page.Revision: ) +@pytest.mark.skip(reason="Test disabled") def test_get_doc_from_page(site: pywikibot.Site) -> None: test_page = MockPage(site, "Test Page", _has_categories=True) doc = wiki.get_doc_from_page(test_page, site, wiki.DocumentSource.MEDIAWIKI) @@ -100,9 +106,10 @@ def test_get_doc_from_page(site: pywikibot.Site) -> None: assert doc.metadata == { "categories": [category.title() for category in test_page.categories()] } - assert doc.id == test_page.pageid + assert doc.id == f"MEDIAWIKI_{test_page.pageid}_{test_page.full_url()}" +@pytest.mark.skip(reason="Test disabled") def test_mediawiki_connector_recurse_depth() -> None: """Test that the recurse_depth parameter is parsed correctly. 
@@ -132,6 +139,7 @@ def test_mediawiki_connector_recurse_depth() -> None: assert connector.recurse_depth == recurse_depth +@pytest.mark.skip(reason="Test disabled") def test_load_from_state_calls_poll_source_with_nones(mocker: MockFixture) -> None: connector = wiki.MediaWikiConnector("wikipedia.org", [], [], 0, "test") poll_source = mocker.patch.object(connector, "poll_source") diff --git a/backend/tests/unit/danswer/llm/answering/conftest.py b/backend/tests/unit/danswer/llm/answering/conftest.py new file mode 100644 index 00000000000..a0077b53917 --- /dev/null +++ b/backend/tests/unit/danswer/llm/answering/conftest.py @@ -0,0 +1,113 @@ +import json +from datetime import datetime +from unittest.mock import MagicMock + +import pytest +from langchain_core.messages import SystemMessage + +from danswer.chat.models import LlmDoc +from danswer.configs.constants import DocumentSource +from danswer.llm.answering.models import AnswerStyleConfig +from danswer.llm.answering.models import CitationConfig +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.prompts.build import AnswerPromptBuilder +from danswer.llm.interfaces import LLMConfig +from danswer.tools.models import ToolResponse +from danswer.tools.tool_implementations.search.search_tool import SearchTool +from danswer.tools.tool_implementations.search_like_tool_utils import ( + FINAL_CONTEXT_DOCUMENTS_ID, +) + +QUERY = "Test question" +DEFAULT_SEARCH_ARGS = {"query": "search"} + + +@pytest.fixture +def answer_style_config() -> AnswerStyleConfig: + return AnswerStyleConfig(citation_config=CitationConfig()) + + +@pytest.fixture +def prompt_config() -> PromptConfig: + return PromptConfig( + system_prompt="System prompt", + task_prompt="Task prompt", + datetime_aware=False, + include_citations=True, + ) + + +@pytest.fixture +def mock_llm() -> MagicMock: + mock_llm_obj = MagicMock() + mock_llm_obj.config = LLMConfig( + model_provider="openai", + model_name="gpt-4o", + temperature=0.0, + api_key=None, + api_base=None, + api_version=None, + ) + return mock_llm_obj + + +@pytest.fixture +def mock_search_results() -> list[LlmDoc]: + return [ + LlmDoc( + content="Search result 1", + source_type=DocumentSource.WEB, + metadata={"id": "doc1"}, + document_id="doc1", + blurb="Blurb 1", + semantic_identifier="Semantic ID 1", + updated_at=datetime(2023, 1, 1), + link="https://example.com/doc1", + source_links={0: "https://example.com/doc1"}, + ), + LlmDoc( + content="Search result 2", + source_type=DocumentSource.WEB, + metadata={"id": "doc2"}, + document_id="doc2", + blurb="Blurb 2", + semantic_identifier="Semantic ID 2", + updated_at=datetime(2023, 1, 2), + link="https://example.com/doc2", + source_links={0: "https://example.com/doc2"}, + ), + ] + + +@pytest.fixture +def mock_search_tool(mock_search_results: list[LlmDoc]) -> MagicMock: + mock_tool = MagicMock(spec=SearchTool) + mock_tool.name = "search" + mock_tool.build_tool_message_content.return_value = "search_response" + mock_tool.get_args_for_non_tool_calling_llm.return_value = DEFAULT_SEARCH_ARGS + mock_tool.final_result.return_value = [ + json.loads(doc.model_dump_json()) for doc in mock_search_results + ] + mock_tool.run.return_value = [ + ToolResponse(id=FINAL_CONTEXT_DOCUMENTS_ID, response=mock_search_results) + ] + mock_tool.tool_definition.return_value = { + "type": "function", + "function": { + "name": "search", + "description": "Search for information", + "parameters": { + "type": "object", + "properties": { + "query": {"type": "string", "description": "The search 
query"}, + }, + "required": ["query"], + }, + }, + } + mock_post_search_tool_prompt_builder = MagicMock(spec=AnswerPromptBuilder) + mock_post_search_tool_prompt_builder.build.return_value = [ + SystemMessage(content="Updated system prompt"), + ] + mock_tool.build_next_prompt.return_value = mock_post_search_tool_prompt_builder + return mock_tool diff --git a/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py b/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py index 12e3254d6d6..e6a5fe1f027 100644 --- a/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py +++ b/backend/tests/unit/danswer/llm/answering/stream_processing/test_citation_processing.py @@ -7,7 +7,7 @@ from danswer.chat.models import LlmDoc from danswer.configs.constants import DocumentSource from danswer.llm.answering.stream_processing.citation_processing import ( - extract_citations_from_stream, + CitationProcessor, ) from danswer.llm.answering.stream_processing.utils import DocumentIdOrderMapping @@ -70,14 +70,16 @@ def process_text( ) -> tuple[str, list[CitationInfo]]: mock_docs, mock_doc_id_to_rank_map = mock_data mapping = DocumentIdOrderMapping(order_mapping=mock_doc_id_to_rank_map) - result = list( - extract_citations_from_stream( - tokens=iter(tokens), - context_docs=mock_docs, - doc_id_to_rank_map=mapping, - stop_stream=None, - ) + processor = CitationProcessor( + context_docs=mock_docs, + doc_id_to_rank_map=mapping, + stop_stream=None, ) + result: list[DanswerAnswerPiece | CitationInfo] = [] + for token in tokens: + result.extend(processor.process_token(token)) + result.extend(processor.process_token(None)) + final_answer_text = "" citations = [] for piece in result: diff --git a/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py b/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py index e80c5c4f657..c154c5a5b0c 100644 --- a/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py +++ b/backend/tests/unit/danswer/llm/answering/stream_processing/test_quote_processing.py @@ -6,7 +6,7 @@ from danswer.chat.models import LlmDoc from danswer.configs.constants import DocumentSource from danswer.llm.answering.stream_processing.quotes_processing import ( - process_model_tokens, + QuotesProcessor, ) mock_docs = [ @@ -25,179 +25,202 @@ ] -tokens_with_quotes = [ - "{", - "\n ", - '"answer": "Yes', - ", Danswer allows", - " customized prompts. This", - " feature", - " is currently being", - " developed and implemente", - "d to", - " improve", - " the accuracy", - " of", - " Language", - " Models (", - "LL", - "Ms) for", - " different", - " companies", - ".", - " The custom", - "ized prompts feature", - " woul", - "d allow users to ad", - "d person", - "alized prom", - "pts through", - " an", - " interface or", - " metho", - "d,", - " which would then be used to", - " train", - " the LLM.", - " This enhancement", - " aims to make", - " Danswer more", - " adaptable to", - " different", - " business", - " contexts", - " by", - " tail", - "oring it", - " to the specific language", - " an", - "d terminology", - " used within", - " a", - " company.", - " Additionally", - ",", - " Danswer already", - " supports creating", - " custom AI", - " Assistants with", - " different", - " prom", - "pts and backing", - " knowledge", - " sets", - ",", - " which", - " is", - " a form", - " of prompt", - " customization. 
However, it", - "'s important to nLogging Details LiteLLM-Success Call: Noneote that some", - " aspects", - " of prompt", - " customization,", - " such as for", - " Sl", - "ack", - "b", - "ots, may", - " still", - " be in", - " development or have", - ' limitations.",', - '\n "quotes": [', - '\n "We', - " woul", - "d like to ad", - "d customized prompts for", - " different", - " companies to improve the accuracy of", - " Language", - " Model", - " (LLM)", - '.",\n "A', - " new", - " feature that", - " allows users to add personalize", - "d prompts.", - " This would involve", - " creating", - " an interface or method for", - " users to input", - " their", - " own", - " prom", - "pts,", - " which would then be used to", - ' train the LLM.",', - '\n "Create', - " custom AI Assistants with", - " different prompts and backing knowledge", - ' sets.",', - '\n "This', - " PR", - " fixes", - " https", - "://github.com/dan", - "swer-ai/dan", - "swer/issues/1", - "584", - " by", - " setting", - " the system", - " default", - " prompt for", - " sl", - "ackbots const", - "rained by", - " ", - "document sets", - ".", - " It", - " probably", - " isn", - "'t ideal", - " -", - " it", - " might", - " be pref", - "erable to be", - " able to select", - " a prompt for", - " the", - " slackbot from", - " the", - " admin", - " panel", - " -", - " but it sol", - "ves the immediate problem", - " of", - " the slack", - " listener", - " cr", - "ashing when", - " configure", - "d this", - ' way."\n ]', - "\n}", - "", -] +def _process_tokens( + processor: QuotesProcessor, tokens: list[str] +) -> tuple[str, list[str]]: + """Process a list of tokens and return the answer and quotes. + + Args: + processor: QuotesProcessor instance + tokens: List of tokens to process + + Returns: + Tuple of (answer_text, list_of_quotes) + """ + answer = "" + quotes: list[str] = [] + + # need to add a None to the end to simulate the end of the stream + for token in tokens + [None]: + for output in processor.process_token(token): + if isinstance(output, DanswerAnswerPiece): + if output.answer_piece: + answer += output.answer_piece + elif isinstance(output, DanswerQuotes): + quotes.extend(q.quote for q in output.quotes) + + return answer, quotes def test_process_model_tokens_answer() -> None: - gen = process_model_tokens(tokens=iter(tokens_with_quotes), context_docs=mock_docs) + tokens_with_quotes = [ + "{", + "\n ", + '"answer": "Yes', + ", Danswer allows", + " customized prompts. This", + " feature", + " is currently being", + " developed and implemente", + "d to", + " improve", + " the accuracy", + " of", + " Language", + " Models (", + "LL", + "Ms) for", + " different", + " companies", + ".", + " The custom", + "ized prompts feature", + " woul", + "d allow users to ad", + "d person", + "alized prom", + "pts through", + " an", + " interface or", + " metho", + "d,", + " which would then be used to", + " train", + " the LLM.", + " This enhancement", + " aims to make", + " Danswer more", + " adaptable to", + " different", + " business", + " contexts", + " by", + " tail", + "oring it", + " to the specific language", + " an", + "d terminology", + " used within", + " a", + " company.", + " Additionally", + ",", + " Danswer already", + " supports creating", + " custom AI", + " Assistants with", + " different", + " prom", + "pts and backing", + " knowledge", + " sets", + ",", + " which", + " is", + " a form", + " of prompt", + " customization. 
However, it", + "'s important to nLogging Details LiteLLM-Success Call: Noneote that some", + " aspects", + " of prompt", + " customization,", + " such as for", + " Sl", + "ack", + "b", + "ots, may", + " still", + " be in", + " development or have", + ' limitations.",', + '\n "quotes": [', + '\n "We', + " woul", + "d like to ad", + "d customized prompts for", + " different", + " companies to improve the accuracy of", + " Language", + " Model", + " (LLM)", + '.",\n "A', + " new", + " feature that", + " allows users to add personalize", + "d prompts.", + " This would involve", + " creating", + " an interface or method for", + " users to input", + " their", + " own", + " prom", + "pts,", + " which would then be used to", + ' train the LLM.",', + '\n "Create', + " custom AI Assistants with", + " different prompts and backing knowledge", + ' sets.",', + '\n "This', + " PR", + " fixes", + " https", + "://github.com/dan", + "swer-ai/dan", + "swer/issues/1", + "584", + " by", + " setting", + " the system", + " default", + " prompt for", + " sl", + "ackbots const", + "rained by", + " ", + "document sets", + ".", + " It", + " probably", + " isn", + "'t ideal", + " -", + " it", + " might", + " be pref", + "erable to be", + " able to select", + " a prompt for", + " the", + " slackbot from", + " the", + " admin", + " panel", + " -", + " but it sol", + "ves the immediate problem", + " of", + " the slack", + " listener", + " cr", + "ashing when", + " configure", + "d this", + ' way."\n ]', + "\n}", + "", + ] + + processor = QuotesProcessor(context_docs=mock_docs) + answer, quotes = _process_tokens(processor, tokens_with_quotes) s_json = "".join(tokens_with_quotes) j = json.loads(s_json) expected_answer = j["answer"] - actual = "" - for o in gen: - if isinstance(o, DanswerAnswerPiece): - if o.answer_piece: - actual += o.answer_piece - - assert expected_answer == actual + assert expected_answer == answer + # NOTE: no quotes, since the docs don't match the quotes + assert len(quotes) == 0 def test_simple_json_answer() -> None: @@ -214,16 +237,11 @@ def test_simple_json_answer() -> None: "\n", "```", ] - gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) - - expected_answer = "This is a simple answer." - actual = "".join( - o.answer_piece - for o in gen - if isinstance(o, DanswerAnswerPiece) and o.answer_piece - ) + processor = QuotesProcessor(context_docs=mock_docs) + answer, quotes = _process_tokens(processor, tokens) - assert expected_answer == actual + assert "This is a simple answer." == answer + assert len(quotes) == 0 def test_json_answer_with_quotes() -> None: @@ -242,16 +260,21 @@ def test_json_answer_with_quotes() -> None: "\n", "```", ] - gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) + processor = QuotesProcessor(context_docs=mock_docs) + answer, quotes = _process_tokens(processor, tokens) + + assert "This is a split answer." == answer + assert len(quotes) == 0 - expected_answer = "This is a split answer." 
- actual = "".join( - o.answer_piece - for o in gen - if isinstance(o, DanswerAnswerPiece) and o.answer_piece - ) - assert expected_answer == actual +def test_json_answer_with_quotes_one_chunk() -> None: + tokens = ['```json\n{"answer": "z",\n"quotes": ["Document"]\n}\n```'] + processor = QuotesProcessor(context_docs=mock_docs) + answer, quotes = _process_tokens(processor, tokens) + + assert "z" == answer + assert len(quotes) == 1 + assert quotes[0] == "Document" def test_json_answer_split_tokens() -> None: @@ -271,16 +294,11 @@ def test_json_answer_split_tokens() -> None: "\n", "```", ] - gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) - - expected_answer = "This is a split answer." - actual = "".join( - o.answer_piece - for o in gen - if isinstance(o, DanswerAnswerPiece) and o.answer_piece - ) + processor = QuotesProcessor(context_docs=mock_docs) + answer, quotes = _process_tokens(processor, tokens) - assert expected_answer == actual + assert "This is a split answer." == answer + assert len(quotes) == 0 def test_lengthy_prefixed_json_with_quotes() -> None: @@ -298,23 +316,12 @@ def test_lengthy_prefixed_json_with_quotes() -> None: "\n", "```", ] + processor = QuotesProcessor(context_docs=mock_docs) + answer, quotes = _process_tokens(processor, tokens) - gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) - - actual_answer = "" - actual_count = 0 - for o in gen: - if isinstance(o, DanswerAnswerPiece): - if o.answer_piece: - actual_answer += o.answer_piece - continue - - if isinstance(o, DanswerQuotes): - for q in o.quotes: - assert q.quote == "Document" - actual_count += 1 - assert "This is a simple answer." == actual_answer - assert 1 == actual_count + assert "This is a simple answer." == answer + assert len(quotes) == 1 + assert quotes[0] == "Document" def test_prefixed_json_with_quotes() -> None: @@ -331,21 +338,9 @@ def test_prefixed_json_with_quotes() -> None: "\n", "```", ] + processor = QuotesProcessor(context_docs=mock_docs) + answer, quotes = _process_tokens(processor, tokens) - gen = process_model_tokens(tokens=iter(tokens), context_docs=mock_docs) - - actual_answer = "" - actual_count = 0 - for o in gen: - if isinstance(o, DanswerAnswerPiece): - if o.answer_piece: - actual_answer += o.answer_piece - continue - - if isinstance(o, DanswerQuotes): - for q in o.quotes: - assert q.quote == "Document" - actual_count += 1 - - assert "This is a simple answer." == actual_answer - assert 1 == actual_count + assert "This is a simple answer." 
== answer + assert len(quotes) == 1 + assert quotes[0] == "Document" diff --git a/backend/tests/unit/danswer/llm/answering/test_answer.py b/backend/tests/unit/danswer/llm/answering/test_answer.py new file mode 100644 index 00000000000..96c791cd47b --- /dev/null +++ b/backend/tests/unit/danswer/llm/answering/test_answer.py @@ -0,0 +1,405 @@ +import json +from typing import cast +from unittest.mock import MagicMock +from unittest.mock import Mock + +import pytest +from langchain_core.messages import AIMessageChunk +from langchain_core.messages import BaseMessage +from langchain_core.messages import HumanMessage +from langchain_core.messages import SystemMessage +from langchain_core.messages import ToolCall +from langchain_core.messages import ToolCallChunk + +from danswer.chat.models import CitationInfo +from danswer.chat.models import DanswerAnswerPiece +from danswer.chat.models import DanswerQuote +from danswer.chat.models import DanswerQuotes +from danswer.chat.models import LlmDoc +from danswer.chat.models import StreamStopInfo +from danswer.chat.models import StreamStopReason +from danswer.llm.answering.answer import Answer +from danswer.llm.answering.models import AnswerStyleConfig +from danswer.llm.answering.models import PromptConfig +from danswer.llm.answering.models import QuotesConfig +from danswer.llm.interfaces import LLM +from danswer.tools.force import ForceUseTool +from danswer.tools.models import ToolCallFinalResult +from danswer.tools.models import ToolCallKickoff +from danswer.tools.models import ToolResponse +from tests.unit.danswer.llm.answering.conftest import DEFAULT_SEARCH_ARGS +from tests.unit.danswer.llm.answering.conftest import QUERY + + +@pytest.fixture +def answer_instance( + mock_llm: LLM, answer_style_config: AnswerStyleConfig, prompt_config: PromptConfig +) -> Answer: + return Answer( + question=QUERY, + answer_style_config=answer_style_config, + llm=mock_llm, + prompt_config=prompt_config, + force_use_tool=ForceUseTool(force_use=False, tool_name="", args=None), + ) + + +def test_basic_answer(answer_instance: Answer) -> None: + mock_llm = cast(Mock, answer_instance.llm) + mock_llm.stream.return_value = [ + AIMessageChunk(content="This is a "), + AIMessageChunk(content="mock answer."), + ] + + output = list(answer_instance.processed_streamed_output) + assert len(output) == 2 + assert isinstance(output[0], DanswerAnswerPiece) + assert isinstance(output[1], DanswerAnswerPiece) + + full_answer = "".join( + piece.answer_piece + for piece in output + if isinstance(piece, DanswerAnswerPiece) and piece.answer_piece is not None + ) + assert full_answer == "This is a mock answer." + + assert answer_instance.llm_answer == "This is a mock answer." 
+ assert answer_instance.citations == [] + + assert mock_llm.stream.call_count == 1 + mock_llm.stream.assert_called_once_with( + prompt=[ + SystemMessage(content="System prompt"), + HumanMessage(content="Task prompt\n\nQUERY:\nTest question"), + ], + tools=None, + tool_choice=None, + structured_response_format=None, + ) + + +@pytest.mark.parametrize( + "force_use_tool, expected_tool_args", + [ + ( + ForceUseTool(force_use=False, tool_name="", args=None), + DEFAULT_SEARCH_ARGS, + ), + ( + ForceUseTool( + force_use=True, tool_name="search", args={"query": "forced search"} + ), + {"query": "forced search"}, + ), + ], +) +def test_answer_with_search_call( + answer_instance: Answer, + mock_search_results: list[LlmDoc], + mock_search_tool: MagicMock, + force_use_tool: ForceUseTool, + expected_tool_args: dict, +) -> None: + answer_instance.tools = [mock_search_tool] + answer_instance.force_use_tool = force_use_tool + + # Set up the LLM mock to return search results and then an answer + mock_llm = cast(Mock, answer_instance.llm) + + stream_side_effect: list[list[BaseMessage]] = [] + + if not force_use_tool.force_use: + tool_call_chunk = AIMessageChunk(content="") + tool_call_chunk.tool_calls = [ + ToolCall( + id="search", + name="search", + args=expected_tool_args, + ) + ] + tool_call_chunk.tool_call_chunks = [ + ToolCallChunk( + id="search", + name="search", + args=json.dumps(expected_tool_args), + index=0, + ) + ] + stream_side_effect.append([tool_call_chunk]) + + stream_side_effect.append( + [ + AIMessageChunk(content="Based on the search results, "), + AIMessageChunk(content="the answer is abc[1]. "), + AIMessageChunk(content="This is some other stuff."), + ], + ) + mock_llm.stream.side_effect = stream_side_effect + + # Process the output + output = list(answer_instance.processed_streamed_output) + print(output) + + # Updated assertions + assert len(output) == 7 + assert output[0] == ToolCallKickoff( + tool_name="search", tool_args=expected_tool_args + ) + assert output[1] == ToolResponse( + id="final_context_documents", + response=mock_search_results, + ) + assert output[2] == ToolCallFinalResult( + tool_name="search", + tool_args=expected_tool_args, + tool_result=[json.loads(doc.model_dump_json()) for doc in mock_search_results], + ) + assert output[3] == DanswerAnswerPiece(answer_piece="Based on the search results, ") + expected_citation = CitationInfo(citation_num=1, document_id="doc1") + assert output[4] == expected_citation + assert output[5] == DanswerAnswerPiece( + answer_piece="the answer is abc[[1]](https://example.com/doc1). " + ) + assert output[6] == DanswerAnswerPiece(answer_piece="This is some other stuff.") + + expected_answer = ( + "Based on the search results, " + "the answer is abc[[1]](https://example.com/doc1). " + "This is some other stuff." 
+ ) + full_answer = "".join( + piece.answer_piece + for piece in output + if isinstance(piece, DanswerAnswerPiece) and piece.answer_piece is not None + ) + assert full_answer == expected_answer + + assert answer_instance.llm_answer == expected_answer + assert len(answer_instance.citations) == 1 + assert answer_instance.citations[0] == expected_citation + + # Verify LLM calls + if not force_use_tool.force_use: + assert mock_llm.stream.call_count == 2 + first_call, second_call = mock_llm.stream.call_args_list + + # First call should include the search tool definition + assert len(first_call.kwargs["tools"]) == 1 + assert ( + first_call.kwargs["tools"][0] + == mock_search_tool.tool_definition.return_value + ) + + # Second call should not include tools (as we're just generating the final answer) + assert "tools" not in second_call.kwargs or not second_call.kwargs["tools"] + # Second call should use the returned prompt from build_next_prompt + assert ( + second_call.kwargs["prompt"] + == mock_search_tool.build_next_prompt.return_value.build.return_value + ) + + # Verify that tool_definition was called on the mock_search_tool + mock_search_tool.tool_definition.assert_called_once() + else: + assert mock_llm.stream.call_count == 1 + + call = mock_llm.stream.call_args_list[0] + assert ( + call.kwargs["prompt"] + == mock_search_tool.build_next_prompt.return_value.build.return_value + ) + + +def test_answer_with_search_no_tool_calling( + answer_instance: Answer, + mock_search_results: list[LlmDoc], + mock_search_tool: MagicMock, +) -> None: + answer_instance.tools = [mock_search_tool] + + # Set up the LLM mock to return an answer + mock_llm = cast(Mock, answer_instance.llm) + mock_llm.stream.return_value = [ + AIMessageChunk(content="Based on the search results, "), + AIMessageChunk(content="the answer is abc[1]. "), + AIMessageChunk(content="This is some other stuff."), + ] + + # Force non-tool calling behavior + answer_instance.using_tool_calling_llm = False + + # Process the output + output = list(answer_instance.processed_streamed_output) + + # Assertions + assert len(output) == 7 + assert output[0] == ToolCallKickoff( + tool_name="search", tool_args=DEFAULT_SEARCH_ARGS + ) + assert output[1] == ToolResponse( + id="final_context_documents", + response=mock_search_results, + ) + assert output[2] == ToolCallFinalResult( + tool_name="search", + tool_args=DEFAULT_SEARCH_ARGS, + tool_result=[json.loads(doc.model_dump_json()) for doc in mock_search_results], + ) + assert output[3] == DanswerAnswerPiece(answer_piece="Based on the search results, ") + expected_citation = CitationInfo(citation_num=1, document_id="doc1") + assert output[4] == expected_citation + assert output[5] == DanswerAnswerPiece( + answer_piece="the answer is abc[[1]](https://example.com/doc1). " + ) + assert output[6] == DanswerAnswerPiece(answer_piece="This is some other stuff.") + + expected_answer = ( + "Based on the search results, " + "the answer is abc[[1]](https://example.com/doc1). " + "This is some other stuff." 
+ ) + assert answer_instance.llm_answer == expected_answer + assert len(answer_instance.citations) == 1 + assert answer_instance.citations[0] == expected_citation + + # Verify LLM calls + assert mock_llm.stream.call_count == 1 + call_args = mock_llm.stream.call_args + + # Verify that no tools were passed to the LLM + assert "tools" not in call_args.kwargs or not call_args.kwargs["tools"] + + # Verify that the prompt was built correctly + assert ( + call_args.kwargs["prompt"] + == mock_search_tool.build_next_prompt.return_value.build.return_value + ) + + # Verify that get_args_for_non_tool_calling_llm was called on the mock_search_tool + mock_search_tool.get_args_for_non_tool_calling_llm.assert_called_once_with( + f"Task prompt\n\nQUERY:\n{QUERY}", [], answer_instance.llm + ) + + # Verify that the search tool's run method was called + mock_search_tool.run.assert_called_once() + + +def test_answer_with_search_call_quotes_enabled( + answer_instance: Answer, + mock_search_results: list[LlmDoc], + mock_search_tool: MagicMock, +) -> None: + answer_instance.tools = [mock_search_tool] + answer_instance.force_use_tool = ForceUseTool( + force_use=False, tool_name="", args=None + ) + answer_instance.answer_style_config.citation_config = None + answer_instance.answer_style_config.quotes_config = QuotesConfig() + + # Set up the LLM mock to return search results and then an answer + mock_llm = cast(Mock, answer_instance.llm) + + tool_call_chunk = AIMessageChunk(content="") + tool_call_chunk.tool_calls = [ + ToolCall( + id="search", + name="search", + args=DEFAULT_SEARCH_ARGS, + ) + ] + tool_call_chunk.tool_call_chunks = [ + ToolCallChunk( + id="search", + name="search", + args=json.dumps(DEFAULT_SEARCH_ARGS), + index=0, + ) + ] + + # needs to be short due to the "anti-hallucination" check in QuotesProcessor + answer_content = "z" + quote_content = mock_search_results[0].content + mock_llm.stream.side_effect = [ + [tool_call_chunk], + [ + AIMessageChunk( + content=( + '{"answer": "' + + answer_content + + '", "quotes": ["' + + quote_content + + '"]}' + ) + ), + ], + ] + + # Process the output + output = list(answer_instance.processed_streamed_output) + + # Assertions + assert len(output) == 5 + assert output[0] == ToolCallKickoff( + tool_name="search", tool_args=DEFAULT_SEARCH_ARGS + ) + assert output[1] == ToolResponse( + id="final_context_documents", + response=mock_search_results, + ) + assert output[2] == ToolCallFinalResult( + tool_name="search", + tool_args=DEFAULT_SEARCH_ARGS, + tool_result=[json.loads(doc.model_dump_json()) for doc in mock_search_results], + ) + assert output[3] == DanswerAnswerPiece(answer_piece=answer_content) + assert output[4] == DanswerQuotes( + quotes=[ + DanswerQuote( + quote=quote_content, + document_id=mock_search_results[0].document_id, + link=mock_search_results[0].link, + source_type=mock_search_results[0].source_type, + semantic_identifier=mock_search_results[0].semantic_identifier, + blurb=mock_search_results[0].blurb, + ) + ] + ) + + assert answer_instance.llm_answer == answer_content + + +def test_is_cancelled(answer_instance: Answer) -> None: + # Set up the LLM mock to return multiple chunks + mock_llm = Mock() + answer_instance.llm = mock_llm + mock_llm.stream.return_value = [ + AIMessageChunk(content="This is the "), + AIMessageChunk(content="first part."), + AIMessageChunk(content="This should not be seen."), + ] + + # Create a mutable object to control is_connected behavior + connection_status = {"connected": True} + answer_instance.is_connected = lambda: 
connection_status["connected"] + + # Process the output + output = [] + for i, chunk in enumerate(answer_instance.processed_streamed_output): + output.append(chunk) + # Simulate disconnection after the second chunk + if i == 1: + connection_status["connected"] = False + + assert len(output) == 3 + assert output[0] == DanswerAnswerPiece(answer_piece="This is the ") + assert output[1] == DanswerAnswerPiece(answer_piece="first part.") + assert output[2] == StreamStopInfo(stop_reason=StreamStopReason.CANCELLED) + + # Verify that the stream was cancelled + assert answer_instance.is_cancelled() is True + + # Verify that the final answer only contains the streamed parts + assert answer_instance.llm_answer == "This is the first part." + + # Verify LLM calls + mock_llm.stream.assert_called_once() diff --git a/backend/tests/unit/danswer/llm/answering/test_skip_gen_ai.py b/backend/tests/unit/danswer/llm/answering/test_skip_gen_ai.py new file mode 100644 index 00000000000..7bd4a498bd7 --- /dev/null +++ b/backend/tests/unit/danswer/llm/answering/test_skip_gen_ai.py @@ -0,0 +1,139 @@ +from typing import Any +from typing import cast +from unittest.mock import Mock + +import pytest +from pytest_mock import MockerFixture + +from danswer.llm.answering.answer import Answer +from danswer.llm.answering.models import AnswerStyleConfig +from danswer.llm.answering.models import PromptConfig +from danswer.one_shot_answer.answer_question import AnswerObjectIterator +from danswer.tools.force import ForceUseTool +from danswer.tools.tool_implementations.search.search_tool import SearchTool +from tests.regression.answer_quality.run_qa import _process_and_write_query_results + + +@pytest.mark.parametrize( + "config", + [ + { + "skip_gen_ai_answer_generation": True, + "question": "What is the capital of the moon?", + }, + { + "skip_gen_ai_answer_generation": False, + "question": "What is the capital of the moon but twice?", + }, + ], +) +def test_skip_gen_ai_answer_generation_flag( + config: dict[str, Any], + mock_search_tool: SearchTool, + answer_style_config: AnswerStyleConfig, + prompt_config: PromptConfig, +) -> None: + question = config["question"] + skip_gen_ai_answer_generation = config["skip_gen_ai_answer_generation"] + + mock_llm = Mock() + mock_llm.config = Mock() + mock_llm.config.model_name = "gpt-4o-mini" + mock_llm.stream = Mock() + mock_llm.stream.return_value = [Mock()] + answer = Answer( + question=question, + answer_style_config=answer_style_config, + prompt_config=prompt_config, + llm=mock_llm, + single_message_history="history", + tools=[mock_search_tool], + force_use_tool=( + ForceUseTool( + tool_name=mock_search_tool.name, + args={"query": question}, + force_use=True, + ) + ), + skip_explicit_tool_calling=True, + return_contexts=True, + skip_gen_ai_answer_generation=skip_gen_ai_answer_generation, + ) + count = 0 + for _ in cast(AnswerObjectIterator, answer.processed_streamed_output): + count += 1 + assert count == 3 if skip_gen_ai_answer_generation else 4 + if not skip_gen_ai_answer_generation: + mock_llm.stream.assert_called_once() + else: + mock_llm.stream.assert_not_called() + + +##### From here down is the client side test that was not working ##### + + +class FinishedTestException(Exception): + pass + + +# could not get this to work, it seems like the mock is not being used +# tests that the main run_qa function passes the skip_gen_ai_answer_generation flag to the Answer object +@pytest.mark.parametrize( + "config, questions", + [ + ( + { + "skip_gen_ai_answer_generation": True, + 
"output_folder": "./test_output_folder", + "zipped_documents_file": "./test_docs.jsonl", + "questions_file": "./test_questions.jsonl", + "commit_sha": None, + "launch_web_ui": False, + "only_retrieve_docs": True, + "use_cloud_gpu": False, + "model_server_ip": "PUT_PUBLIC_CLOUD_IP_HERE", + "model_server_port": "PUT_PUBLIC_CLOUD_PORT_HERE", + "environment_name": "", + "env_name": "", + "limit": None, + }, + [{"uid": "1", "question": "What is the capital of the moon?"}], + ), + ( + { + "skip_gen_ai_answer_generation": False, + "output_folder": "./test_output_folder", + "zipped_documents_file": "./test_docs.jsonl", + "questions_file": "./test_questions.jsonl", + "commit_sha": None, + "launch_web_ui": False, + "only_retrieve_docs": True, + "use_cloud_gpu": False, + "model_server_ip": "PUT_PUBLIC_CLOUD_IP_HERE", + "model_server_port": "PUT_PUBLIC_CLOUD_PORT_HERE", + "environment_name": "", + "env_name": "", + "limit": None, + }, + [{"uid": "1", "question": "What is the capital of the moon but twice?"}], + ), + ], +) +@pytest.mark.skip(reason="not working") +def test_run_qa_skip_gen_ai( + config: dict[str, Any], questions: list[dict[str, Any]], mocker: MockerFixture +) -> None: + mocker.patch( + "tests.regression.answer_quality.run_qa._initialize_files", + return_value=("test", questions), + ) + + def arg_checker(question_data: dict, config: dict, question_number: int) -> None: + assert question_data == questions[0] + raise FinishedTestException() + + mocker.patch( + "tests.regression.answer_quality.run_qa._process_question", arg_checker + ) + with pytest.raises(FinishedTestException): + _process_and_write_query_results(config) diff --git a/backend/tests/unit/danswer/llm/test_chat_llm.py b/backend/tests/unit/danswer/llm/test_chat_llm.py new file mode 100644 index 00000000000..efe0281f53c --- /dev/null +++ b/backend/tests/unit/danswer/llm/test_chat_llm.py @@ -0,0 +1,290 @@ +from unittest.mock import patch + +import litellm +import pytest +from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessageChunk +from langchain_core.messages import HumanMessage +from litellm.types.utils import ChatCompletionDeltaToolCall +from litellm.types.utils import Delta +from litellm.types.utils import Function as LiteLLMFunction + +from danswer.llm.chat_llm import DefaultMultiLLM + + +def _create_delta( + role: str | None = None, + content: str | None = None, + tool_calls: list[ChatCompletionDeltaToolCall] | None = None, +) -> Delta: + delta = Delta(role=role, content=content) + # NOTE: for some reason, if you pass tool_calls to the constructor, it doesn't actually + # get set, so we have to do it this way + delta.tool_calls = tool_calls + return delta + + +@pytest.fixture +def default_multi_llm() -> DefaultMultiLLM: + return DefaultMultiLLM( + api_key="test_key", + timeout=30, + model_provider="openai", + model_name="gpt-3.5-turbo", + ) + + +def test_multiple_tool_calls(default_multi_llm: DefaultMultiLLM) -> None: + # Mock the litellm.completion function + with patch("danswer.llm.chat_llm.litellm.completion") as mock_completion: + # Create a mock response with multiple tool calls using litellm objects + mock_response = litellm.ModelResponse( + id="chatcmpl-123", + choices=[ + litellm.Choices( + finish_reason="tool_calls", + index=0, + message=litellm.Message( + content=None, + role="assistant", + tool_calls=[ + litellm.ChatCompletionMessageToolCall( + id="call_1", + function=LiteLLMFunction( + name="get_weather", + arguments='{"location": "New York"}', + ), + type="function", + ), + 
litellm.ChatCompletionMessageToolCall( + id="call_2", + function=LiteLLMFunction( + name="get_time", arguments='{"timezone": "EST"}' + ), + type="function", + ), + ], + ), + ) + ], + model="gpt-3.5-turbo", + usage=litellm.Usage( + prompt_tokens=50, completion_tokens=30, total_tokens=80 + ), + ) + mock_completion.return_value = mock_response + + # Define input messages + messages = [HumanMessage(content="What's the weather and time in New York?")] + + # Define available tools + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_time", + "description": "Get the current time for a timezone", + "parameters": { + "type": "object", + "properties": {"timezone": {"type": "string"}}, + "required": ["timezone"], + }, + }, + }, + ] + + # Call the _invoke_implementation method + result = default_multi_llm.invoke(messages, tools) + + # Assert that the result is an AIMessage + assert isinstance(result, AIMessage) + + # Assert that the content is None (as per the mock response) + assert result.content == "" + + # Assert that there are two tool calls + assert len(result.tool_calls) == 2 + + # Assert the details of the first tool call + assert result.tool_calls[0]["id"] == "call_1" + assert result.tool_calls[0]["name"] == "get_weather" + assert result.tool_calls[0]["args"] == {"location": "New York"} + + # Assert the details of the second tool call + assert result.tool_calls[1]["id"] == "call_2" + assert result.tool_calls[1]["name"] == "get_time" + assert result.tool_calls[1]["args"] == {"timezone": "EST"} + + # Verify that litellm.completion was called with the correct arguments + mock_completion.assert_called_once_with( + model="openai/gpt-3.5-turbo", + api_key="test_key", + base_url=None, + api_version=None, + custom_llm_provider=None, + messages=[ + {"role": "user", "content": "What's the weather and time in New York?"} + ], + tools=tools, + tool_choice=None, + stream=False, + temperature=0.0, # Default value from GEN_AI_TEMPERATURE + timeout=30, + parallel_tool_calls=False, + ) + + +def test_multiple_tool_calls_streaming(default_multi_llm: DefaultMultiLLM) -> None: + # Mock the litellm.completion function + with patch("danswer.llm.chat_llm.litellm.completion") as mock_completion: + # Create a mock response with multiple tool calls using litellm objects + mock_response = [ + litellm.ModelResponse( + id="chatcmpl-123", + choices=[ + litellm.Choices( + delta=_create_delta( + role="assistant", + tool_calls=[ + ChatCompletionDeltaToolCall( + id="call_1", + function=LiteLLMFunction( + name="get_weather", arguments='{"location": ' + ), + type="function", + index=0, + ) + ], + ), + finish_reason=None, + index=0, + ) + ], + model="gpt-3.5-turbo", + ), + litellm.ModelResponse( + id="chatcmpl-123", + choices=[ + litellm.Choices( + delta=_create_delta( + tool_calls=[ + ChatCompletionDeltaToolCall( + id="", + function=LiteLLMFunction(arguments='"New York"}'), + type="function", + index=0, + ) + ] + ), + finish_reason=None, + index=0, + ) + ], + model="gpt-3.5-turbo", + ), + litellm.ModelResponse( + id="chatcmpl-123", + choices=[ + litellm.Choices( + delta=_create_delta( + tool_calls=[ + ChatCompletionDeltaToolCall( + id="call_2", + function=LiteLLMFunction( + name="get_time", arguments='{"timezone": "EST"}' + ), + type="function", + index=1, + ) 
+ ] + ), + finish_reason="tool_calls", + index=0, + ) + ], + model="gpt-3.5-turbo", + ), + ] + mock_completion.return_value = mock_response + + # Define input messages and tools (same as in the non-streaming test) + messages = [HumanMessage(content="What's the weather and time in New York?")] + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + }, + { + "type": "function", + "function": { + "name": "get_time", + "description": "Get the current time for a timezone", + "parameters": { + "type": "object", + "properties": {"timezone": {"type": "string"}}, + "required": ["timezone"], + }, + }, + }, + ] + + # Call the stream method + stream_result = list(default_multi_llm.stream(messages, tools)) + + # Assert that we received the correct number of chunks + assert len(stream_result) == 3 + + # Combine all chunks into a single AIMessage + combined_result: AIMessage = AIMessageChunk(content="") + for chunk in stream_result: + combined_result += chunk # type: ignore + + # Assert that the combined result matches our expectations + assert isinstance(combined_result, AIMessage) + assert combined_result.content == "" + assert len(combined_result.tool_calls) == 2 + assert combined_result.tool_calls[0]["id"] == "call_1" + assert combined_result.tool_calls[0]["name"] == "get_weather" + assert combined_result.tool_calls[0]["args"] == {"location": "New York"} + assert combined_result.tool_calls[1]["id"] == "call_2" + assert combined_result.tool_calls[1]["name"] == "get_time" + assert combined_result.tool_calls[1]["args"] == {"timezone": "EST"} + + # Verify that litellm.completion was called with the correct arguments + mock_completion.assert_called_once_with( + model="openai/gpt-3.5-turbo", + api_key="test_key", + base_url=None, + api_version=None, + custom_llm_provider=None, + messages=[ + {"role": "user", "content": "What's the weather and time in New York?"} + ], + tools=tools, + tool_choice=None, + stream=True, + temperature=0.0, # Default value from GEN_AI_TEMPERATURE + timeout=30, + parallel_tool_calls=False, + ) diff --git a/backend/tests/unit/danswer/tools/custom/test_custom_tools.py b/backend/tests/unit/danswer/tools/custom/test_custom_tools.py index fcc48b98d21..4d47a8761ff 100644 --- a/backend/tests/unit/danswer/tools/custom/test_custom_tools.py +++ b/backend/tests/unit/danswer/tools/custom/test_custom_tools.py @@ -1,17 +1,23 @@ import unittest +import uuid from typing import Any from unittest.mock import patch import pytest -from danswer.tools.custom.custom_tool import ( +from danswer.tools.models import DynamicSchemaInfo +from danswer.tools.models import ToolResponse +from danswer.tools.tool_implementations.custom.custom_tool import ( build_custom_tools_from_openapi_schema_and_headers, ) -from danswer.tools.custom.custom_tool import CUSTOM_TOOL_RESPONSE_ID -from danswer.tools.custom.custom_tool import CustomToolCallSummary -from danswer.tools.custom.custom_tool import validate_openapi_schema -from danswer.tools.models import DynamicSchemaInfo -from danswer.tools.tool import ToolResponse +from danswer.tools.tool_implementations.custom.custom_tool import ( + CUSTOM_TOOL_RESPONSE_ID, +) +from danswer.tools.tool_implementations.custom.custom_tool import CustomToolCallSummary +from danswer.tools.tool_implementations.custom.custom_tool import ( + validate_openapi_schema, +) +from 
danswer.utils.headers import HeaderItemDict class TestCustomTool(unittest.TestCase): @@ -73,10 +79,10 @@ def setUp(self) -> None: } validate_openapi_schema(self.openapi_schema) self.dynamic_schema_info: DynamicSchemaInfo = DynamicSchemaInfo( - chat_session_id=10, message_id=20 + chat_session_id=uuid.uuid4(), message_id=20 ) - @patch("danswer.tools.custom.custom_tool.requests.request") + @patch("danswer.tools.tool_implementations.custom.custom_tool.requests.request") def test_custom_tool_run_get(self, mock_request: unittest.mock.MagicMock) -> None: """ Test the GET method of a custom tool. @@ -104,7 +110,7 @@ def test_custom_tool_run_get(self, mock_request: unittest.mock.MagicMock) -> Non "Tool name in response does not match expected value", ) - @patch("danswer.tools.custom.custom_tool.requests.request") + @patch("danswer.tools.tool_implementations.custom.custom_tool.requests.request") def test_custom_tool_run_post(self, mock_request: unittest.mock.MagicMock) -> None: """ Test the POST method of a custom tool. @@ -134,7 +140,7 @@ def test_custom_tool_run_post(self, mock_request: unittest.mock.MagicMock) -> No "Tool name in response does not match expected value", ) - @patch("danswer.tools.custom.custom_tool.requests.request") + @patch("danswer.tools.tool_implementations.custom.custom_tool.requests.request") def test_custom_tool_with_headers( self, mock_request: unittest.mock.MagicMock ) -> None: @@ -142,7 +148,7 @@ def test_custom_tool_with_headers( Test the custom tool with custom headers. Verifies that the tool correctly includes the custom headers in the request. """ - custom_headers: list[dict[str, str]] = [ + custom_headers: list[HeaderItemDict] = [ {"key": "Authorization", "value": "Bearer token123"}, {"key": "Custom-Header", "value": "CustomValue"}, ] @@ -162,7 +168,7 @@ def test_custom_tool_with_headers( "GET", expected_url, json=None, headers=expected_headers ) - @patch("danswer.tools.custom.custom_tool.requests.request") + @patch("danswer.tools.tool_implementations.custom.custom_tool.requests.request") def test_custom_tool_with_empty_headers( self, mock_request: unittest.mock.MagicMock ) -> None: @@ -170,7 +176,7 @@ def test_custom_tool_with_empty_headers( Test the custom tool with an empty list of custom headers. Verifies that the tool correctly handles an empty list of headers. 
""" - custom_headers: list[dict[str, str]] = [] + custom_headers: list[HeaderItemDict] = [] tools = build_custom_tools_from_openapi_schema_and_headers( self.openapi_schema, custom_headers=custom_headers, @@ -209,6 +215,7 @@ def test_custom_tool_final_result(self) -> None: mock_response = ToolResponse( id=CUSTOM_TOOL_RESPONSE_ID, response=CustomToolCallSummary( + response_type="json", tool_name="getAssistant", tool_result={"id": "789", "name": "Final Assistant"}, ), diff --git a/deployment/cloud_kubernetes/hpa/workers_hpa.yaml b/deployment/cloud_kubernetes/hpa/workers_hpa.yaml new file mode 100644 index 00000000000..fd24b9eeac3 --- /dev/null +++ b/deployment/cloud_kubernetes/hpa/workers_hpa.yaml @@ -0,0 +1,56 @@ +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: celery-worker-heavy-hpa +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: celery-worker-heavy + minReplicas: 1 + maxReplicas: 5 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 60 +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: celery-worker-light-hpa +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: celery-worker-light + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 +--- +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: celery-worker-indexing-hpa +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: celery-worker-indexing + minReplicas: 1 + maxReplicas: 10 + metrics: + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: 70 diff --git a/deployment/cloud_kubernetes/workers/beat.yaml b/deployment/cloud_kubernetes/workers/beat.yaml new file mode 100644 index 00000000000..a9d053f7295 --- /dev/null +++ b/deployment/cloud_kubernetes/workers/beat.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: celery-beat +spec: + replicas: 1 + selector: + matchLabels: + app: celery-beat + template: + metadata: + labels: + app: celery-beat + spec: + containers: + - name: celery-beat + image: danswer/danswer-backend:v0.11.0-cloud.beta.4 + imagePullPolicy: Always + command: + [ + "celery", + "-A", + "danswer.background.celery.versioned_apps.beat", + "beat", + "--loglevel=INFO", + ] + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: danswer-secrets + key: redis_password + - name: DANSWER_VERSION + value: "v0.11.0-cloud.beta.4" + envFrom: + - configMapRef: + name: env-configmap + resources: + requests: + cpu: "250m" + memory: "512Mi" + limits: + cpu: "500m" + memory: "1Gi" diff --git a/deployment/cloud_kubernetes/workers/heavy_worker.yaml b/deployment/cloud_kubernetes/workers/heavy_worker.yaml new file mode 100644 index 00000000000..682cadee647 --- /dev/null +++ b/deployment/cloud_kubernetes/workers/heavy_worker.yaml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: celery-worker-heavy +spec: + replicas: 2 + selector: + matchLabels: + app: celery-worker-heavy + template: + metadata: + labels: + app: celery-worker-heavy + spec: + containers: + - name: celery-worker-heavy + image: danswer/danswer-backend:v0.11.0-cloud.beta.4 + imagePullPolicy: Always + command: + [ + "celery", + "-A", + "danswer.background.celery.versioned_apps.heavy", + "worker", + "--loglevel=INFO", + "--hostname=heavy@%n", + "-Q", + "connector_pruning", + ] + env: + - name: 
REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: danswer-secrets + key: redis_password + - name: DANSWER_VERSION + value: "v0.11.0-cloud.beta.4" + envFrom: + - configMapRef: + name: env-configmap + volumeMounts: + - name: vespa-certificates + mountPath: "/app/certs" + readOnly: true + resources: + requests: + cpu: "1000m" + memory: "2Gi" + limits: + cpu: "2000m" + memory: "4Gi" + volumes: + - name: vespa-certificates + secret: + secretName: vespa-certificates + items: + - key: cert.pem + path: cert.pem + - key: key.pem + path: key.pem diff --git a/deployment/cloud_kubernetes/workers/indexing_worker.yaml b/deployment/cloud_kubernetes/workers/indexing_worker.yaml new file mode 100644 index 00000000000..47d92999f2e --- /dev/null +++ b/deployment/cloud_kubernetes/workers/indexing_worker.yaml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: celery-worker-indexing +spec: + replicas: 3 + selector: + matchLabels: + app: celery-worker-indexing + template: + metadata: + labels: + app: celery-worker-indexing + spec: + containers: + - name: celery-worker-indexing + image: danswer/danswer-backend:v0.11.0-cloud.beta.4 + imagePullPolicy: Always + command: + [ + "celery", + "-A", + "danswer.background.celery.versioned_apps.indexing", + "worker", + "--loglevel=INFO", + "--hostname=indexing@%n", + "-Q", + "connector_indexing", + ] + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: danswer-secrets + key: redis_password + - name: DANSWER_VERSION + value: "v0.11.0-cloud.beta.4" + envFrom: + - configMapRef: + name: env-configmap + volumeMounts: + - name: vespa-certificates + mountPath: "/app/certs" + readOnly: true + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + volumes: + - name: vespa-certificates + secret: + secretName: vespa-certificates + items: + - key: cert.pem + path: cert.pem + - key: key.pem + path: key.pem diff --git a/deployment/cloud_kubernetes/workers/light_worker.yaml b/deployment/cloud_kubernetes/workers/light_worker.yaml new file mode 100644 index 00000000000..c3c59354514 --- /dev/null +++ b/deployment/cloud_kubernetes/workers/light_worker.yaml @@ -0,0 +1,60 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: celery-worker-light +spec: + replicas: 2 + selector: + matchLabels: + app: celery-worker-light + template: + metadata: + labels: + app: celery-worker-light + spec: + containers: + - name: celery-worker-light + image: danswer/danswer-backend:v0.11.0-cloud.beta.4 + imagePullPolicy: Always + command: + [ + "celery", + "-A", + "danswer.background.celery.versioned_apps.light", + "worker", + "--loglevel=INFO", + "--hostname=light@%n", + "-Q", + "vespa_metadata_sync,connector_deletion", + ] + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: danswer-secrets + key: redis_password + - name: DANSWER_VERSION + value: "v0.11.0-cloud.beta.4" + envFrom: + - configMapRef: + name: env-configmap + volumeMounts: + - name: vespa-certificates + mountPath: "/app/certs" + readOnly: true + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + volumes: + - name: vespa-certificates + secret: + secretName: vespa-certificates + items: + - key: cert.pem + path: cert.pem + - key: key.pem + path: key.pem diff --git a/deployment/cloud_kubernetes/workers/primary.yaml b/deployment/cloud_kubernetes/workers/primary.yaml new file mode 100644 index 00000000000..7408e3bfb42 --- /dev/null +++ b/deployment/cloud_kubernetes/workers/primary.yaml @@ -0,0 +1,60 @@ 
+apiVersion: apps/v1 +kind: Deployment +metadata: + name: celery-worker-primary +spec: + replicas: 1 + selector: + matchLabels: + app: celery-worker-primary + template: + metadata: + labels: + app: celery-worker-primary + spec: + containers: + - name: celery-worker-primary + image: danswer/danswer-backend:v0.11.0-cloud.beta.4 + imagePullPolicy: Always + command: + [ + "celery", + "-A", + "danswer.background.celery.versioned_apps.primary", + "worker", + "--loglevel=INFO", + "--hostname=primary@%n", + "-Q", + "celery,periodic_tasks,vespa_metadata_sync", + ] + env: + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: danswer-secrets + key: redis_password + - name: DANSWER_VERSION + value: "v0.11.0-cloud.beta.4" + envFrom: + - configMapRef: + name: env-configmap + volumeMounts: + - name: vespa-certificates + mountPath: "/app/certs" + readOnly: true + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + volumes: + - name: vespa-certificates + secret: + secretName: vespa-certificates + items: + - key: cert.pem + path: cert.pem + - key: key.pem + path: key.pem diff --git a/deployment/docker_compose/docker-compose.dev.yml b/deployment/docker_compose/docker-compose.dev.yml index 885c9350049..62ac7b7d0f3 100644 --- a/deployment/docker_compose/docker-compose.dev.yml +++ b/deployment/docker_compose/docker-compose.dev.yml @@ -29,6 +29,7 @@ services: - SMTP_PORT=${SMTP_PORT:-587} # For sending verification emails, if unspecified then defaults to '587' - SMTP_USER=${SMTP_USER:-} - SMTP_PASS=${SMTP_PASS:-} + - ENABLE_EMAIL_INVITES=${ENABLE_EMAIL_INVITES:-} # If enabled, will send users (using SMTP settings) an email to join the workspace - EMAIL_FROM=${EMAIL_FROM:-} - OAUTH_CLIENT_ID=${OAUTH_CLIENT_ID:-} - OAUTH_CLIENT_SECRET=${OAUTH_CLIENT_SECRET:-} @@ -62,6 +63,7 @@ services: - QA_PROMPT_OVERRIDE=${QA_PROMPT_OVERRIDE:-} # Other services - POSTGRES_HOST=relational_db + - POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-} - VESPA_HOST=index - REDIS_HOST=cache - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose @@ -77,8 +79,8 @@ services: # Leave this on pretty please? Nothing sensitive is collected! 
# https://docs.danswer.dev/more/telemetry - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-} - - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs - - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging + - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs + - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging # Log all of Danswer prompts and interactions with the LLM - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-} # If set to `true` will enable additional logs about Vespa query performance @@ -89,9 +91,12 @@ services: - LOG_POSTGRES_CONN_COUNTS=${LOG_POSTGRES_CONN_COUNTS:-} - CELERY_BROKER_POOL_LIMIT=${CELERY_BROKER_POOL_LIMIT:-} + # Analytics Configs + - SENTRY_DSN=${SENTRY_DSN:-} + # Chat Configs - HARD_DELETE_CHATS=${HARD_DELETE_CHATS:-} - + # Enterprise Edition only - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-} @@ -109,7 +114,12 @@ services: build: context: ../../backend dockerfile: Dockerfile - command: /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf + command: > + /bin/sh -c " + if [ -f /etc/ssl/certs/custom-ca.crt ]; then + update-ca-certificates; + fi && + /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf" depends_on: - relational_db - index @@ -143,6 +153,7 @@ services: - POSTGRES_USER=${POSTGRES_USER:-} - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-} - POSTGRES_DB=${POSTGRES_DB:-} + - POSTGRES_DEFAULT_SCHEMA=${POSTGRES_DEFAULT_SCHEMA:-} - VESPA_HOST=index - REDIS_HOST=cache - WEB_DOMAIN=${WEB_DOMAIN:-} # For frontend redirect auth purpose for OAuth2 connectors @@ -170,6 +181,11 @@ services: - GONG_CONNECTOR_START_TIME=${GONG_CONNECTOR_START_TIME:-} - NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP=${NOTION_CONNECTOR_ENABLE_RECURSIVE_PAGE_LOOKUP:-} - GITHUB_CONNECTOR_BASE_URL=${GITHUB_CONNECTOR_BASE_URL:-} + # Celery Configs (defaults are set in the supervisord.conf file. + # prefer doing that to have one source of defaults) + - CELERY_WORKER_INDEXING_CONCURRENCY=${CELERY_WORKER_INDEXING_CONCURRENCY:-} + - CELERY_WORKER_LIGHT_CONCURRENCY=${CELERY_WORKER_LIGHT_CONCURRENCY:-} + - CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-} # Danswer SlackBot Configs - DANSWER_BOT_SLACK_APP_TOKEN=${DANSWER_BOT_SLACK_APP_TOKEN:-} @@ -186,12 +202,15 @@ services: # Leave this on pretty please? Nothing sensitive is collected! 
# https://docs.danswer.dev/more/telemetry - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-} - - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs - - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging + - LOG_LEVEL=${LOG_LEVEL:-info} # Set to debug to get more fine-grained logs + - LOG_ALL_MODEL_INTERACTIONS=${LOG_ALL_MODEL_INTERACTIONS:-} # LiteLLM Verbose Logging # Log all of Danswer prompts and interactions with the LLM - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-} - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} + # Analytics Configs + - SENTRY_DSN=${SENTRY_DSN:-} + # Enterprise Edition stuff - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=${ENABLE_PAID_ENTERPRISE_EDITION_FEATURES:-false} extra_hosts: @@ -201,6 +220,11 @@ services: options: max-size: "50m" max-file: "6" + # Uncomment the following lines if you need to include a custom CA certificate + # Optional volume mount for CA certificate + # volumes: + # # Maps to the CA_CERT_PATH environment variable in the Dockerfile + # - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro web_server: image: danswer/danswer-web-server:${IMAGE_TAG:-latest} @@ -249,6 +273,9 @@ services: - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} # Set to debug to get more fine-grained logs - LOG_LEVEL=${LOG_LEVEL:-info} + + # Analytics Configs + - SENTRY_DSN=${SENTRY_DSN:-} volumes: # Not necessary, this is just to reduce download time during startup - model_cache_huggingface:/root/.cache/huggingface/ @@ -277,6 +304,10 @@ services: - INDEXING_ONLY=True # Set to debug to get more fine-grained logs - LOG_LEVEL=${LOG_LEVEL:-info} + - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-} + + # Analytics Configs + - SENTRY_DSN=${SENTRY_DSN:-} volumes: # Not necessary, this is just to reduce download time during startup - indexing_huggingface_model_cache:/root/.cache/huggingface/ @@ -288,7 +319,7 @@ services: relational_db: image: postgres:15.2-alpine - command: -c 'max_connections=150' + command: -c 'max_connections=250' restart: always environment: - POSTGRES_USER=${POSTGRES_USER:-postgres} @@ -317,7 +348,7 @@ services: image: nginx:1.23.4-alpine restart: always # nginx will immediately crash with `nginx: [emerg] host not found in upstream` - # if api_server / web_server are not up + # if api_server / web_server are not up depends_on: - api_server - web_server @@ -333,20 +364,20 @@ services: options: max-size: "50m" max-file: "6" - # The specified script waits for the api_server to start up. - # Without this we've seen issues where nginx shows no error logs but + # The specified script waits for the api_server to start up. + # Without this we've seen issues where nginx shows no error logs but # does not recieve any traffic # NOTE: we have to use dos2unix to remove Carriage Return chars from the file # in order to make this work on both Unix-like systems and windows - command: > - /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh - && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev" + command: > + /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh + && /etc/nginx/conf.d/run-nginx.sh app.conf.template.dev" cache: image: redis:7.4-alpine restart: always ports: - - '6379:6379' + - "6379:6379" # docker silently mounts /data even without an explicit volume mount, which enables # persistence. explicitly setting save and appendonly forces ephemeral behavior. 
command: redis-server --save "" --appendonly no diff --git a/deployment/docker_compose/docker-compose.gpu-dev.yml b/deployment/docker_compose/docker-compose.gpu-dev.yml index 59ea69e5457..a7e0a2afe97 100644 --- a/deployment/docker_compose/docker-compose.gpu-dev.yml +++ b/deployment/docker_compose/docker-compose.gpu-dev.yml @@ -70,6 +70,9 @@ services: - DISABLE_RERANK_FOR_STREAMING=${DISABLE_RERANK_FOR_STREAMING:-} - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} + - VESPA_REQUEST_TIMEOUT=${VESPA_REQUEST_TIMEOUT:-} + # We do not recommend changing this value + - SYSTEM_RECURSION_LIMIT=${SYSTEM_RECURSION_LIMIT:-} # Leave this on pretty please? Nothing sensitive is collected! # https://docs.danswer.dev/more/telemetry - DISABLE_TELEMETRY=${DISABLE_TELEMETRY:-} @@ -183,6 +186,11 @@ services: # Log all of Danswer prompts and interactions with the LLM - LOG_DANSWER_MODEL_INTERACTIONS=${LOG_DANSWER_MODEL_INTERACTIONS:-} - LOG_VESPA_TIMING_INFORMATION=${LOG_VESPA_TIMING_INFORMATION:-} + # Celery Configs (defaults are set in the supervisord.conf file. + # prefer doing that to have one source of defaults) + - CELERY_WORKER_INDEXING_CONCURRENCY=${CELERY_WORKER_INDEXING_CONCURRENCY:-} + - CELERY_WORKER_LIGHT_CONCURRENCY=${CELERY_WORKER_LIGHT_CONCURRENCY:-} + - CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER=${CELERY_WORKER_LIGHT_PREFETCH_MULTIPLIER:-} # Enterprise Edition only - API_KEY_HASH_ROUNDS=${API_KEY_HASH_ROUNDS:-} @@ -248,6 +256,7 @@ services: - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} # Set to debug to get more fine-grained logs - LOG_LEVEL=${LOG_LEVEL:-info} + - CLIENT_EMBEDDING_TIMEOUT=${CLIENT_EMBEDDING_TIMEOUT:-} volumes: # Not necessary, this is just to reduce download time during startup - model_cache_huggingface:/root/.cache/huggingface/ @@ -298,7 +307,7 @@ services: relational_db: image: postgres:15.2-alpine - command: -c 'max_connections=150' + command: -c 'max_connections=250' restart: always environment: - POSTGRES_USER=${POSTGRES_USER:-postgres} diff --git a/deployment/docker_compose/docker-compose.prod-cloud.yml b/deployment/docker_compose/docker-compose.prod-cloud.yml new file mode 100644 index 00000000000..392d7c67ad4 --- /dev/null +++ b/deployment/docker_compose/docker-compose.prod-cloud.yml @@ -0,0 +1,243 @@ +services: + api_server: + image: danswer/danswer-backend:${IMAGE_TAG:-latest} + build: + context: ../../backend + dockerfile: Dockerfile.cloud + command: > + /bin/sh -c "alembic -n schema_private upgrade head && + echo \"Starting Danswer Api Server\" && + uvicorn danswer.main:app --host 0.0.0.0 --port 8080" + depends_on: + - relational_db + - index + - cache + - inference_model_server + restart: always + env_file: + - .env + environment: + - AUTH_TYPE=${AUTH_TYPE:-oidc} + - POSTGRES_HOST=relational_db + - VESPA_HOST=index + - REDIS_HOST=cache + - MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + extra_hosts: + - "host.docker.internal:host-gateway" + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + background: + image: danswer/danswer-backend:${IMAGE_TAG:-latest} + build: + context: ../../backend + dockerfile: Dockerfile + command: /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf + depends_on: + - relational_db + - index + - cache + - inference_model_server + - indexing_model_server + restart: always + env_file: + - .env + environment: + - AUTH_TYPE=${AUTH_TYPE:-oidc} + - POSTGRES_HOST=relational_db + - VESPA_HOST=index + - REDIS_HOST=cache + - 
MODEL_SERVER_HOST=${MODEL_SERVER_HOST:-inference_model_server} + - INDEXING_MODEL_SERVER_HOST=${INDEXING_MODEL_SERVER_HOST:-indexing_model_server} + extra_hosts: + - "host.docker.internal:host-gateway" + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + web_server: + image: danswer/danswer-web-server:${IMAGE_TAG:-latest} + build: + context: ../../web + dockerfile: Dockerfile + args: + - NEXT_PUBLIC_DISABLE_STREAMING=${NEXT_PUBLIC_DISABLE_STREAMING:-false} + - NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA=${NEXT_PUBLIC_NEW_CHAT_DIRECTS_TO_SAME_PERSONA:-false} + - NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS=${NEXT_PUBLIC_NEGATIVE_PREDEFINED_FEEDBACK_OPTIONS:-} + - NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT:-} + - NEXT_PUBLIC_THEME=${NEXT_PUBLIC_THEME:-} + depends_on: + - api_server + restart: always + env_file: + - .env + environment: + - INTERNAL_URL=http://api_server:8080 + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + relational_db: + image: postgres:15.2-alpine + command: -c 'max_connections=250' + restart: always + # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file + env_file: + - .env + volumes: + - db_volume:/var/lib/postgresql/data + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + inference_model_server: + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + volumes: + # Not necessary, this is just to reduce download time during startup + - model_cache_huggingface:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + indexing_model_server: + image: danswer/danswer-model-server:${IMAGE_TAG:-latest} + build: + context: ../../backend + dockerfile: Dockerfile.model_server + command: > + /bin/sh -c "if [ \"${DISABLE_MODEL_SERVER:-false}\" = \"True\" ]; then + echo 'Skipping service...'; + exit 0; + else + exec uvicorn model_server.main:app --host 0.0.0.0 --port 9000; + fi" + restart: on-failure + environment: + - MIN_THREADS_ML_MODELS=${MIN_THREADS_ML_MODELS:-} + - INDEXING_ONLY=True + # Set to debug to get more fine-grained logs + - LOG_LEVEL=${LOG_LEVEL:-info} + - VESPA_SEARCHER_THREADS=${VESPA_SEARCHER_THREADS:-1} + volumes: + # Not necessary, this is just to reduce download time during startup + - indexing_huggingface_model_cache:/root/.cache/huggingface/ + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + # This container name cannot have an underscore in it due to Vespa expectations of the URL + index: + image: vespaengine/vespa:8.277.17 + restart: always + ports: + - "19071:19071" + - "8081:8081" + volumes: + - vespa_volume:/opt/vespa/var + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + + + nginx: + image: nginx:1.23.4-alpine + restart: always + # nginx will immediately crash with `nginx: [emerg] host not found in upstream` + # if api_server / web_server are not up + depends_on: + - api_server + - 
web_server + ports: + - "80:80" + - "443:443" + volumes: + - ../data/nginx:/etc/nginx/conf.d + - ../data/certbot/conf:/etc/letsencrypt + - ../data/certbot/www:/var/www/certbot + # sleep a little bit to allow the web_server / api_server to start up. + # Without this we've seen issues where nginx shows no error logs but + # does not recieve any traffic + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + # The specified script waits for the api_server to start up. + # Without this we've seen issues where nginx shows no error logs but + # does not recieve any traffic + # NOTE: we have to use dos2unix to remove Carriage Return chars from the file + # in order to make this work on both Unix-like systems and windows + command: > + /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh + && /etc/nginx/conf.d/run-nginx.sh app.conf.template" + env_file: + - .env.nginx + + + # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71 + certbot: + image: certbot/certbot + restart: always + volumes: + - ../data/certbot/conf:/etc/letsencrypt + - ../data/certbot/www:/var/www/certbot + logging: + driver: json-file + options: + max-size: "50m" + max-file: "6" + entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'" + + + cache: + image: redis:7.4-alpine + restart: always + ports: + - '6379:6379' + # docker silently mounts /data even without an explicit volume mount, which enables + # persistence. explicitly setting save and appendonly forces ephemeral behavior. + command: redis-server --save "" --appendonly no + + +volumes: + db_volume: + vespa_volume: + # Created by the container itself + model_cache_huggingface: + indexing_huggingface_model_cache: diff --git a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml index 7a56346f074..8ec6d437646 100644 --- a/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml +++ b/deployment/docker_compose/docker-compose.prod-no-letsencrypt.yml @@ -147,7 +147,7 @@ services: relational_db: image: postgres:15.2-alpine - command: -c 'max_connections=150' + command: -c 'max_connections=250' restart: always # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file env_file: diff --git a/deployment/docker_compose/docker-compose.prod.yml b/deployment/docker_compose/docker-compose.prod.yml index 983881fff7f..d4ba321736e 100644 --- a/deployment/docker_compose/docker-compose.prod.yml +++ b/deployment/docker_compose/docker-compose.prod.yml @@ -5,7 +5,8 @@ services: context: ../../backend dockerfile: Dockerfile command: > - /bin/sh -c "alembic upgrade head && + /bin/sh -c " + alembic upgrade head && echo \"Starting Danswer Api Server\" && uvicorn danswer.main:app --host 0.0.0.0 --port 8080" depends_on: @@ -29,14 +30,17 @@ services: options: max-size: "50m" max-file: "6" - - background: image: danswer/danswer-backend:${IMAGE_TAG:-latest} build: context: ../../backend dockerfile: Dockerfile - command: /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf + command: > + /bin/sh -c " + if [ -f /etc/ssl/certs/custom-ca.crt ]; then + update-ca-certificates; + fi && + /usr/bin/supervisord -c /etc/supervisor/conf.d/supervisord.conf" depends_on: - relational_db - index @@ -60,6 +64,10 @@ services: options: max-size: "50m" max-file: "6" + # Uncomment the following lines if you need to include a custom CA certificate + # volumes: + # # Maps to the CA_CERT_PATH environment 
variable in the Dockerfile + # - ${CA_CERT_PATH:-./custom-ca.crt}:/etc/ssl/certs/custom-ca.crt:ro web_server: image: danswer/danswer-web-server:${IMAGE_TAG:-latest} @@ -86,10 +94,9 @@ services: max-size: "50m" max-file: "6" - relational_db: image: postgres:15.2-alpine - command: -c 'max_connections=150' + command: -c 'max_connections=250' restart: always # POSTGRES_USER and POSTGRES_PASSWORD should be set in .env file env_file: @@ -102,7 +109,6 @@ services: max-size: "50m" max-file: "6" - inference_model_server: image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: @@ -129,7 +135,6 @@ services: max-size: "50m" max-file: "6" - indexing_model_server: image: danswer/danswer-model-server:${IMAGE_TAG:-latest} build: @@ -158,7 +163,6 @@ services: max-size: "50m" max-file: "6" - # This container name cannot have an underscore in it due to Vespa expectations of the URL index: image: vespaengine/vespa:8.277.17 @@ -174,12 +178,11 @@ services: max-size: "50m" max-file: "6" - nginx: image: nginx:1.23.4-alpine restart: always # nginx will immediately crash with `nginx: [emerg] host not found in upstream` - # if api_server / web_server are not up + # if api_server / web_server are not up depends_on: - api_server - web_server @@ -190,26 +193,25 @@ services: - ../data/nginx:/etc/nginx/conf.d - ../data/certbot/conf:/etc/letsencrypt - ../data/certbot/www:/var/www/certbot - # sleep a little bit to allow the web_server / api_server to start up. - # Without this we've seen issues where nginx shows no error logs but + # sleep a little bit to allow the web_server / api_server to start up. + # Without this we've seen issues where nginx shows no error logs but # does not recieve any traffic logging: driver: json-file options: max-size: "50m" max-file: "6" - # The specified script waits for the api_server to start up. - # Without this we've seen issues where nginx shows no error logs but - # does not recieve any traffic + # The specified script waits for the api_server to start up. + # Without this we've seen issues where nginx shows no error logs but + # does not recieve any traffic # NOTE: we have to use dos2unix to remove Carriage Return chars from the file # in order to make this work on both Unix-like systems and windows - command: > + command: > /bin/sh -c "dos2unix /etc/nginx/conf.d/run-nginx.sh && /etc/nginx/conf.d/run-nginx.sh app.conf.template" env_file: - .env.nginx - # follows https://pentacent.medium.com/nginx-and-lets-encrypt-with-docker-in-less-than-5-minutes-b4b8a60d3a71 certbot: image: certbot/certbot @@ -224,17 +226,15 @@ services: max-file: "6" entrypoint: "/bin/sh -c 'trap exit TERM; while :; do certbot renew; sleep 12h & wait $${!}; done;'" - cache: image: redis:7.4-alpine restart: always ports: - - '6379:6379' + - "6379:6379" # docker silently mounts /data even without an explicit volume mount, which enables # persistence. explicitly setting save and appendonly forces ephemeral behavior. 
command: redis-server --save "" --appendonly no - volumes: db_volume: vespa_volume: diff --git a/deployment/docker_compose/docker-compose.search-testing.yml b/deployment/docker_compose/docker-compose.search-testing.yml index a64b30f09d7..fab950c064e 100644 --- a/deployment/docker_compose/docker-compose.search-testing.yml +++ b/deployment/docker_compose/docker-compose.search-testing.yml @@ -26,6 +26,9 @@ services: - MODEL_SERVER_PORT=${MODEL_SERVER_PORT:-} - ENV_SEED_CONFIGURATION=${ENV_SEED_CONFIGURATION:-} - ENABLE_PAID_ENTERPRISE_EDITION_FEATURES=True + # To enable the LLM for testing, update the value below + # NOTE: this is disabled by default since this is a high volume eval that can be costly + - DISABLE_GENERATIVE_AI=${DISABLE_GENERATIVE_AI:-true} extra_hosts: - "host.docker.internal:host-gateway" logging: @@ -148,7 +151,7 @@ services: relational_db: image: postgres:15.2-alpine - command: -c 'max_connections=150' + command: -c 'max_connections=250' restart: always environment: - POSTGRES_USER=${POSTGRES_USER:-postgres} diff --git a/deployment/helm/charts/danswer/Chart.yaml b/deployment/helm/charts/danswer/Chart.yaml index 490ce69a39b..f76db88f905 100644 --- a/deployment/helm/charts/danswer/Chart.yaml +++ b/deployment/helm/charts/danswer/Chart.yaml @@ -5,8 +5,8 @@ home: https://www.danswer.ai/ sources: - "https://github.com/stackhpc/danswer" type: application -version: 0.2.3 -appVersion: v0.7.4 +version: 0.2.4 +appVersion: v0.12.0 dependencies: - name: postgresql version: 14.3.1 diff --git a/deployment/helm/charts/danswer/templates/inference-model-deployment.yaml b/deployment/helm/charts/danswer/templates/inference-model-deployment.yaml index 391a8e4289b..3897bba078e 100644 --- a/deployment/helm/charts/danswer/templates/inference-model-deployment.yaml +++ b/deployment/helm/charts/danswer/templates/inference-model-deployment.yaml @@ -25,6 +25,10 @@ spec: image: "{{ .Values.inferenceCapability.deployment.image.repository }}:{{ .Values.inferenceCapability.deployment.image.tag | default .Values.appVersionOverride | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.inferenceCapability.deployment.image.pullPolicy }} command: {{ toYaml .Values.inferenceCapability.deployment.command | nindent 14 }} + {{- if .Values.inferenceCapability.deployment.resources }} + resources: + {{- toYaml .Values.inferenceCapability.deployment.resources | nindent 10 }} + {{- end }} ports: - containerPort: {{ .Values.inferenceCapability.service.port }} envFrom: diff --git a/deployment/helm/charts/danswer/values.yaml b/deployment/helm/charts/danswer/values.yaml index b1b6c88a148..e965fe0b303 100644 --- a/deployment/helm/charts/danswer/values.yaml +++ b/deployment/helm/charts/danswer/values.yaml @@ -11,7 +11,7 @@ appVersionOverride: # e.g "v0.3.93" # tags to refer to downstream StackHPC-modified images. 
# The full image ref will be: # {{ image-name }}:{{ image-tag or appVersion }}-{{ tagSuffix }} -tagSuffix: stackhpc.4 +tagSuffix: stackhpc.1 zenithClient: iconUrl: https://raw.githubusercontent.com/danswer-ai/danswer/1fabd9372d66cd54238847197c33f091a724803b/Danswer.png @@ -39,6 +39,7 @@ inferenceCapability: tag: pullPolicy: IfNotPresent command: ["uvicorn", "model_server.main:app", "--host", "0.0.0.0", "--port", "9000"] + resources: port: 9000 volumeMounts: - name: inference-model-storage diff --git a/deployment/kubernetes/api_server-service-deployment.yaml b/deployment/kubernetes/api_server-service-deployment.yaml index ccbbc906d61..0e1cd79af58 100644 --- a/deployment/kubernetes/api_server-service-deployment.yaml +++ b/deployment/kubernetes/api_server-service-deployment.yaml @@ -27,36 +27,36 @@ spec: app: api-server spec: containers: - - name: api-server - image: danswer/danswer-backend:latest - imagePullPolicy: IfNotPresent - command: - - "/bin/sh" - - "-c" - - | - alembic upgrade head && - echo "Starting Danswer Api Server" && - uvicorn danswer.main:app --host 0.0.0.0 --port 8080 - ports: - - containerPort: 8080 - # There are some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - env: - - name: OAUTH_CLIENT_ID - valueFrom: - secretKeyRef: - name: danswer-secrets - key: google_oauth_client_id - - name: OAUTH_CLIENT_SECRET - valueFrom: - secretKeyRef: - name: danswer-secrets - key: google_oauth_client_secret - - name: REDIS_PASSWORD - valueFrom: - secretKeyRef: - name: danswer-secrets - key: redis_password - envFrom: - - configMapRef: - name: env-configmap + - name: api-server + image: danswer/danswer-backend:latest + imagePullPolicy: IfNotPresent + command: + - "/bin/sh" + - "-c" + - | + alembic upgrade head && + echo "Starting Danswer Api Server" && + uvicorn danswer.main:app --host 0.0.0.0 --port 8080 + ports: + - containerPort: 8080 + # There are some extra values since this is shared between services + # There are no conflicts though, extra env variables are simply ignored + env: + - name: OAUTH_CLIENT_ID + valueFrom: + secretKeyRef: + name: danswer-secrets + key: google_oauth_client_id + - name: OAUTH_CLIENT_SECRET + valueFrom: + secretKeyRef: + name: danswer-secrets + key: google_oauth_client_secret + - name: REDIS_PASSWORD + valueFrom: + secretKeyRef: + name: danswer-secrets + key: redis_password + envFrom: + - configMapRef: + name: env-configmap diff --git a/deployment/kubernetes/background-deployment.yaml b/deployment/kubernetes/background-deployment.yaml index dcb3c89a0c6..4989d889fdf 100644 --- a/deployment/kubernetes/background-deployment.yaml +++ b/deployment/kubernetes/background-deployment.yaml @@ -13,18 +13,35 @@ spec: app: background spec: containers: - - name: background - image: danswer/danswer-backend:latest - imagePullPolicy: IfNotPresent - command: ["/usr/bin/supervisord", "-c", "/etc/supervisor/conf.d/supervisord.conf"] - # There are some extra values since this is shared between services - # There are no conflicts though, extra env variables are simply ignored - env: - - name: REDIS_PASSWORD - valueFrom: - secretKeyRef: - name: danswer-secrets - key: redis_password - envFrom: - - configMapRef: - name: env-configmap \ No newline at end of file + - name: background + image: danswer/danswer-backend:latest + imagePullPolicy: IfNotPresent + command: + [ + "/usr/bin/supervisord", + "-c", + "/etc/supervisor/conf.d/supervisord.conf", + ] + env: + - name: REDIS_PASSWORD + valueFrom: + 
secretKeyRef:
+                  name: danswer-secrets
+                  key: redis_password
+          envFrom:
+            - configMapRef:
+                name: env-configmap
+          # Uncomment the following lines if you need to include a custom CA certificate
+          # Optional volume mount for CA certificate
+          # volumeMounts:
+          #   - name: my-ca-cert-volume
+          #     mountPath: /etc/ssl/certs/custom-ca.crt
+          #     subPath: my-ca.crt
+      # Optional volume for CA certificate
+      # volumes:
+      #   - name: my-ca-cert-volume
+      #     secret:
+      #       secretName: my-ca-cert
+      #       items:
+      #         - key: my-ca.crt
+      #           path: my-ca.crt
diff --git a/deployment/kubernetes/env-configmap.yaml b/deployment/kubernetes/env-configmap.yaml
index b833e0791ec..e1eefaeca90 100644
--- a/deployment/kubernetes/env-configmap.yaml
+++ b/deployment/kubernetes/env-configmap.yaml
@@ -2,7 +2,8 @@ apiVersion: v1
 kind: ConfigMap
 metadata:
   name: env-configmap
-data:
+data:
+  # Auth Setting, also check the secrets file
   AUTH_TYPE: "disabled" # Change this for production uses unless Danswer is only accessible behind VPN
   ENCRYPTION_KEY_SECRET: "" # This should not be specified directly in the yaml, this is just for reference
@@ -31,6 +32,7 @@ data:
   QA_PROMPT_OVERRIDE: ""
   # Other Services
   POSTGRES_HOST: "relational-db-service"
+  POSTGRES_DEFAULT_SCHEMA: ""
   VESPA_HOST: "document-index-service"
   REDIS_HOST: "redis-service"
   # Internet Search Tool
diff --git a/deployment/kubernetes/postgres-service-deployment.yaml b/deployment/kubernetes/postgres-service-deployment.yaml
index 4a0b2bbdbcc..e89e625589d 100644
--- a/deployment/kubernetes/postgres-service-deployment.yaml
+++ b/deployment/kubernetes/postgres-service-deployment.yaml
@@ -40,7 +40,7 @@ spec:
             secretKeyRef:
               name: danswer-secrets
               key: postgres_password
-        args: ["-c", "max_connections=150"]
+        args: ["-c", "max_connections=250"]
         ports:
           - containerPort: 5432
         volumeMounts:
diff --git a/web/.eslintrc.json b/web/.eslintrc.json
index bffb357a712..f0f3abee419 100644
--- a/web/.eslintrc.json
+++ b/web/.eslintrc.json
@@ -1,3 +1,6 @@
 {
-  "extends": "next/core-web-vitals"
+  "extends": "next/core-web-vitals",
+  "rules": {
+    "@next/next/no-img-element": "off"
+  }
 }
diff --git a/web/.gitignore b/web/.gitignore
index c87c9b392c0..e2a2a775c3b 100644
--- a/web/.gitignore
+++ b/web/.gitignore
@@ -1,5 +1,5 @@
 # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
-
+.env.sentry-build-plugin
 # dependencies
 /node_modules
 /.pnp
diff --git a/web/Dockerfile b/web/Dockerfile
index 48c13f57be1..0edad2a9446 100644
--- a/web/Dockerfile
+++ b/web/Dockerfile
@@ -8,7 +8,7 @@ Edition features outside of personal development or testing purposes. Please rea
 founders@danswer.ai for more information. Please visit https://github.com/danswer-ai/danswer"
 
 # Default DANSWER_VERSION, typically overriden during builds by GitHub Actions.
-ARG DANSWER_VERSION=0.3-dev +ARG DANSWER_VERSION=0.8-dev ENV DANSWER_VERSION=${DANSWER_VERSION} RUN echo "DANSWER_VERSION: ${DANSWER_VERSION}" @@ -61,6 +61,13 @@ ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT} ARG NEXT_PUBLIC_CUSTOM_REFRESH_URL ENV NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL} +ARG NEXT_PUBLIC_POSTHOG_KEY +ARG NEXT_PUBLIC_POSTHOG_HOST +ENV NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY} +ENV NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST} + +ARG NEXT_PUBLIC_SENTRY_DSN +ENV NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN} RUN npx next build @@ -122,6 +129,15 @@ ENV NEXT_PUBLIC_DISABLE_LOGOUT=${NEXT_PUBLIC_DISABLE_LOGOUT} ARG NEXT_PUBLIC_CUSTOM_REFRESH_URL ENV NEXT_PUBLIC_CUSTOM_REFRESH_URL=${NEXT_PUBLIC_CUSTOM_REFRESH_URL} + +ARG NEXT_PUBLIC_POSTHOG_KEY +ARG NEXT_PUBLIC_POSTHOG_HOST +ENV NEXT_PUBLIC_POSTHOG_KEY=${NEXT_PUBLIC_POSTHOG_KEY} +ENV NEXT_PUBLIC_POSTHOG_HOST=${NEXT_PUBLIC_POSTHOG_HOST} +ARG NEXT_PUBLIC_SENTRY_DSN +ENV NEXT_PUBLIC_SENTRY_DSN=${NEXT_PUBLIC_SENTRY_DSN} + + # Note: Don't expose ports here, Compose will handle that for us if necessary. # If you want to run this without compose, specify the ports to # expose via cli diff --git a/web/components.json b/web/components.json new file mode 100644 index 00000000000..cbde1f415b0 --- /dev/null +++ b/web/components.json @@ -0,0 +1,20 @@ +{ + "$schema": "https://ui.shadcn.com/schema.json", + "style": "default", + "rsc": true, + "tsx": true, + "tailwind": { + "config": "tailwind.config.js", + "css": "src/app/globals.css", + "baseColor": "neutral", + "cssVariables": false, + "prefix": "" + }, + "aliases": { + "components": "@/components", + "utils": "@/lib/utils", + "ui": "@/components/ui", + "lib": "@/lib", + "hooks": "@/hooks" + } +} diff --git a/web/instrumentation.ts b/web/instrumentation.ts new file mode 100644 index 00000000000..dc0128848ea --- /dev/null +++ b/web/instrumentation.ts @@ -0,0 +1,12 @@ +export async function register() { + if (process.env.NEXT_PUBLIC_SENTRY_DSN) { + if (process.env.NEXT_RUNTIME === "nodejs") { + await import("./sentry.client.config"); + await import("./sentry.server.config"); + } + + if (process.env.NEXT_RUNTIME === "edge") { + await import("./sentry.edge.config"); + } + } +} diff --git a/web/next.config.js b/web/next.config.js index 92812c513b7..7e001f764ba 100644 --- a/web/next.config.js +++ b/web/next.config.js @@ -4,13 +4,40 @@ const env_version = process.env.DANSWER_VERSION; // version from env variable // Use env version if set & valid, otherwise default to package version const version = env_version || package_version; +// Always require withSentryConfig +const { withSentryConfig } = require("@sentry/nextjs"); + /** @type {import('next').NextConfig} */ const nextConfig = { output: "standalone", - swcMinify: true, publicRuntimeConfig: { version, }, }; -module.exports = nextConfig; +// Sentry configuration for error monitoring: +// - Without SENTRY_AUTH_TOKEN and NEXT_PUBLIC_SENTRY_DSN: Sentry is completely disabled +// - With both configured: Only unhandled errors are captured (no performance/session tracking) + +// Determine if Sentry should be enabled +const sentryEnabled = Boolean( + process.env.SENTRY_AUTH_TOKEN && process.env.NEXT_PUBLIC_SENTRY_DSN +); + +// Sentry webpack plugin options +const sentryWebpackPluginOptions = { + org: process.env.SENTRY_ORG || "danswer", + project: process.env.SENTRY_PROJECT || "data-plane-web", + authToken: process.env.SENTRY_AUTH_TOKEN, + silent: !sentryEnabled, // Silence output when Sentry is 
disabled + dryRun: !sentryEnabled, // Don't upload source maps when Sentry is disabled + sourceMaps: { + include: ["./.next"], + validate: false, + urlPrefix: "~/_next", + skip: !sentryEnabled, + }, +}; + +// Export the module with conditional Sentry configuration +module.exports = withSentryConfig(nextConfig, sentryWebpackPluginOptions); diff --git a/web/package-lock.json b/web/package-lock.json index 338cf0a9f0f..e8c0cd86f15 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -11,11 +11,19 @@ "@dnd-kit/core": "^6.1.0", "@dnd-kit/modifiers": "^7.0.0", "@dnd-kit/sortable": "^8.0.0", + "@dnd-kit/utilities": "^3.2.2", + "@headlessui/react": "^2.2.0", + "@headlessui/tailwindcss": "^0.2.1", "@phosphor-icons/react": "^2.0.8", "@radix-ui/react-dialog": "^1.0.5", - "@radix-ui/react-popover": "^1.0.7", - "@radix-ui/react-tooltip": "^1.0.7", - "@tremor/react": "^3.9.2", + "@radix-ui/react-popover": "^1.1.2", + "@radix-ui/react-select": "^2.1.2", + "@radix-ui/react-separator": "^1.1.0", + "@radix-ui/react-slot": "^1.1.0", + "@radix-ui/react-tabs": "^1.1.1", + "@radix-ui/react-tooltip": "^1.1.3", + "@sentry/nextjs": "^8.34.0", + "@stripe/stripe-js": "^4.6.0", "@types/js-cookie": "^3.0.3", "@types/lodash": "^4.17.0", "@types/node": "18.15.11", @@ -24,30 +32,40 @@ "@types/react-dom": "18.0.11", "@types/uuid": "^9.0.8", "autoprefixer": "^10.4.14", + "class-variance-authority": "^0.7.0", + "clsx": "^2.1.1", + "date-fns": "^3.6.0", "formik": "^2.2.9", "js-cookie": "^3.0.5", "lodash": "^4.17.21", + "lucide-react": "^0.454.0", "mdast-util-find-and-replace": "^3.0.1", - "next": "^14.2.3", + "next": "^15.0.2", "npm": "^10.8.0", "postcss": "^8.4.31", + "posthog-js": "^1.176.0", "prismjs": "^1.29.0", "react": "^18.3.1", + "react-day-picker": "^8.10.1", "react-dom": "^18.3.1", "react-dropzone": "^14.2.3", "react-icons": "^4.8.0", "react-loader-spinner": "^5.4.5", "react-markdown": "^9.0.1", "react-select": "^5.8.0", + "recharts": "^2.13.1", "rehype-prism-plus": "^2.0.0", "remark-gfm": "^4.0.0", "semver": "^7.5.4", - "sharp": "^0.32.6", + "sharp": "^0.33.5", + "stripe": "^17.0.0", "swr": "^2.1.5", + "tailwind-merge": "^2.5.4", "tailwindcss": "^3.3.1", + "tailwindcss-animate": "^1.0.7", "typescript": "5.0.3", "uuid": "^9.0.1", - "yup": "^1.1.1" + "yup": "^1.4.0" }, "devDependencies": { "@tailwindcss/typography": "^0.5.10", @@ -71,7 +89,6 @@ "version": "2.3.0", "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz", "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==", - "peer": true, "dependencies": { "@jridgewell/gen-mapping": "^0.3.5", "@jridgewell/trace-mapping": "^0.3.24" @@ -96,7 +113,6 @@ "version": "7.24.4", "resolved": "https://registry.npmjs.org/@babel/compat-data/-/compat-data-7.24.4.tgz", "integrity": "sha512-vg8Gih2MLK+kOkHJp4gBEIkyaIi00jgWot2D9QOmmfLC8jINSOzmCLta6Bvz/JSBCqnegV0L80jhxkol5GWNfQ==", - "peer": true, "engines": { "node": ">=6.9.0" } @@ -105,7 +121,6 @@ "version": "7.24.5", "resolved": "https://registry.npmjs.org/@babel/core/-/core-7.24.5.tgz", "integrity": "sha512-tVQRucExLQ02Boi4vdPp49svNGcfL2GhdTCT9aldhXgCJVAI21EtRfBettiuLUwce/7r6bFdgs6JFkcdTiFttA==", - "peer": true, "dependencies": { "@ampproject/remapping": "^2.2.0", "@babel/code-frame": "^7.24.2", @@ -135,7 +150,6 @@ "version": "6.3.1", "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "peer": true, 
"bin": { "semver": "bin/semver.js" } @@ -155,11 +169,11 @@ } }, "node_modules/@babel/helper-annotate-as-pure": { - "version": "7.22.5", - "resolved": "https://registry.npmjs.org/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.22.5.tgz", - "integrity": "sha512-LvBTxu8bQSQkcyKOU+a1btnNFQ1dMAd0R6PyW3arXes06F6QLWLIrd681bxRPIXlrMGR3XYnW9JyML7dP3qgxg==", + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-annotate-as-pure/-/helper-annotate-as-pure-7.25.9.tgz", + "integrity": "sha512-gv7320KBUFJz1RnylIg5WWYPRXKZ884AGkYpgpWW02TH66Dl+HaC1t1CKd0z3R4b6hdYEcmrNZHUmfCP+1u3/g==", "dependencies": { - "@babel/types": "^7.22.5" + "@babel/types": "^7.25.9" }, "engines": { "node": ">=6.9.0" @@ -169,7 +183,6 @@ "version": "7.23.6", "resolved": "https://registry.npmjs.org/@babel/helper-compilation-targets/-/helper-compilation-targets-7.23.6.tgz", "integrity": "sha512-9JB548GZoQVmzrFgp8o7KxdgkTGm6xs9DW0o/Pim72UDjzr5ObUQ6ZzYPqA+g9OTS2bBQoctLJrky0RDCAWRgQ==", - "peer": true, "dependencies": { "@babel/compat-data": "^7.23.5", "@babel/helper-validator-option": "^7.23.5", @@ -185,7 +198,6 @@ "version": "5.1.1", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz", "integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==", - "peer": true, "dependencies": { "yallist": "^3.0.2" } @@ -194,7 +206,6 @@ "version": "6.3.1", "resolved": "https://registry.npmjs.org/semver/-/semver-6.3.1.tgz", "integrity": "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==", - "peer": true, "bin": { "semver": "bin/semver.js" } @@ -245,7 +256,6 @@ "version": "7.24.5", "resolved": "https://registry.npmjs.org/@babel/helper-module-transforms/-/helper-module-transforms-7.24.5.tgz", "integrity": "sha512-9GxeY8c2d2mdQUP1Dye0ks3VDyIMS98kt/llQ2nUId8IsWqTF0l1LkSX0/uP7l7MCDrzXS009Hyhe2gzTiGW8A==", - "peer": true, "dependencies": { "@babel/helper-environment-visitor": "^7.22.20", "@babel/helper-module-imports": "^7.24.3", @@ -261,9 +271,9 @@ } }, "node_modules/@babel/helper-plugin-utils": { - "version": "7.24.5", - "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.24.5.tgz", - "integrity": "sha512-xjNLDopRzW2o6ba0gKbkZq5YWEBaK3PCyTOY1K2P/O07LGMhMqlMXPxwN4S5/RhWuCobT8z0jrlKGlYmeR1OhQ==", + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-plugin-utils/-/helper-plugin-utils-7.25.9.tgz", + "integrity": "sha512-kSMlyUVdWe25rEsRGviIgOWnoT/nfABVWlqt9N19/dIPWViAOW2s9wznP5tURbs/IDuNk4gPy3YdYRgH3uxhBw==", "engines": { "node": ">=6.9.0" } @@ -272,7 +282,6 @@ "version": "7.24.5", "resolved": "https://registry.npmjs.org/@babel/helper-simple-access/-/helper-simple-access-7.24.5.tgz", "integrity": "sha512-uH3Hmf5q5n7n8mz7arjUlDOCbttY/DW4DYhE6FUsjKJ/oYC1kQQUvwEQWxRwUpX9qQKRXeqLwWxrqilMrf32sQ==", - "peer": true, "dependencies": { "@babel/types": "^7.24.5" }, @@ -292,17 +301,17 @@ } }, "node_modules/@babel/helper-string-parser": { - "version": "7.24.1", - "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.24.1.tgz", - "integrity": "sha512-2ofRCjnnA9y+wk8b9IAREroeUP02KHp431N2mhKniy2yKIDKpbrHv9eXwm8cBeWQYcJmzv5qKCu65P47eCF7CQ==", + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-string-parser/-/helper-string-parser-7.25.9.tgz", + "integrity": "sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==", "engines": { "node": ">=6.9.0" } }, 
"node_modules/@babel/helper-validator-identifier": { - "version": "7.24.5", - "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.24.5.tgz", - "integrity": "sha512-3q93SSKX2TWCG30M2G2kwaKeTYgEUp5Snjuj8qm729SObL6nbtUldAi37qbxkD5gg3xnBio+f9nqpSepGZMvxA==", + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.25.9.tgz", + "integrity": "sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==", "engines": { "node": ">=6.9.0" } @@ -311,7 +320,6 @@ "version": "7.23.5", "resolved": "https://registry.npmjs.org/@babel/helper-validator-option/-/helper-validator-option-7.23.5.tgz", "integrity": "sha512-85ttAOMLsr53VgXkTbkx8oA6YTfT4q7/HzXSLEYmjcSTJPMPQtvq1BD79Byep5xMUYbGRzEpDsjUf3dyp54IKw==", - "peer": true, "engines": { "node": ">=6.9.0" } @@ -320,7 +328,6 @@ "version": "7.24.5", "resolved": "https://registry.npmjs.org/@babel/helpers/-/helpers-7.24.5.tgz", "integrity": "sha512-CiQmBMMpMQHwM5m01YnrM6imUG1ebgYJ+fAIW4FZe6m4qHTPaRHti+R8cggAwkdz4oXhtO4/K9JWlh+8hIfR2Q==", - "peer": true, "dependencies": { "@babel/template": "^7.24.0", "@babel/traverse": "^7.24.5", @@ -420,11 +427,11 @@ } }, "node_modules/@babel/plugin-syntax-jsx": { - "version": "7.24.1", - "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.24.1.tgz", - "integrity": "sha512-2eCtxZXf+kbkMIsXS4poTvT4Yu5rXiRa+9xGVT56raghjmBTKMpFNc9R4IDiB4emao9eO22Ox7CxuJG7BgExqA==", + "version": "7.25.9", + "resolved": "https://registry.npmjs.org/@babel/plugin-syntax-jsx/-/plugin-syntax-jsx-7.25.9.tgz", + "integrity": "sha512-ld6oezHQMZsZfp6pWtbjaNDF2tiiCYYDqQszHt5VV437lewP9aSi2Of99CK0D0XB21k7FLgnLcmQKyKzynfeAA==", "dependencies": { - "@babel/helper-plugin-utils": "^7.24.0" + "@babel/helper-plugin-utils": "^7.25.9" }, "engines": { "node": ">=6.9.0" @@ -486,29 +493,17 @@ } }, "node_modules/@babel/types": { - "version": "7.24.5", - "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.24.5.tgz", - "integrity": "sha512-6mQNsaLeXTw0nxYUYu+NSa4Hx4BlF1x1x8/PMFbiR+GBSr+2DkECc69b8hgy2frEodNcvPffeH8YfWd3LI6jhQ==", + "version": "7.26.0", + "resolved": "https://registry.npmjs.org/@babel/types/-/types-7.26.0.tgz", + "integrity": "sha512-Z/yiTPj+lDVnF7lWeKCIJzaIkI0vYO87dMpZ4bg4TDrFe4XXLFWL1TbXU27gBP3QccxV9mZICCrnjnYlJjXHOA==", "dependencies": { - "@babel/helper-string-parser": "^7.24.1", - "@babel/helper-validator-identifier": "^7.24.5", - "to-fast-properties": "^2.0.0" + "@babel/helper-string-parser": "^7.25.9", + "@babel/helper-validator-identifier": "^7.25.9" }, "engines": { "node": ">=6.9.0" } }, - "node_modules/@dnd-kit/accessibility": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@dnd-kit/accessibility/-/accessibility-3.1.0.tgz", - "integrity": "sha512-ea7IkhKvlJUv9iSHJOnxinBcoOI3ppGnnL+VDJ75O45Nss6HtZd8IdN8touXPDtASfeI2T2LImb8VOZcL47wjQ==", - "dependencies": { - "tslib": "^2.0.0" - }, - "peerDependencies": { - "react": ">=16.8.0" - } - }, "node_modules/@dnd-kit/core": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/@dnd-kit/core/-/core-6.1.0.tgz", @@ -523,6 +518,17 @@ "react-dom": ">=16.8.0" } }, + "node_modules/@dnd-kit/core/node_modules/@dnd-kit/accessibility": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/@dnd-kit/accessibility/-/accessibility-3.1.0.tgz", + "integrity": "sha512-ea7IkhKvlJUv9iSHJOnxinBcoOI3ppGnnL+VDJ75O45Nss6HtZd8IdN8touXPDtASfeI2T2LImb8VOZcL47wjQ==", + "dependencies": { + 
"tslib": "^2.0.0" + }, + "peerDependencies": { + "react": ">=16.8.0" + } + }, "node_modules/@dnd-kit/modifiers": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/@dnd-kit/modifiers/-/modifiers-7.0.0.tgz", @@ -560,16 +566,25 @@ "react": ">=16.8.0" } }, + "node_modules/@emnapi/runtime": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.3.1.tgz", + "integrity": "sha512-kEBmG8KyqtxJZv+ygbEim+KCGtIq1fC22Ms3S4ziXmYKm8uyoLX0MHONVKwp+9opg390VaKRNt4a7A9NwmpNhw==", + "optional": true, + "dependencies": { + "tslib": "^2.4.0" + } + }, "node_modules/@emotion/babel-plugin": { - "version": "11.11.0", - "resolved": "https://registry.npmjs.org/@emotion/babel-plugin/-/babel-plugin-11.11.0.tgz", - "integrity": "sha512-m4HEDZleaaCH+XgDDsPF15Ht6wTLsgDTeR3WYj9Q/k76JtWhrJjcP4+/XlG8LGT/Rol9qUfOIztXeA84ATpqPQ==", + "version": "11.12.0", + "resolved": "https://registry.npmjs.org/@emotion/babel-plugin/-/babel-plugin-11.12.0.tgz", + "integrity": "sha512-y2WQb+oP8Jqvvclh8Q55gLUyb7UFvgv7eJfsj7td5TToBrIUtPay2kMrZi4xjq9qw2vD0ZR5fSho0yqoFgX7Rw==", "dependencies": { "@babel/helper-module-imports": "^7.16.7", "@babel/runtime": "^7.18.3", - "@emotion/hash": "^0.9.1", - "@emotion/memoize": "^0.8.1", - "@emotion/serialize": "^1.1.2", + "@emotion/hash": "^0.9.2", + "@emotion/memoize": "^0.9.0", + "@emotion/serialize": "^1.2.0", "babel-plugin-macros": "^3.1.0", "convert-source-map": "^1.5.0", "escape-string-regexp": "^4.0.0", @@ -584,79 +599,56 @@ "integrity": "sha512-ASFBup0Mz1uyiIjANan1jzLQami9z1PoYSZCiiYW2FczPbenXc45FZdBZLzOT+r6+iciuEModtmCti+hjaAk0A==" }, "node_modules/@emotion/cache": { - "version": "11.11.0", - "resolved": "https://registry.npmjs.org/@emotion/cache/-/cache-11.11.0.tgz", - "integrity": "sha512-P34z9ssTCBi3e9EI1ZsWpNHcfY1r09ZO0rZbRO2ob3ZQMnFI35jB536qoXbkdesr5EUhYi22anuEJuyxifaqAQ==", - "dependencies": { - "@emotion/memoize": "^0.8.1", - "@emotion/sheet": "^1.2.2", - "@emotion/utils": "^1.2.1", - "@emotion/weak-memoize": "^0.3.1", + "version": "11.13.1", + "resolved": "https://registry.npmjs.org/@emotion/cache/-/cache-11.13.1.tgz", + "integrity": "sha512-iqouYkuEblRcXmylXIwwOodiEK5Ifl7JcX7o6V4jI3iW4mLXX3dmt5xwBtIkJiQEXFAI+pC8X0i67yiPkH9Ucw==", + "dependencies": { + "@emotion/memoize": "^0.9.0", + "@emotion/sheet": "^1.4.0", + "@emotion/utils": "^1.4.0", + "@emotion/weak-memoize": "^0.4.0", "stylis": "4.2.0" } }, "node_modules/@emotion/hash": { - "version": "0.9.1", - "resolved": "https://registry.npmjs.org/@emotion/hash/-/hash-0.9.1.tgz", - "integrity": "sha512-gJB6HLm5rYwSLI6PQa+X1t5CFGrv1J1TWG+sOyMCeKz2ojaj6Fnl/rZEspogG+cvqbt4AE/2eIyD2QfLKTBNlQ==" + "version": "0.9.2", + "resolved": "https://registry.npmjs.org/@emotion/hash/-/hash-0.9.2.tgz", + "integrity": "sha512-MyqliTZGuOm3+5ZRSaaBGP3USLw6+EGykkwZns2EPC5g8jJ4z9OrdZY9apkl3+UP9+sdz76YYkwCKP5gh8iY3g==" }, "node_modules/@emotion/is-prop-valid": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/@emotion/is-prop-valid/-/is-prop-valid-1.2.2.tgz", - "integrity": "sha512-uNsoYd37AFmaCdXlg6EYD1KaPOaRWRByMCYzbKUX4+hhMfrxdVSelShywL4JVaAeM/eHUOSprYBQls+/neX3pw==", + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/@emotion/is-prop-valid/-/is-prop-valid-1.3.1.tgz", + "integrity": "sha512-/ACwoqx7XQi9knQs/G0qKvv5teDMhD7bXYns9N/wM8ah8iNb8jZ2uNO0YOgiq2o2poIvVtJS2YALasQuMSQ7Kw==", "dependencies": { - "@emotion/memoize": "^0.8.1" + "@emotion/memoize": "^0.9.0" } }, "node_modules/@emotion/memoize": { - "version": "0.8.1", - "resolved": 
"https://registry.npmjs.org/@emotion/memoize/-/memoize-0.8.1.tgz", - "integrity": "sha512-W2P2c/VRW1/1tLox0mVUalvnWXxavmv/Oum2aPsRcoDJuob75FC3Y8FbpfLwUegRcxINtGUMPq0tFCvYNTBXNA==" - }, - "node_modules/@emotion/react": { - "version": "11.11.4", - "resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.11.4.tgz", - "integrity": "sha512-t8AjMlF0gHpvvxk5mAtCqR4vmxiGHCeJBaQO6gncUSdklELOgtwjerNY2yuJNfwnc6vi16U/+uMF+afIawJ9iw==", - "dependencies": { - "@babel/runtime": "^7.18.3", - "@emotion/babel-plugin": "^11.11.0", - "@emotion/cache": "^11.11.0", - "@emotion/serialize": "^1.1.3", - "@emotion/use-insertion-effect-with-fallbacks": "^1.0.1", - "@emotion/utils": "^1.2.1", - "@emotion/weak-memoize": "^0.3.1", - "hoist-non-react-statics": "^3.3.1" - }, - "peerDependencies": { - "react": ">=16.8.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } + "version": "0.9.0", + "resolved": "https://registry.npmjs.org/@emotion/memoize/-/memoize-0.9.0.tgz", + "integrity": "sha512-30FAj7/EoJ5mwVPOWhAyCX+FPfMDrVecJAM+Iw9NRoSl4BBAQeqj4cApHHUXOVvIPgLVDsCFoz/hGD+5QQD1GQ==" }, "node_modules/@emotion/serialize": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/@emotion/serialize/-/serialize-1.1.4.tgz", - "integrity": "sha512-RIN04MBT8g+FnDwgvIUi8czvr1LU1alUMI05LekWB5DGyTm8cCBMCRpq3GqaiyEDRptEXOyXnvZ58GZYu4kBxQ==", + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/@emotion/serialize/-/serialize-1.3.2.tgz", + "integrity": "sha512-grVnMvVPK9yUVE6rkKfAJlYZgo0cu3l9iMC77V7DW6E1DUIrU68pSEXRmFZFOFB1QFo57TncmOcvcbMDWsL4yA==", "dependencies": { - "@emotion/hash": "^0.9.1", - "@emotion/memoize": "^0.8.1", - "@emotion/unitless": "^0.8.1", - "@emotion/utils": "^1.2.1", + "@emotion/hash": "^0.9.2", + "@emotion/memoize": "^0.9.0", + "@emotion/unitless": "^0.10.0", + "@emotion/utils": "^1.4.1", "csstype": "^3.0.2" } }, "node_modules/@emotion/serialize/node_modules/@emotion/unitless": { - "version": "0.8.1", - "resolved": "https://registry.npmjs.org/@emotion/unitless/-/unitless-0.8.1.tgz", - "integrity": "sha512-KOEGMu6dmJZtpadb476IsZBclKvILjopjUii3V+7MnXIQCYh8W3NgNcgwo21n9LXZX6EDIKvqfjYxXebDwxKmQ==" + "version": "0.10.0", + "resolved": "https://registry.npmjs.org/@emotion/unitless/-/unitless-0.10.0.tgz", + "integrity": "sha512-dFoMUuQA20zvtVTuxZww6OHoJYgrzfKM1t52mVySDJnMSEa08ruEvdYQbhvyu6soU+NeLVd3yKfTfT0NeV6qGg==" }, "node_modules/@emotion/sheet": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/@emotion/sheet/-/sheet-1.2.2.tgz", - "integrity": "sha512-0QBtGvaqtWi+nx6doRwDdBIzhNdZrXUppvTM4dtZZWEGTXL/XE/yJxLMGlDT1Gt+UHH5IX1n+jkXyytE/av7OA==" + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/@emotion/sheet/-/sheet-1.4.0.tgz", + "integrity": "sha512-fTBW9/8r2w3dXWYM4HCB1Rdp8NLibOw2+XELH5m5+AkWiL/KqYX6dc0kKYlaYyKjrQ6ds33MCdMPEwgs2z1rqg==" }, "node_modules/@emotion/stylis": { "version": "0.8.5", @@ -668,23 +660,15 @@ "resolved": "https://registry.npmjs.org/@emotion/unitless/-/unitless-0.7.5.tgz", "integrity": "sha512-OWORNpfjMsSSUBVrRBVGECkhWcULOAJz9ZW8uK9qgxD+87M7jHRcvh/A96XXNhXTLmKcoYSQtBEX7lHMO7YRwg==" }, - "node_modules/@emotion/use-insertion-effect-with-fallbacks": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@emotion/use-insertion-effect-with-fallbacks/-/use-insertion-effect-with-fallbacks-1.0.1.tgz", - "integrity": "sha512-jT/qyKZ9rzLErtrjGgdkMBn2OP8wl0G3sQlBb3YPryvKHsjvINUhVaPFfP+fpBcOkmrVOVEEHQFJ7nbj2TH2gw==", - "peerDependencies": { - "react": ">=16.8.0" - } - }, "node_modules/@emotion/utils": { - 
"version": "1.2.1", - "resolved": "https://registry.npmjs.org/@emotion/utils/-/utils-1.2.1.tgz", - "integrity": "sha512-Y2tGf3I+XVnajdItskUCn6LX+VUDmP6lTL4fcqsXAv43dnlbZiuW4MWQW38rW/BVWSE7Q/7+XQocmpnRYILUmg==" + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/@emotion/utils/-/utils-1.4.1.tgz", + "integrity": "sha512-BymCXzCG3r72VKJxaYVwOXATqXIZ85cuvg0YOUDxMGNrKc1DJRZk8MgV5wyXRyEayIMd4FuXJIUgTBXvDNW5cA==" }, "node_modules/@emotion/weak-memoize": { - "version": "0.3.1", - "resolved": "https://registry.npmjs.org/@emotion/weak-memoize/-/weak-memoize-0.3.1.tgz", - "integrity": "sha512-EsBwpc7hBUJWAsNPBmJy4hxWx12v6bshQsldrVmjxJoc3isbxhOrF2IcCpaXxfvq03NwkI7sbsOLXbYuqF/8Ww==" + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/@emotion/weak-memoize/-/weak-memoize-0.4.0.tgz", + "integrity": "sha512-snKqtPW01tN0ui7yu9rGv69aJXr/a/Ywvl11sUjNtEcRc+ng/mQriFL0wLXMef74iHa/EkftbDzU9F8iFbH+zg==" }, "node_modules/@eslint-community/eslint-utils": { "version": "4.4.0", @@ -759,69 +743,201 @@ "@floating-ui/utils": "^0.2.0" } }, - "node_modules/@floating-ui/react": { - "version": "0.19.2", - "resolved": "https://registry.npmjs.org/@floating-ui/react/-/react-0.19.2.tgz", - "integrity": "sha512-JyNk4A0Ezirq8FlXECvRtQOX/iBe5Ize0W/pLkrZjfHW9GUV7Xnq6zm6fyZuQzaHHqEnVizmvlA96e1/CkZv+w==", + "node_modules/@floating-ui/react-dom": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.1.2.tgz", + "integrity": "sha512-06okr5cgPzMNBy+Ycse2A6udMi4bqwW/zgBF/rwjcNqWkyr82Mcg8b0vjX8OJpZFy/FKjJmw6wV7t44kK6kW7A==", "dependencies": { - "@floating-ui/react-dom": "^1.3.0", - "aria-hidden": "^1.1.3", - "tabbable": "^6.0.1" + "@floating-ui/dom": "^1.0.0" }, "peerDependencies": { "react": ">=16.8.0", "react-dom": ">=16.8.0" } }, - "node_modules/@floating-ui/react-dom": { - "version": "2.0.9", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-2.0.9.tgz", - "integrity": "sha512-q0umO0+LQK4+p6aGyvzASqKbKOJcAHJ7ycE9CuUvfx3s9zTHWmGJTPOIlM/hmSBfUfg/XfY5YhLBLR/LHwShQQ==", + "node_modules/@floating-ui/utils": { + "version": "0.2.8", + "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.8.tgz", + "integrity": "sha512-kym7SodPp8/wloecOpcmSnWJsK7M0E5Wg8UcFA+uO4B9s5d0ywXOEro/8HM9x0rW+TljRzul/14UYz3TleT3ig==" + }, + "node_modules/@headlessui/react": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@headlessui/react/-/react-2.2.0.tgz", + "integrity": "sha512-RzCEg+LXsuI7mHiSomsu/gBJSjpupm6A1qIZ5sWjd7JhARNlMiSA4kKfJpCKwU9tE+zMRterhhrP74PvfJrpXQ==", "dependencies": { - "@floating-ui/dom": "^1.0.0" + "@floating-ui/react": "^0.26.16", + "@react-aria/focus": "^3.17.1", + "@react-aria/interactions": "^3.21.3", + "@tanstack/react-virtual": "^3.8.1" + }, + "engines": { + "node": ">=10" }, "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" + "react": "^18 || ^19 || ^19.0.0-rc", + "react-dom": "^18 || ^19 || ^19.0.0-rc" } }, - "node_modules/@floating-ui/react/node_modules/@floating-ui/react-dom": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/@floating-ui/react-dom/-/react-dom-1.3.0.tgz", - "integrity": "sha512-htwHm67Ji5E/pROEAr7f8IKFShuiCKHwUC/UY4vC3I5jiSvGFAYnSYiZO5MlGmads+QqvUkR9ANHEguGrDv72g==", + "node_modules/@headlessui/react/node_modules/@floating-ui/react": { + "version": "0.26.27", + "resolved": "https://registry.npmjs.org/@floating-ui/react/-/react-0.26.27.tgz", + "integrity": "sha512-jLP72x0Kr2CgY6eTYi/ra3VA9LOkTo4C+DUTrbFgFOExKy3omYVmwMjNKqxAHdsnyLS96BIDLcO2SlnsNf8KUQ==", 
"dependencies": { - "@floating-ui/dom": "^1.2.1" + "@floating-ui/react-dom": "^2.1.2", + "@floating-ui/utils": "^0.2.8", + "tabbable": "^6.0.0" }, "peerDependencies": { "react": ">=16.8.0", "react-dom": ">=16.8.0" } }, - "node_modules/@floating-ui/utils": { - "version": "0.2.2", - "resolved": "https://registry.npmjs.org/@floating-ui/utils/-/utils-0.2.2.tgz", - "integrity": "sha512-J4yDIIthosAsRZ5CPYP/jQvUAQtlZTTD/4suA08/FEnlxqW3sKS9iAhgsa9VYLZ6vDHn/ixJgIqRQPotoBjxIw==" + "node_modules/@headlessui/react/node_modules/@react-aria/focus": { + "version": "3.18.4", + "resolved": "https://registry.npmjs.org/@react-aria/focus/-/focus-3.18.4.tgz", + "integrity": "sha512-91J35077w9UNaMK1cpMUEFRkNNz0uZjnSwiyBCFuRdaVuivO53wNC9XtWSDNDdcO5cGy87vfJRVAiyoCn/mjqA==", + "dependencies": { + "@react-aria/interactions": "^3.22.4", + "@react-aria/utils": "^3.25.3", + "@react-types/shared": "^3.25.0", + "@swc/helpers": "^0.5.0", + "clsx": "^2.0.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } }, - "node_modules/@headlessui/react": { - "version": "1.7.19", - "resolved": "https://registry.npmjs.org/@headlessui/react/-/react-1.7.19.tgz", - "integrity": "sha512-Ll+8q3OlMJfJbAKM/+/Y2q6PPYbryqNTXDbryx7SXLIDamkF6iQFbriYHga0dY44PvDhvvBWCx1Xj4U5+G4hOw==", + "node_modules/@headlessui/react/node_modules/@react-aria/focus/node_modules/@react-aria/utils": { + "version": "3.25.3", + "resolved": "https://registry.npmjs.org/@react-aria/utils/-/utils-3.25.3.tgz", + "integrity": "sha512-PR5H/2vaD8fSq0H/UB9inNbc8KDcVmW6fYAfSWkkn+OAdhTTMVKqXXrZuZBWyFfSD5Ze7VN6acr4hrOQm2bmrA==", "dependencies": { - "@tanstack/react-virtual": "^3.0.0-beta.60", - "client-only": "^0.0.1" + "@react-aria/ssr": "^3.9.6", + "@react-stately/utils": "^3.10.4", + "@react-types/shared": "^3.25.0", + "@swc/helpers": "^0.5.0", + "clsx": "^2.0.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/focus/node_modules/@react-aria/utils/node_modules/@react-aria/ssr": { + "version": "3.9.6", + "resolved": "https://registry.npmjs.org/@react-aria/ssr/-/ssr-3.9.6.tgz", + "integrity": "sha512-iLo82l82ilMiVGy342SELjshuWottlb5+VefO3jOQqQRNYnJBFpUSadswDPbRimSgJUZuFwIEYs6AabkP038fA==", + "dependencies": { + "@swc/helpers": "^0.5.0" }, "engines": { - "node": ">=10" + "node": ">= 12" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/focus/node_modules/@react-aria/utils/node_modules/@react-stately/utils": { + "version": "3.10.4", + "resolved": "https://registry.npmjs.org/@react-stately/utils/-/utils-3.10.4.tgz", + "integrity": "sha512-gBEQEIMRh5f60KCm7QKQ2WfvhB2gLUr9b72sqUdIZ2EG+xuPgaIlCBeSicvjmjBvYZwOjoOEnmIkcx2GHp/HWw==", + "dependencies": { + "@swc/helpers": "^0.5.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/focus/node_modules/@react-types/shared": { + "version": "3.25.0", + "resolved": "https://registry.npmjs.org/@react-types/shared/-/shared-3.25.0.tgz", + "integrity": "sha512-OZSyhzU6vTdW3eV/mz5i6hQwQUhkRs7xwY2d1aqPvTdMe0+2cY7Fwp45PAiwYLEj73i9ro2FxF9qC4DvHGSCgQ==", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/interactions": { + "version": "3.22.4", + "resolved": 
"https://registry.npmjs.org/@react-aria/interactions/-/interactions-3.22.4.tgz", + "integrity": "sha512-E0vsgtpItmknq/MJELqYJwib+YN18Qag8nroqwjk1qOnBa9ROIkUhWJerLi1qs5diXq9LHKehZDXRlwPvdEFww==", + "dependencies": { + "@react-aria/ssr": "^3.9.6", + "@react-aria/utils": "^3.25.3", + "@react-types/shared": "^3.25.0", + "@swc/helpers": "^0.5.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/interactions/node_modules/@react-aria/ssr": { + "version": "3.9.6", + "resolved": "https://registry.npmjs.org/@react-aria/ssr/-/ssr-3.9.6.tgz", + "integrity": "sha512-iLo82l82ilMiVGy342SELjshuWottlb5+VefO3jOQqQRNYnJBFpUSadswDPbRimSgJUZuFwIEYs6AabkP038fA==", + "dependencies": { + "@swc/helpers": "^0.5.0" + }, + "engines": { + "node": ">= 12" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/interactions/node_modules/@react-aria/utils": { + "version": "3.25.3", + "resolved": "https://registry.npmjs.org/@react-aria/utils/-/utils-3.25.3.tgz", + "integrity": "sha512-PR5H/2vaD8fSq0H/UB9inNbc8KDcVmW6fYAfSWkkn+OAdhTTMVKqXXrZuZBWyFfSD5Ze7VN6acr4hrOQm2bmrA==", + "dependencies": { + "@react-aria/ssr": "^3.9.6", + "@react-stately/utils": "^3.10.4", + "@react-types/shared": "^3.25.0", + "@swc/helpers": "^0.5.0", + "clsx": "^2.0.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/interactions/node_modules/@react-aria/utils/node_modules/@react-stately/utils": { + "version": "3.10.4", + "resolved": "https://registry.npmjs.org/@react-stately/utils/-/utils-3.10.4.tgz", + "integrity": "sha512-gBEQEIMRh5f60KCm7QKQ2WfvhB2gLUr9b72sqUdIZ2EG+xuPgaIlCBeSicvjmjBvYZwOjoOEnmIkcx2GHp/HWw==", + "dependencies": { + "@swc/helpers": "^0.5.0" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@react-aria/interactions/node_modules/@react-types/shared": { + "version": "3.25.0", + "resolved": "https://registry.npmjs.org/@react-types/shared/-/shared-3.25.0.tgz", + "integrity": "sha512-OZSyhzU6vTdW3eV/mz5i6hQwQUhkRs7xwY2d1aqPvTdMe0+2cY7Fwp45PAiwYLEj73i9ro2FxF9qC4DvHGSCgQ==", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0-rc.1 || ^18.0.0 || ^19.0.0" + } + }, + "node_modules/@headlessui/react/node_modules/@tanstack/react-virtual": { + "version": "3.10.8", + "resolved": "https://registry.npmjs.org/@tanstack/react-virtual/-/react-virtual-3.10.8.tgz", + "integrity": "sha512-VbzbVGSsZlQktyLrP5nxE+vE1ZR+U0NFAWPbJLoG2+DKPwd2D7dVICTVIIaYlJqX1ZCEnYDbaOpmMwbsyhBoIA==", + "dependencies": { + "@tanstack/virtual-core": "3.10.8" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" }, "peerDependencies": { - "react": "^16 || ^17 || ^18", - "react-dom": "^16 || ^17 || ^18" + "react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" } }, "node_modules/@headlessui/tailwindcss": { - "version": "0.2.0", - "resolved": "https://registry.npmjs.org/@headlessui/tailwindcss/-/tailwindcss-0.2.0.tgz", - "integrity": "sha512-fpL830Fln1SykOCboExsWr3JIVeQKieLJ3XytLe/tt1A0XzqUthOftDmjcCYLW62w7mQI7wXcoPXr3tZ9QfGxw==", + "version": "0.2.1", + "resolved": "https://registry.npmjs.org/@headlessui/tailwindcss/-/tailwindcss-0.2.1.tgz", + "integrity": 
"sha512-2+5+NZ+RzMyrVeCZOxdbvkUSssSxGvcUxphkIfSVLpRiKsj+/63T2TOL9dBYMXVfj/CGr6hMxSRInzXv6YY7sA==", + "license": "MIT", "engines": { "node": ">=10" }, @@ -862,108 +978,10 @@ "integrity": "sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==", "dev": true }, - "node_modules/@isaacs/cliui": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", - "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "dependencies": { - "string-width": "^5.1.2", - "string-width-cjs": "npm:string-width@^4.2.0", - "strip-ansi": "^7.0.1", - "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", - "wrap-ansi": "^8.1.0", - "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" - }, - "engines": { - "node": ">=12" - } - }, - "node_modules/@isaacs/cliui/node_modules/ansi-regex": { - "version": "6.0.1", - "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", - "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/ansi-regex?sponsor=1" - } - }, - "node_modules/@isaacs/cliui/node_modules/strip-ansi": { - "version": "7.1.0", - "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", - "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "dependencies": { - "ansi-regex": "^6.0.1" - }, - "engines": { - "node": ">=12" - }, - "funding": { - "url": "https://github.com/chalk/strip-ansi?sponsor=1" - } - }, - "node_modules/@jridgewell/gen-mapping": { - "version": "0.3.5", - "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", - "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", - "dependencies": { - "@jridgewell/set-array": "^1.2.1", - "@jridgewell/sourcemap-codec": "^1.4.10", - "@jridgewell/trace-mapping": "^0.3.24" - }, - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/resolve-uri": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", - "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/set-array": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", - "integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", - "engines": { - "node": ">=6.0.0" - } - }, - "node_modules/@jridgewell/sourcemap-codec": { - "version": "1.4.15", - "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.4.15.tgz", - "integrity": "sha512-eF2rxCRulEKXHTRiDrDy6erMYWqNw4LPdQ8UQA4huuxaQsVeRPFl2oM8oDGxMFhJUWZf9McpLtJasDDZb/Bpeg==" - }, - "node_modules/@jridgewell/trace-mapping": { - "version": "0.3.25", - "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", - "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", - "dependencies": { - "@jridgewell/resolve-uri": "^3.1.0", - "@jridgewell/sourcemap-codec": "^1.4.14" - } - }, - "node_modules/@next/env": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/env/-/env-14.2.3.tgz", - 
"integrity": "sha512-W7fd7IbkfmeeY2gXrzJYDx8D2lWKbVoTIj1o1ScPHNzvp30s1AuoEFSdr39bC5sjxJaxTtq3OTCZboNp0lNWHA==" - }, - "node_modules/@next/eslint-plugin-next": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-14.2.3.tgz", - "integrity": "sha512-L3oDricIIjgj1AVnRdRor21gI7mShlSwU/1ZGHmqM3LzHhXXhdkrfeNY5zif25Bi5Dd7fiJHsbhoZCHfXYvlAw==", - "dev": true, - "dependencies": { - "glob": "10.3.10" - } - }, - "node_modules/@next/swc-darwin-arm64": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-14.2.3.tgz", - "integrity": "sha512-3pEYo/RaGqPP0YzwnlmPN2puaF2WMLM3apt5jLW2fFdXD9+pqcoTzRk+iZsf8ta7+quAe4Q6Ms0nR0SFGFdS1A==", + "node_modules/@img/sharp-darwin-arm64": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz", + "integrity": "sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==", "cpu": [ "arm64" ], @@ -972,13 +990,19 @@ "darwin" ], "engines": { - "node": ">= 10" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-darwin-arm64": "1.0.4" } }, - "node_modules/@next/swc-darwin-x64": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-14.2.3.tgz", - "integrity": "sha512-6adp7waE6P1TYFSXpY366xwsOnEXM+y1kgRpjSRVI2CBDOcbRjsJ67Z6EgKIqWIue52d2q/Mx8g9MszARj8IEA==", + "node_modules/@img/sharp-darwin-x64": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz", + "integrity": "sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==", "cpu": [ "x64" ], @@ -987,28 +1011,64 @@ "darwin" ], "engines": { - "node": ">= 10" - } + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-darwin-x64": "1.0.4" + } }, - "node_modules/@next/swc-linux-arm64-gnu": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-14.2.3.tgz", - "integrity": "sha512-cuzCE/1G0ZSnTAHJPUT1rPgQx1w5tzSX7POXSLaS7w2nIUJUD+e25QoXD/hMfxbsT9rslEXugWypJMILBj/QsA==", + "node_modules/@img/sharp-libvips-darwin-arm64": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz", + "integrity": "sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==", "cpu": [ "arm64" ], "optional": true, + "os": [ + "darwin" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-darwin-x64": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz", + "integrity": "sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-arm": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz", + "integrity": 
"sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==", + "cpu": [ + "arm" + ], + "optional": true, "os": [ "linux" ], - "engines": { - "node": ">= 10" + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@next/swc-linux-arm64-musl": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-14.2.3.tgz", - "integrity": "sha512-0D4/oMM2Y9Ta3nGuCcQN8jjJjmDPYpHX9OJzqk42NZGJocU2MqhBq5tWkJrUQOQY9N+In9xOdymzapM09GeiZw==", + "node_modules/@img/sharp-libvips-linux-arm64": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz", + "integrity": "sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==", "cpu": [ "arm64" ], @@ -1016,14 +1076,29 @@ "os": [ "linux" ], - "engines": { - "node": ">= 10" + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@next/swc-linux-x64-gnu": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-14.2.3.tgz", - "integrity": "sha512-ENPiNnBNDInBLyUU5ii8PMQh+4XLr4pG51tOp6aJ9xqFQ2iRI6IH0Ds2yJkAzNV1CfyagcyzPfROMViS2wOZ9w==", + "node_modules/@img/sharp-libvips-linux-s390x": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz", + "integrity": "sha512-u7Wz6ntiSSgGSGcjZ55im6uvTrOxSIS8/dgoVMoiGE9I6JAfU50yH5BoDlYA1tcuGS7g/QNtetJnxA6QEsCVTA==", + "cpu": [ + "s390x" + ], + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linux-x64": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz", + "integrity": "sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==", "cpu": [ "x64" ], @@ -1031,14 +1106,29 @@ "os": [ "linux" ], - "engines": { - "node": ">= 10" + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@next/swc-linux-x64-musl": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-14.2.3.tgz", - "integrity": "sha512-BTAbq0LnCbF5MtoM7I/9UeUu/8ZBY0i8SFjUMCbPDOLv+un67e2JgyN4pmgfXBwy/I+RHu8q+k+MCkDN6P9ViQ==", + "node_modules/@img/sharp-libvips-linuxmusl-arm64": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz", + "integrity": "sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-libvips-linuxmusl-x64": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz", + "integrity": "sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==", "cpu": [ "x64" ], @@ -1046,149 +1136,1638 @@ "os": [ "linux" ], + "funding": { + "url": "https://opencollective.com/libvips" + } + }, + "node_modules/@img/sharp-linux-arm": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz", + "integrity": 
"sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==", + "cpu": [ + "arm" + ], + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": ">= 10" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-arm": "1.0.5" } }, - "node_modules/@next/swc-win32-arm64-msvc": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-14.2.3.tgz", - "integrity": "sha512-AEHIw/dhAMLNFJFJIJIyOFDzrzI5bAjI9J26gbO5xhAKHYTZ9Or04BesFPXiAYXDNdrwTP2dQceYA4dL1geu8A==", + "node_modules/@img/sharp-linux-arm64": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz", + "integrity": "sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==", "cpu": [ "arm64" ], "optional": true, "os": [ - "win32" + "linux" ], "engines": { - "node": ">= 10" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-arm64": "1.0.4" } }, - "node_modules/@next/swc-win32-ia32-msvc": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-ia32-msvc/-/swc-win32-ia32-msvc-14.2.3.tgz", - "integrity": "sha512-vga40n1q6aYb0CLrM+eEmisfKCR45ixQYXuBXxOOmmoV8sYST9k7E3US32FsY+CkkF7NtzdcebiFT4CHuMSyZw==", + "node_modules/@img/sharp-linux-s390x": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.33.5.tgz", + "integrity": "sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q==", "cpu": [ - "ia32" + "s390x" ], "optional": true, "os": [ - "win32" + "linux" ], "engines": { - "node": ">= 10" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-s390x": "1.0.4" } }, - "node_modules/@next/swc-win32-x64-msvc": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-14.2.3.tgz", - "integrity": "sha512-Q1/zm43RWynxrO7lW4ehciQVj+5ePBhOK+/K2P7pLFX3JaJ/IZVC69SHidrmZSOkqz7ECIOhhy7XhAFG4JYyHA==", + "node_modules/@img/sharp-linux-x64": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz", + "integrity": "sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==", "cpu": [ "x64" ], "optional": true, "os": [ - "win32" + "linux" ], "engines": { - "node": ">= 10" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linux-x64": "1.0.4" } }, - "node_modules/@nodelib/fs.scandir": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", - "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", - "dependencies": { - "@nodelib/fs.stat": "2.0.5", - "run-parallel": "^1.1.9" - }, + "node_modules/@img/sharp-linuxmusl-arm64": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz", + "integrity": 
"sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": ">= 8" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linuxmusl-arm64": "1.0.4" } }, - "node_modules/@nodelib/fs.stat": { - "version": "2.0.5", - "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", - "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "node_modules/@img/sharp-linuxmusl-x64": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz", + "integrity": "sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], "engines": { - "node": ">= 8" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-libvips-linuxmusl-x64": "1.0.4" } }, - "node_modules/@nodelib/fs.walk": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", - "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "node_modules/@img/sharp-wasm32": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.33.5.tgz", + "integrity": "sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg==", + "cpu": [ + "wasm32" + ], + "optional": true, "dependencies": { - "@nodelib/fs.scandir": "2.1.5", - "fastq": "^1.6.0" + "@emnapi/runtime": "^1.2.0" }, "engines": { - "node": ">= 8" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@phosphor-icons/react": { - "version": "2.1.5", - "resolved": "https://registry.npmjs.org/@phosphor-icons/react/-/react-2.1.5.tgz", - "integrity": "sha512-B7vRm/w+P/+eavWZP5CB5Ul0ffK4Y7fpd/auWKuGvm+8pVgAJzbOK8O0s+DqzR+TwWkh5pHtJTuoAtaSvgCPzg==", + "node_modules/@img/sharp-win32-ia32": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.33.5.tgz", + "integrity": "sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ==", + "cpu": [ + "ia32" + ], + "optional": true, + "os": [ + "win32" + ], "engines": { - "node": ">=10" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" }, - "peerDependencies": { - "react": ">= 16.8", - "react-dom": ">= 16.8" + "funding": { + "url": "https://opencollective.com/libvips" } }, - "node_modules/@pkgjs/parseargs": { - "version": "0.11.0", - "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", - "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "node_modules/@img/sharp-win32-x64": { + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz", + "integrity": "sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==", + "cpu": [ + "x64" + ], "optional": true, + "os": [ + "win32" + ], "engines": { - "node": ">=14" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" + }, + "funding": { + 
"url": "https://opencollective.com/libvips" } }, - "node_modules/@radix-ui/primitive": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.0.1.tgz", - "integrity": "sha512-yQ8oGX2GVsEYMWGxcovu1uGWPCxV5BFfeeYxqPmuAzUyLT9qmaMXSAhXpb0WrspIeqYzdJpkh2vHModJPgRIaw==", + "node_modules/@isaacs/cliui": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", + "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", "dependencies": { - "@babel/runtime": "^7.13.10" + "string-width": "^5.1.2", + "string-width-cjs": "npm:string-width@^4.2.0", + "strip-ansi": "^7.0.1", + "strip-ansi-cjs": "npm:strip-ansi@^6.0.1", + "wrap-ansi": "^8.1.0", + "wrap-ansi-cjs": "npm:wrap-ansi@^7.0.0" + }, + "engines": { + "node": ">=12" } }, - "node_modules/@radix-ui/react-arrow": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.0.3.tgz", - "integrity": "sha512-wSP+pHsB/jQRaL6voubsQ/ZlrGBHHrOjmBnr19hxYgtS0WvAFwZhK2WP/YY5yF9uKECCEEDGxuLxq1NBK51wFA==", - "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-primitive": "1.0.3" - }, - "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "node_modules/@isaacs/cliui/node_modules/ansi-regex": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.0.1.tgz", + "integrity": "sha512-n5M855fKb2SsfMIiFFoVrABHJC8QtHwVx+mHWP3QcEqBHYienj5dHSgjbxtC0WEZXYt4wcD6zrQElDPhFuZgfA==", + "engines": { + "node": ">=12" }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - }, - "@types/react-dom": { - "optional": true - } + "funding": { + "url": "https://github.com/chalk/ansi-regex?sponsor=1" } }, - "node_modules/@radix-ui/react-compose-refs": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.0.1.tgz", - "integrity": "sha512-fDSBgd44FKHa1FRMU59qBMPFcl2PZE+2nmqunj+BWFyYYjnhIDWL2ItDs3rrbJDQOtzt5nIebLCQc4QRfz6LJw==", + "node_modules/@isaacs/cliui/node_modules/strip-ansi": { + "version": "7.1.0", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", + "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", + "dependencies": { + "ansi-regex": "^6.0.1" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://github.com/chalk/strip-ansi?sponsor=1" + } + }, + "node_modules/@jridgewell/gen-mapping": { + "version": "0.3.5", + "resolved": "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.3.5.tgz", + "integrity": "sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==", + "dependencies": { + "@jridgewell/set-array": "^1.2.1", + "@jridgewell/sourcemap-codec": "^1.4.10", + "@jridgewell/trace-mapping": "^0.3.24" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/set-array": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/@jridgewell/set-array/-/set-array-1.2.1.tgz", + 
"integrity": "sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/source-map": { + "version": "0.3.6", + "resolved": "https://registry.npmjs.org/@jridgewell/source-map/-/source-map-0.3.6.tgz", + "integrity": "sha512-1ZJTZebgqllO79ue2bm3rIGud/bOe0pP5BjSRCRxxYkEZS8STV7zN84UBbiYu7jy+eCKSnVIUgoWWE/tt+shMQ==", + "peer": true, + "dependencies": { + "@jridgewell/gen-mapping": "^0.3.5", + "@jridgewell/trace-mapping": "^0.3.25" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.25", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.25.tgz", + "integrity": "sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==", + "dependencies": { + "@jridgewell/resolve-uri": "^3.1.0", + "@jridgewell/sourcemap-codec": "^1.4.14" + } + }, + "node_modules/@next/env": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/env/-/env-15.0.2.tgz", + "integrity": "sha512-c0Zr0ModK5OX7D4ZV8Jt/wqoXtitLNPwUfG9zElCZztdaZyNVnN40rDXVZ/+FGuR4CcNV5AEfM6N8f+Ener7Dg==" + }, + "node_modules/@next/eslint-plugin-next": { + "version": "14.2.3", + "resolved": "https://registry.npmjs.org/@next/eslint-plugin-next/-/eslint-plugin-next-14.2.3.tgz", + "integrity": "sha512-L3oDricIIjgj1AVnRdRor21gI7mShlSwU/1ZGHmqM3LzHhXXhdkrfeNY5zif25Bi5Dd7fiJHsbhoZCHfXYvlAw==", + "dev": true, + "dependencies": { + "glob": "10.3.10" + } + }, + "node_modules/@next/swc-darwin-arm64": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.0.2.tgz", + "integrity": "sha512-GK+8w88z+AFlmt+ondytZo2xpwlfAR8U6CRwXancHImh6EdGfHMIrTSCcx5sOSBei00GyLVL0ioo1JLKTfprgg==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-darwin-x64": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.0.2.tgz", + "integrity": "sha512-KUpBVxIbjzFiUZhiLIpJiBoelqzQtVZbdNNsehhUn36e2YzKHphnK8eTUW1s/4aPy5kH/UTid8IuVbaOpedhpw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-arm64-gnu": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.0.2.tgz", + "integrity": "sha512-9J7TPEcHNAZvwxXRzOtiUvwtTD+fmuY0l7RErf8Yyc7kMpE47MIQakl+3jecmkhOoIyi/Rp+ddq7j4wG6JDskQ==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-arm64-musl": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.0.2.tgz", + "integrity": "sha512-BjH4ZSzJIoTTZRh6rG+a/Ry4SW0HlizcPorqNBixBWc3wtQtj4Sn9FnRZe22QqrPnzoaW0ctvSz4FaH4eGKMww==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-x64-gnu": { + "version": "15.0.2", + "resolved": 
"https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.0.2.tgz", + "integrity": "sha512-i3U2TcHgo26sIhcwX/Rshz6avM6nizrZPvrDVDY1bXcLH1ndjbO8zuC7RoHp0NSK7wjJMPYzm7NYL1ksSKFreA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-x64-musl": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.0.2.tgz", + "integrity": "sha512-AMfZfSVOIR8fa+TXlAooByEF4OB00wqnms1sJ1v+iu8ivwvtPvnkwdzzFMpsK5jA2S9oNeeQ04egIWVb4QWmtQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-win32-arm64-msvc": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.0.2.tgz", + "integrity": "sha512-JkXysDT0/hEY47O+Hvs8PbZAeiCQVxKfGtr4GUpNAhlG2E0Mkjibuo8ryGD29Qb5a3IOnKYNoZlh/MyKd2Nbww==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-win32-x64-msvc": { + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.0.2.tgz", + "integrity": "sha512-foaUL0NqJY/dX0Pi/UcZm5zsmSk5MtP/gxx3xOPyREkMFN+CTjctPfu3QaqrQHinaKdPnMWPJDKt4VjDfTBe/Q==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@opentelemetry/api": { + "version": "1.9.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/api/-/api-1.9.0.tgz", + "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/@opentelemetry/api-logs": { + "version": "0.53.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.53.0.tgz", + "integrity": "sha512-8HArjKx+RaAI8uEIgcORbZIPklyh1YLjPSBus8hjRmvLi6DeFzgOcdZ7KwPabKj8mXF8dX0hyfAyGfycz0DbFw==", + "dependencies": { + "@opentelemetry/api": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@opentelemetry/context-async-hooks": { + "version": "1.26.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/context-async-hooks/-/context-async-hooks-1.26.0.tgz", + "integrity": "sha512-HedpXXYzzbaoutw6DFLWLDket2FwLkLpil4hGCZ1xYEIMTcivdfwEOISgdbLEWyG3HW52gTq2V9mOVJrONgiwg==", + "engines": { + "node": 
">=14" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/core": { + "version": "1.26.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/core/-/core-1.26.0.tgz", + "integrity": "sha512-1iKxXXE8415Cdv0yjG3G6hQnB5eVEsJce3QaawX8SjDn0mAS0ZM8fAbZZJD4ajvhC15cePvosSCut404KrIIvQ==", + "dependencies": { + "@opentelemetry/semantic-conventions": "1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/instrumentation": { + "version": "0.53.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation/-/instrumentation-0.53.0.tgz", + "integrity": "sha512-DMwg0hy4wzf7K73JJtl95m/e0boSoWhH07rfvHvYzQtBD3Bmv0Wc1x733vyZBqmFm8OjJD0/pfiUg1W3JjFX0A==", + "dependencies": { + "@opentelemetry/api-logs": "0.53.0", + "@types/shimmer": "^1.2.0", + "import-in-the-middle": "^1.8.1", + "require-in-the-middle": "^7.1.1", + "semver": "^7.5.2", + "shimmer": "^1.2.1" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-amqplib": { + "version": "0.42.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-amqplib/-/instrumentation-amqplib-0.42.0.tgz", + "integrity": "sha512-fiuU6OKsqHJiydHWgTRQ7MnIrJ2lEqsdgFtNIH4LbAUJl/5XmrIeoDzDnox+hfkgWK65jsleFuQDtYb5hW1koQ==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-connect": { + "version": "0.39.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-connect/-/instrumentation-connect-0.39.0.tgz", + "integrity": "sha512-pGBiKevLq7NNglMgqzmeKczF4XQMTOUOTkK8afRHMZMnrK3fcETyTH7lVaSozwiOM3Ws+SuEmXZT7DYrrhxGlg==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0", + "@types/connect": "3.4.36" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-dataloader": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-dataloader/-/instrumentation-dataloader-0.12.0.tgz", + "integrity": "sha512-pnPxatoFE0OXIZDQhL2okF//dmbiWFzcSc8pUg9TqofCLYZySSxDCgQc69CJBo5JnI3Gz1KP+mOjS4WAeRIH4g==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-express": { + "version": "0.42.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-express/-/instrumentation-express-0.42.0.tgz", + "integrity": "sha512-YNcy7ZfGnLsVEqGXQPT+S0G1AE46N21ORY7i7yUQyfhGAL4RBjnZUqefMI0NwqIl6nGbr1IpF0rZGoN8Q7x12Q==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-fastify": { + "version": "0.39.0", + "resolved": 
"https://registry.npmjs.org/@opentelemetry/instrumentation-fastify/-/instrumentation-fastify-0.39.0.tgz", + "integrity": "sha512-SS9uSlKcsWZabhBp2szErkeuuBDgxOUlllwkS92dVaWRnMmwysPhcEgHKB8rUe3BHg/GnZC1eo1hbTZv4YhfoA==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-fs": { + "version": "0.15.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-fs/-/instrumentation-fs-0.15.0.tgz", + "integrity": "sha512-JWVKdNLpu1skqZQA//jKOcKdJC66TWKqa2FUFq70rKohvaSq47pmXlnabNO+B/BvLfmidfiaN35XakT5RyMl2Q==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-generic-pool": { + "version": "0.39.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-generic-pool/-/instrumentation-generic-pool-0.39.0.tgz", + "integrity": "sha512-y4v8Y+tSfRB3NNBvHjbjrn7rX/7sdARG7FuK6zR8PGb28CTa0kHpEGCJqvL9L8xkTNvTXo+lM36ajFGUaK1aNw==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-graphql": { + "version": "0.43.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-graphql/-/instrumentation-graphql-0.43.0.tgz", + "integrity": "sha512-aI3YMmC2McGd8KW5du1a2gBA0iOMOGLqg4s9YjzwbjFwjlmMNFSK1P3AIg374GWg823RPUGfVTIgZ/juk9CVOA==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-hapi": { + "version": "0.41.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-hapi/-/instrumentation-hapi-0.41.0.tgz", + "integrity": "sha512-jKDrxPNXDByPlYcMdZjNPYCvw0SQJjN+B1A+QH+sx+sAHsKSAf9hwFiJSrI6C4XdOls43V/f/fkp9ITkHhKFbQ==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-http": { + "version": "0.53.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-http/-/instrumentation-http-0.53.0.tgz", + "integrity": "sha512-H74ErMeDuZfj7KgYCTOFGWF5W9AfaPnqLQQxeFq85+D29wwV2yqHbz2IKLYpkOh7EI6QwDEl7rZCIxjJLyc/CQ==", + "dependencies": { + "@opentelemetry/core": "1.26.0", + "@opentelemetry/instrumentation": "0.53.0", + "@opentelemetry/semantic-conventions": "1.27.0", + "semver": "^7.5.2" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-ioredis": { + "version": "0.43.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-ioredis/-/instrumentation-ioredis-0.43.0.tgz", + "integrity": "sha512-i3Dke/LdhZbiUAEImmRG3i7Dimm/BD7t8pDDzwepSvIQ6s2X6FPia7561gw+64w+nx0+G9X14D7rEfaMEmmjig==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + 
"@opentelemetry/redis-common": "^0.36.2", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-kafkajs": { + "version": "0.3.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-kafkajs/-/instrumentation-kafkajs-0.3.0.tgz", + "integrity": "sha512-UnkZueYK1ise8FXQeKlpBd7YYUtC7mM8J0wzUSccEfc/G8UqHQqAzIyYCUOUPUKp8GsjLnWOOK/3hJc4owb7Jg==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-koa": { + "version": "0.43.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-koa/-/instrumentation-koa-0.43.0.tgz", + "integrity": "sha512-lDAhSnmoTIN6ELKmLJBplXzT/Jqs5jGZehuG22EdSMaTwgjMpxMDI1YtlKEhiWPWkrz5LUsd0aOO0ZRc9vn3AQ==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-lru-memoizer": { + "version": "0.40.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-lru-memoizer/-/instrumentation-lru-memoizer-0.40.0.tgz", + "integrity": "sha512-21xRwZsEdMPnROu/QsaOIODmzw59IYpGFmuC4aFWvMj6stA8+Ei1tX67nkarJttlNjoM94um0N4X26AD7ff54A==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-mongodb": { + "version": "0.47.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-mongodb/-/instrumentation-mongodb-0.47.0.tgz", + "integrity": "sha512-yqyXRx2SulEURjgOQyJzhCECSh5i1uM49NUaq9TqLd6fA7g26OahyJfsr9NE38HFqGRHpi4loyrnfYGdrsoVjQ==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/sdk-metrics": "^1.9.1", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-mongoose": { + "version": "0.42.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-mongoose/-/instrumentation-mongoose-0.42.0.tgz", + "integrity": "sha512-AnWv+RaR86uG3qNEMwt3plKX1ueRM7AspfszJYVkvkehiicC3bHQA6vWdb6Zvy5HAE14RyFbu9+2hUUjR2NSyg==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-mysql": { + "version": "0.41.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-mysql/-/instrumentation-mysql-0.41.0.tgz", + "integrity": "sha512-jnvrV6BsQWyHS2qb2fkfbfSb1R/lmYwqEZITwufuRl37apTopswu9izc0b1CYRp/34tUG/4k/V39PND6eyiNvw==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0", + "@types/mysql": "2.15.26" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + 
"node_modules/@opentelemetry/instrumentation-mysql2": { + "version": "0.41.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-mysql2/-/instrumentation-mysql2-0.41.0.tgz", + "integrity": "sha512-REQB0x+IzVTpoNgVmy5b+UnH1/mDByrneimP6sbDHkp1j8QOl1HyWOrBH/6YWR0nrbU3l825Em5PlybjT3232g==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0", + "@opentelemetry/sql-common": "^0.40.1" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-nestjs-core": { + "version": "0.40.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-nestjs-core/-/instrumentation-nestjs-core-0.40.0.tgz", + "integrity": "sha512-WF1hCUed07vKmf5BzEkL0wSPinqJgH7kGzOjjMAiTGacofNXjb/y4KQ8loj2sNsh5C/NN7s1zxQuCgbWbVTGKg==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-pg": { + "version": "0.44.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-pg/-/instrumentation-pg-0.44.0.tgz", + "integrity": "sha512-oTWVyzKqXud1BYEGX1loo2o4k4vaU1elr3vPO8NZolrBtFvQ34nx4HgUaexUDuEog00qQt+MLR5gws/p+JXMLQ==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0", + "@opentelemetry/sql-common": "^0.40.1", + "@types/pg": "8.6.1", + "@types/pg-pool": "2.0.6" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-redis-4": { + "version": "0.42.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-redis-4/-/instrumentation-redis-4-0.42.0.tgz", + "integrity": "sha512-NaD+t2JNcOzX/Qa7kMy68JbmoVIV37fT/fJYzLKu2Wwd+0NCxt+K2OOsOakA8GVg8lSpFdbx4V/suzZZ2Pvdjg==", + "dependencies": { + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/redis-common": "^0.36.2", + "@opentelemetry/semantic-conventions": "^1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@opentelemetry/instrumentation-undici": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation-undici/-/instrumentation-undici-0.6.0.tgz", + "integrity": "sha512-ABJBhm5OdhGmbh0S/fOTE4N69IZ00CsHC5ijMYfzbw3E5NwLgpQk5xsljaECrJ8wz1SfXbO03FiSuu5AyRAkvQ==", + "dependencies": { + "@opentelemetry/core": "^1.8.0", + "@opentelemetry/instrumentation": "^0.53.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.7.0" + } + }, + "node_modules/@opentelemetry/redis-common": { + "version": "0.36.2", + "resolved": "https://registry.npmjs.org/@opentelemetry/redis-common/-/redis-common-0.36.2.tgz", + "integrity": "sha512-faYX1N0gpLhej/6nyp6bgRjzAKXn5GOEMYY7YhciSfCoITAktLUtQ36d24QEWNA1/WA1y6qQunCe0OhHRkVl9g==", + "engines": { + "node": ">=14" + } + }, + "node_modules/@opentelemetry/resources": { + "version": "1.26.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/resources/-/resources-1.26.0.tgz", + "integrity": "sha512-CPNYchBE7MBecCSVy0HKpUISEeJOniWqcHaAHpmasZ3j9o6V3AyBzhRc90jdmemq0HOxDr6ylhUbDhBqqPpeNw==", + "dependencies": { + "@opentelemetry/core": "1.26.0", + "@opentelemetry/semantic-conventions": 
"1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sdk-metrics": { + "version": "1.26.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-metrics/-/sdk-metrics-1.26.0.tgz", + "integrity": "sha512-0SvDXmou/JjzSDOjUmetAAvcKQW6ZrvosU0rkbDGpXvvZN+pQF6JbK/Kd4hNdK4q/22yeruqvukXEJyySTzyTQ==", + "dependencies": { + "@opentelemetry/core": "1.26.0", + "@opentelemetry/resources": "1.26.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.3.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/sdk-trace-base": { + "version": "1.26.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/sdk-trace-base/-/sdk-trace-base-1.26.0.tgz", + "integrity": "sha512-olWQldtvbK4v22ymrKLbIcBi9L2SpMO84sCPY54IVsJhP9fRsxJT194C/AVaAuJzLE30EdhhM1VmvVYR7az+cw==", + "dependencies": { + "@opentelemetry/core": "1.26.0", + "@opentelemetry/resources": "1.26.0", + "@opentelemetry/semantic-conventions": "1.27.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": ">=1.0.0 <1.10.0" + } + }, + "node_modules/@opentelemetry/semantic-conventions": { + "version": "1.27.0", + "resolved": "https://registry.npmjs.org/@opentelemetry/semantic-conventions/-/semantic-conventions-1.27.0.tgz", + "integrity": "sha512-sAay1RrB+ONOem0OZanAR1ZI/k7yDpnOQSQmTMuGImUQb2y8EbSaCJ94FQluM74xoU03vlb2d2U90hZluL6nQg==", + "engines": { + "node": ">=14" + } + }, + "node_modules/@opentelemetry/sql-common": { + "version": "0.40.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/sql-common/-/sql-common-0.40.1.tgz", + "integrity": "sha512-nSDlnHSqzC3pXn/wZEZVLuAuJ1MYMXPBwtv2qAbCa3847SaHItdE7SzUq/Jtb0KZmh1zfAbNi3AAMjztTT4Ugg==", + "dependencies": { + "@opentelemetry/core": "^1.1.0" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.1.0" + } + }, + "node_modules/@phosphor-icons/react": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@phosphor-icons/react/-/react-2.1.5.tgz", + "integrity": "sha512-B7vRm/w+P/+eavWZP5CB5Ul0ffK4Y7fpd/auWKuGvm+8pVgAJzbOK8O0s+DqzR+TwWkh5pHtJTuoAtaSvgCPzg==", + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "react": ">= 16.8", + "react-dom": ">= 16.8" + } + }, + "node_modules/@pkgjs/parseargs": { + "version": "0.11.0", + "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", + "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", + "optional": true, + "engines": { + "node": ">=14" + } + }, + "node_modules/@prisma/instrumentation": { + "version": "5.19.1", + "resolved": "https://registry.npmjs.org/@prisma/instrumentation/-/instrumentation-5.19.1.tgz", + "integrity": "sha512-VLnzMQq7CWroL5AeaW0Py2huiNKeoMfCH3SUxstdzPrlWQi6UQ9UrfcbUkNHlVFqOMacqy8X/8YtE0kuKDpD9w==", + "dependencies": { + "@opentelemetry/api": "^1.8", + "@opentelemetry/instrumentation": "^0.49 || ^0.50 || ^0.51 || ^0.52.0", + "@opentelemetry/sdk-trace-base": "^1.22" + } + }, + "node_modules/@prisma/instrumentation/node_modules/@opentelemetry/api-logs": { + "version": "0.52.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/api-logs/-/api-logs-0.52.1.tgz", + "integrity": "sha512-qnSqB2DQ9TPP96dl8cDubDvrUyWc0/sK81xHTK8eSUspzDM3bsewX903qclQFvVhgStjRWdC5bLb3kQqMkfV5A==", + "dependencies": { + "@opentelemetry/api": "^1.0.0" + }, + "engines": { + "node": ">=14" + } + }, + 
"node_modules/@prisma/instrumentation/node_modules/@opentelemetry/instrumentation": { + "version": "0.52.1", + "resolved": "https://registry.npmjs.org/@opentelemetry/instrumentation/-/instrumentation-0.52.1.tgz", + "integrity": "sha512-uXJbYU/5/MBHjMp1FqrILLRuiJCs3Ofk0MeRDk8g1S1gD47U8X3JnSwcMO1rtRo1x1a7zKaQHaoYu49p/4eSKw==", + "dependencies": { + "@opentelemetry/api-logs": "0.52.1", + "@types/shimmer": "^1.0.2", + "import-in-the-middle": "^1.8.1", + "require-in-the-middle": "^7.1.1", + "semver": "^7.5.2", + "shimmer": "^1.2.1" + }, + "engines": { + "node": ">=14" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.3.0" + } + }, + "node_modules/@radix-ui/number": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/number/-/number-1.1.0.tgz", + "integrity": "sha512-V3gRzhVNU1ldS5XhAPTom1fOIo4ccrjjJgmE+LI2h/WaFpHmx0MQApT+KZHnx8abG6Avtfcz4WoEciMnpFT3HQ==", + "license": "MIT" + }, + "node_modules/@radix-ui/primitive": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.0.1.tgz", + "integrity": "sha512-yQ8oGX2GVsEYMWGxcovu1uGWPCxV5BFfeeYxqPmuAzUyLT9qmaMXSAhXpb0WrspIeqYzdJpkh2vHModJPgRIaw==", + "dependencies": { + "@babel/runtime": "^7.13.10" + } + }, + "node_modules/@radix-ui/react-arrow": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-arrow/-/react-arrow-1.1.0.tgz", + "integrity": "sha512-FmlW1rCg7hBpEBwFbjHwCW6AmWLQM6g/v0Sn8XbP9NvmSZ2San1FpQeyPtufzOMSIx7Y4dzjlHoifhp+7NkZhw==", + "dependencies": { + "@radix-ui/react-primitive": "2.0.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-collection": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-collection/-/react-collection-1.1.0.tgz", + "integrity": "sha512-GZsZslMJEyo1VKm5L1ZJY8tGDxZNPAoUeQUIbKeJfoi7Q4kmig5AsgLMYYuyYbfjd8fBmFORAIwYAkXMnXZgZw==", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-slot": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-compose-refs": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.1.0.tgz", + "integrity": "sha512-b4inOtiaOnYf9KWyO3jAeeCG6FeyfY6ldiEPanbUjWd+xIk5wZeHa8yVwmrJ2vderhu/BQvzCrJI0lHd+wIiqw==", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-context": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.0.tgz", + "integrity": "sha512-OKrckBy+sMEgYM/sMmqmErVn0kZqrHPJze+Ql3DzYsDDp0hl0L62nx/2122/Bvps1qz645jlcu2tD9lrRSdf8A==", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || 
^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.0.5.tgz", + "integrity": "sha512-GjWJX/AUpB703eEBanuBnIWdIXg6NvJFCXcNlSZk4xdszCdhrJgBoUd1cGk67vFO+WdA2pfI/plOpqz/5GUP6Q==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-context": "1.0.1", + "@radix-ui/react-dismissable-layer": "1.0.5", + "@radix-ui/react-focus-guards": "1.0.1", + "@radix-ui/react-focus-scope": "1.0.4", + "@radix-ui/react-id": "1.0.1", + "@radix-ui/react-portal": "1.0.4", + "@radix-ui/react-presence": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-slot": "1.0.2", + "@radix-ui/react-use-controllable-state": "1.0.1", + "aria-hidden": "^1.1.1", + "react-remove-scroll": "2.5.5" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-compose-refs": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-compose-refs/-/react-compose-refs-1.0.1.tgz", + "integrity": "sha512-fDSBgd44FKHa1FRMU59qBMPFcl2PZE+2nmqunj+BWFyYYjnhIDWL2ItDs3rrbJDQOtzt5nIebLCQc4QRfz6LJw==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-context": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.0.1.tgz", + "integrity": "sha512-ebbrdFoYTcuZ0v4wG5tedGnp9tzcV8awzsxYph7gXUyvnNLuTIcCk1q17JEbnVhXAKG9oX3KtchwiMIAYp9NLg==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.0.5.tgz", + "integrity": "sha512-aJeDjQhywg9LBu2t/At58hCvr7pEm0o2Ke1x33B+MhjNmmZ17sy4KImo0KPLgsnc/zN7GPdce8Cnn0SWvwZO7g==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/primitive": "1.0.1", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-use-callback-ref": "1.0.1", + "@radix-ui/react-use-escape-keydown": "1.0.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-dismissable-layer/node_modules/@radix-ui/react-use-callback-ref": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz", + "integrity": 
"sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-dismissable-layer/node_modules/@radix-ui/react-use-escape-keydown": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.0.3.tgz", + "integrity": "sha512-vyL82j40hcFicA+M4Ex7hVkB9vHgSse1ZWomAqV2Je3RleKGO5iM8KMOEtfoSB0PnIelMd2lATjTGMYqN5ylTg==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-callback-ref": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-focus-guards": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.0.1.tgz", + "integrity": "sha512-Rect2dWbQ8waGzhMavsIbmSVCgYxkXLxxR3ZvCX79JOglzdEy4JXMb98lq4hPxUbLr77nP0UOGf4rcMU+s1pUA==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-focus-scope": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.0.4.tgz", + "integrity": "sha512-sL04Mgvf+FmyvZeYfNu1EPAaaxD+aw7cYeIB9L9Fvq8+urhltTRaEo5ysKOpHuKPclsZcSUMKlN05x4u+CINpA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-primitive": "1.0.3", + "@radix-ui/react-use-callback-ref": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-focus-scope/node_modules/@radix-ui/react-use-callback-ref": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz", + "integrity": "sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-id": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.0.1.tgz", + "integrity": "sha512-tI7sT/kqYp8p96yGWY1OAnLHrqDgzHefRBKQ2YAkBS5ja7QLcZ9Z/uY7bEjPUatf8RomoXM8/1sMj1IJaE5UzQ==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-layout-effect": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + 
"node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-id/node_modules/@radix-ui/react-use-layout-effect": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.1.tgz", + "integrity": "sha512-v/5RegiJWYdoCvMnITBkNNx6bCj20fiaJnWtRkU18yITptraXjffz5Qbn05uOiQnOvi+dbkznkoaMltz1GnszQ==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-portal": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.0.4.tgz", + "integrity": "sha512-Qki+C/EuGUVCQTOTD5vzJzJuMUlewbzuKyUy+/iHM2uwGiru9gZeBJtHAPKAEkB5KWGi9mP/CHKcY0wt1aW45Q==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-primitive": "1.0.3" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-presence": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.0.1.tgz", + "integrity": "sha512-UXLW4UAbIY5ZjcvzjfRFo5gxva8QirC9hF7wRE4U5gz+TP0DbRk+//qyuAQ1McDxBt1xNMBTaciFGvEmJvAZCg==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.1", + "@radix-ui/react-use-layout-effect": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-presence/node_modules/@radix-ui/react-use-layout-effect": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.1.tgz", + "integrity": "sha512-v/5RegiJWYdoCvMnITBkNNx6bCj20fiaJnWtRkU18yITptraXjffz5Qbn05uOiQnOvi+dbkznkoaMltz1GnszQ==", + "dependencies": { + "@babel/runtime": "^7.13.10" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-primitive": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-1.0.3.tgz", + "integrity": "sha512-yi58uVyoAcK/Nq1inRY56ZSjKypBNKTa/1mcL8qdl6oJeEaDbOldlzrGn7P6Q3Id5d+SYNGc5AJgc4vGhjs5+g==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-slot": "1.0.2" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0", + "react-dom": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-slot": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.0.2.tgz", + "integrity": 
"sha512-YeTpuq4deV+6DusvVUW4ivBgnkHwECUu0BiN43L5UCDFgdhsRUWAghhTF5MbvNTPzmiFOx90asDSUjWuCNapwg==", + "license": "MIT", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-compose-refs": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-use-controllable-state": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.0.1.tgz", + "integrity": "sha512-Svl5GY5FQeN758fWKrjM6Qb7asvXeiZltlT4U2gVfl8Gx5UAv2sMR0LWo8yhsIZh2oQ0eFdZ59aoOOMV7b47VA==", + "dependencies": { + "@babel/runtime": "^7.13.10", + "@radix-ui/react-use-callback-ref": "1.0.1" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/@radix-ui/react-use-controllable-state/node_modules/@radix-ui/react-use-callback-ref": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz", + "integrity": "sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==", "dependencies": { "@babel/runtime": "^7.13.10" }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/react-remove-scroll": { + "version": "2.5.5", + "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.5.5.tgz", + "integrity": "sha512-ImKhrzJJsyXJfBZ4bzu8Bwpka14c/fQt0k+cyFp/PBhTfyDnU5hjOtM4AG/0AMyy8oKzOTR0lDgJIM7pYXI0kw==", + "dependencies": { + "react-remove-scroll-bar": "^2.3.3", + "react-style-singleton": "^2.2.1", + "tslib": "^2.1.0", + "use-callback-ref": "^1.3.0", + "use-sidecar": "^1.1.2" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/react-remove-scroll/node_modules/react-remove-scroll-bar": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.6.tgz", + "integrity": "sha512-DtSYaao4mBmX+HDo5YWYdBWQwYIQQshUV/dVxFxK+KM26Wjwp1gZ6rv6OC3oujI6Bfu6Xyg3TwK533AQutsn/g==", + "dependencies": { + "react-style-singleton": "^2.2.1", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/react-remove-scroll/node_modules/react-style-singleton": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.1.tgz", + "integrity": "sha512-ZWj0fHEMyWkHzKYUr2Bs/4zU6XLmq9HsgBURm7g5pAVfyn49DgUiNgY2d4lXRlYSiCif9YBGpQleewkcqddc7g==", + "dependencies": { + "get-nonce": "^1.0.0", + "invariant": "^2.2.4", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + 
"peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/react-remove-scroll/node_modules/use-callback-ref": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.2.tgz", + "integrity": "sha512-elOQwe6Q8gqZgDA8mrh44qRTQqpIHDcZ3hXTLjBe1i4ph8XpNJnO+aQf3NaG+lriLopI4HMx9VjQLfPQ6vhnoA==", + "dependencies": { + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dialog/node_modules/react-remove-scroll/node_modules/use-sidecar": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.2.tgz", + "integrity": "sha512-epTbsLuzZ7lPClpz2TyryBfztm7m+28DlEv2ZCQ3MDr5ssiwyOwGH/e5F9CkfWjJ1t4clvI58yF822/GUkjjhw==", + "dependencies": { + "detect-node-es": "^1.1.0", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.9.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-direction": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-direction/-/react-direction-1.1.0.tgz", + "integrity": "sha512-BUuBvgThEiAXh2DWu93XsT+a3aWrGqolGlqqw5VU1kG7p/ZH2cuDlM1sRLNnY3QcBS69UIz2mcKhMxDsdewhjg==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dismissable-layer": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.1.tgz", + "integrity": "sha512-QSxg29lfr/xcev6kSz7MAlmDnzbP1eI/Dwn3Tp1ip0KT5CUELsxkekFEMVBEoykI3oV39hKT4TKZzBNMbcTZYQ==", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "@radix-ui/react-use-escape-keydown": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-dismissable-layer/node_modules/@radix-ui/primitive": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.0.tgz", + "integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==" + }, + "node_modules/@radix-ui/react-focus-scope": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.0.tgz", + "integrity": "sha512-200UD8zylvEyL8Bx+z76RJnASR2gRMuxlgFCPAe/Q/679a/r0eK3MBVYMb7vZODZcffZBdob1EGnky78xmVvcA==", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-callback-ref": 
"1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-id": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.1.0.tgz", + "integrity": "sha512-EJUrI8yYh7WOjNOqpoJaf1jlFIH2LvtgAl+YcFqNCa+4hj64ZXmPkAKOFs/ukjz3byN6bdb/AVUqHkI8/uWWMA==", + "dependencies": { + "@radix-ui/react-use-layout-effect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1196,49 +2775,355 @@ } } }, - "node_modules/@radix-ui/react-context": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.0.1.tgz", - "integrity": "sha512-ebbrdFoYTcuZ0v4wG5tedGnp9tzcV8awzsxYph7gXUyvnNLuTIcCk1q17JEbnVhXAKG9oX3KtchwiMIAYp9NLg==", + "node_modules/@radix-ui/react-popover": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.1.2.tgz", + "integrity": "sha512-u2HRUyWW+lOiA2g0Le0tMmT55FGOEWHwPFt1EPfbLly7uXQExFo5duNKqG2DzmFXIdqOeNd+TpE8baHWJCyP9w==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.13.10" + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.1", + "@radix-ui/react-focus-guards": "1.1.1", + "@radix-ui/react-focus-scope": "1.1.0", + "@radix-ui/react-id": "1.1.0", + "@radix-ui/react-popper": "1.2.0", + "@radix-ui/react-portal": "1.1.2", + "@radix-ui/react-presence": "1.1.1", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-slot": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0", + "aria-hidden": "^1.1.1", + "react-remove-scroll": "2.6.0" }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true + }, + "@types/react-dom": { + "optional": true } } }, - "node_modules/@radix-ui/react-dialog": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dialog/-/react-dialog-1.0.5.tgz", - "integrity": "sha512-GjWJX/AUpB703eEBanuBnIWdIXg6NvJFCXcNlSZk4xdszCdhrJgBoUd1cGk67vFO+WdA2pfI/plOpqz/5GUP6Q==", + "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/primitive": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.0.tgz", + "integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==", + "license": "MIT" + }, + "node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-context": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz", + "integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + 
"node_modules/@radix-ui/react-popover/node_modules/@radix-ui/react-focus-guards": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.1.tgz", + "integrity": "sha512-pSIwfrT1a6sIoDASCSpFwOasEwKTZWDw/iBdtnqKO7v6FeOzYJ7U53cPzYFVR3geGGXgVHaH+CdngrrAzqUGxg==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-popover/node_modules/react-remove-scroll": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.6.0.tgz", + "integrity": "sha512-I2U4JVEsQenxDAKaVa3VZ/JeJZe0/2DxPWL8Tj8yLKctQJQiZM52pn/GWFpSp8dftjM3pSAHVJZscAnC/y+ySQ==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/primitive": "1.0.1", - "@radix-ui/react-compose-refs": "1.0.1", - "@radix-ui/react-context": "1.0.1", - "@radix-ui/react-dismissable-layer": "1.0.5", - "@radix-ui/react-focus-guards": "1.0.1", - "@radix-ui/react-focus-scope": "1.0.4", - "@radix-ui/react-id": "1.0.1", - "@radix-ui/react-portal": "1.0.4", - "@radix-ui/react-presence": "1.0.1", - "@radix-ui/react-primitive": "1.0.3", - "@radix-ui/react-slot": "1.0.2", - "@radix-ui/react-use-controllable-state": "1.0.1", + "react-remove-scroll-bar": "^2.3.6", + "react-style-singleton": "^2.2.1", + "tslib": "^2.1.0", + "use-callback-ref": "^1.3.0", + "use-sidecar": "^1.1.2" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-popover/node_modules/react-remove-scroll/node_modules/react-remove-scroll-bar": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.6.tgz", + "integrity": "sha512-DtSYaao4mBmX+HDo5YWYdBWQwYIQQshUV/dVxFxK+KM26Wjwp1gZ6rv6OC3oujI6Bfu6Xyg3TwK533AQutsn/g==", + "dependencies": { + "react-style-singleton": "^2.2.1", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-popover/node_modules/react-remove-scroll/node_modules/react-style-singleton": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.1.tgz", + "integrity": "sha512-ZWj0fHEMyWkHzKYUr2Bs/4zU6XLmq9HsgBURm7g5pAVfyn49DgUiNgY2d4lXRlYSiCif9YBGpQleewkcqddc7g==", + "dependencies": { + "get-nonce": "^1.0.0", + "invariant": "^2.2.4", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-popover/node_modules/react-remove-scroll/node_modules/use-callback-ref": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.2.tgz", + "integrity": "sha512-elOQwe6Q8gqZgDA8mrh44qRTQqpIHDcZ3hXTLjBe1i4ph8XpNJnO+aQf3NaG+lriLopI4HMx9VjQLfPQ6vhnoA==", + "dependencies": { + "tslib": "^2.0.0" + }, + "engines": { + 
"node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-popover/node_modules/react-remove-scroll/node_modules/use-sidecar": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.2.tgz", + "integrity": "sha512-epTbsLuzZ7lPClpz2TyryBfztm7m+28DlEv2ZCQ3MDr5ssiwyOwGH/e5F9CkfWjJ1t4clvI58yF822/GUkjjhw==", + "dependencies": { + "detect-node-es": "^1.1.0", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" + }, + "peerDependencies": { + "@types/react": "^16.9.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-popper": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.0.tgz", + "integrity": "sha512-ZnRMshKF43aBxVWPWvbj21+7TQCvhuULWJ4gNIKYpRlQt5xGRhLx66tMp8pya2UkGHTSlhpXwmjqltDYHhw7Vg==", + "dependencies": { + "@floating-ui/react-dom": "^2.0.0", + "@radix-ui/react-arrow": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "@radix-ui/react-use-layout-effect": "1.1.0", + "@radix-ui/react-use-rect": "1.1.0", + "@radix-ui/react-use-size": "1.1.0", + "@radix-ui/rect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-portal": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.2.tgz", + "integrity": "sha512-WeDYLGPxJb/5EGBoedyJbT0MpoULmwnIPMJMSldkuiMsBAv7N1cRdsTWZWht9vpPOiN3qyiGAtbK2is47/uMFg==", + "dependencies": { + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-layout-effect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-presence": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.1.1.tgz", + "integrity": "sha512-IeFXVi4YS1K0wVZzXNrbaaUvIJ3qdY+/Ih4eHFhWA9SwGR9UDX7Ck8abvL57C4cv3wwMvUE0OG69Qc3NCcTe/A==", + "dependencies": { + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-use-layout-effect": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-primitive": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-2.0.0.tgz", + "integrity": 
"sha512-ZSpFm0/uHa8zTvKBDjLFWLo8dkr4MBsiDLz0g3gMUwqgLHz9rTaRRGYDgvZPtBJgYCBKXkS9fzmoySgr8CO6Cw==", + "dependencies": { + "@radix-ui/react-slot": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-roving-focus": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-roving-focus/-/react-roving-focus-1.1.0.tgz", + "integrity": "sha512-EA6AMGeq9AEeQDeSH0aZgG198qkfHSbvWTf1HvoDmOB5bBG/qTxjYMWUKMnYiV6J/iP/J8MEFSuB2zRU2n7ODA==", + "dependencies": { + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-collection": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.0", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-id": "1.1.0", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0" + }, + "peerDependencies": { + "@types/react": "*", + "@types/react-dom": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + }, + "@types/react-dom": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-roving-focus/node_modules/@radix-ui/primitive": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.0.tgz", + "integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==" + }, + "node_modules/@radix-ui/react-select": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.1.2.tgz", + "integrity": "sha512-rZJtWmorC7dFRi0owDmoijm6nSJH1tVw64QGiNIZ9PNLyBDtG+iAq+XGsya052At4BfarzY/Dhv9wrrUr6IMZA==", + "license": "MIT", + "dependencies": { + "@radix-ui/number": "1.1.0", + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-collection": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-dismissable-layer": "1.1.1", + "@radix-ui/react-focus-guards": "1.1.1", + "@radix-ui/react-focus-scope": "1.1.0", + "@radix-ui/react-id": "1.1.0", + "@radix-ui/react-popper": "1.2.0", + "@radix-ui/react-portal": "1.1.2", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-slot": "1.1.0", + "@radix-ui/react-use-callback-ref": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0", + "@radix-ui/react-use-layout-effect": "1.1.0", + "@radix-ui/react-use-previous": "1.1.0", + "@radix-ui/react-visually-hidden": "1.1.0", "aria-hidden": "^1.1.1", - "react-remove-scroll": "2.5.5" + "react-remove-scroll": "2.6.0" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1249,43 +3134,60 @@ } } }, - "node_modules/@radix-ui/react-dismissable-layer": { - "version": "1.0.5", - "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.0.5.tgz", - "integrity": 
"sha512-aJeDjQhywg9LBu2t/At58hCvr7pEm0o2Ke1x33B+MhjNmmZ17sy4KImo0KPLgsnc/zN7GPdce8Cnn0SWvwZO7g==", - "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/primitive": "1.0.1", - "@radix-ui/react-compose-refs": "1.0.1", - "@radix-ui/react-primitive": "1.0.3", - "@radix-ui/react-use-callback-ref": "1.0.1", - "@radix-ui/react-use-escape-keydown": "1.0.3" - }, + "node_modules/@radix-ui/react-select/node_modules/@radix-ui/primitive": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.0.tgz", + "integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==", + "license": "MIT" + }, + "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-context": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz", + "integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==", + "license": "MIT", "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { + } + } + }, + "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-focus-guards": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.1.tgz", + "integrity": "sha512-pSIwfrT1a6sIoDASCSpFwOasEwKTZWDw/iBdtnqKO7v6FeOzYJ7U53cPzYFVR3geGGXgVHaH+CdngrrAzqUGxg==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" + }, + "peerDependenciesMeta": { + "@types/react": { "optional": true } } }, - "node_modules/@radix-ui/react-focus-guards": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.0.1.tgz", - "integrity": "sha512-Rect2dWbQ8waGzhMavsIbmSVCgYxkXLxxR3ZvCX79JOglzdEy4JXMb98lq4hPxUbLr77nP0UOGf4rcMU+s1pUA==", + "node_modules/@radix-ui/react-select/node_modules/react-remove-scroll": { + "version": "2.6.0", + "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.6.0.tgz", + "integrity": "sha512-I2U4JVEsQenxDAKaVa3VZ/JeJZe0/2DxPWL8Tj8yLKctQJQiZM52pn/GWFpSp8dftjM3pSAHVJZscAnC/y+ySQ==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.13.10" + "react-remove-scroll-bar": "^2.3.6", + "react-style-singleton": "^2.2.1", + "tslib": "^2.1.0", + "use-callback-ref": "^1.3.0", + "use-sidecar": "^1.1.2" + }, + "engines": { + "node": ">=10" }, "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" }, "peerDependenciesMeta": { "@types/react": { @@ -1293,42 +3195,42 @@ } } }, - "node_modules/@radix-ui/react-focus-scope": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.0.4.tgz", - "integrity": "sha512-sL04Mgvf+FmyvZeYfNu1EPAaaxD+aw7cYeIB9L9Fvq8+urhltTRaEo5ysKOpHuKPclsZcSUMKlN05x4u+CINpA==", + "node_modules/@radix-ui/react-select/node_modules/react-remove-scroll/node_modules/react-remove-scroll-bar": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.6.tgz", + "integrity": 
"sha512-DtSYaao4mBmX+HDo5YWYdBWQwYIQQshUV/dVxFxK+KM26Wjwp1gZ6rv6OC3oujI6Bfu6Xyg3TwK533AQutsn/g==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-compose-refs": "1.0.1", - "@radix-ui/react-primitive": "1.0.3", - "@radix-ui/react-use-callback-ref": "1.0.1" + "react-style-singleton": "^2.2.1", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" }, "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-id": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-id/-/react-id-1.0.1.tgz", - "integrity": "sha512-tI7sT/kqYp8p96yGWY1OAnLHrqDgzHefRBKQ2YAkBS5ja7QLcZ9Z/uY7bEjPUatf8RomoXM8/1sMj1IJaE5UzQ==", + "node_modules/@radix-ui/react-select/node_modules/react-remove-scroll/node_modules/react-style-singleton": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.1.tgz", + "integrity": "sha512-ZWj0fHEMyWkHzKYUr2Bs/4zU6XLmq9HsgBURm7g5pAVfyn49DgUiNgY2d4lXRlYSiCif9YBGpQleewkcqddc7g==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-use-layout-effect": "1.0.1" + "get-nonce": "^1.0.0", + "invariant": "^2.2.4", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" }, "peerDependencies": { - "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" }, "peerDependenciesMeta": { "@types/react": { @@ -1336,88 +3238,60 @@ } } }, - "node_modules/@radix-ui/react-popover": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-popover/-/react-popover-1.0.7.tgz", - "integrity": "sha512-shtvVnlsxT6faMnK/a7n0wptwBD23xc1Z5mdrtKLwVEfsEMXodS0r5s0/g5P0hX//EKYZS2sxUjqfzlg52ZSnQ==", + "node_modules/@radix-ui/react-select/node_modules/react-remove-scroll/node_modules/use-callback-ref": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.2.tgz", + "integrity": "sha512-elOQwe6Q8gqZgDA8mrh44qRTQqpIHDcZ3hXTLjBe1i4ph8XpNJnO+aQf3NaG+lriLopI4HMx9VjQLfPQ6vhnoA==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/primitive": "1.0.1", - "@radix-ui/react-compose-refs": "1.0.1", - "@radix-ui/react-context": "1.0.1", - "@radix-ui/react-dismissable-layer": "1.0.5", - "@radix-ui/react-focus-guards": "1.0.1", - "@radix-ui/react-focus-scope": "1.0.4", - "@radix-ui/react-id": "1.0.1", - "@radix-ui/react-popper": "1.1.3", - "@radix-ui/react-portal": "1.0.4", - "@radix-ui/react-presence": "1.0.1", - "@radix-ui/react-primitive": "1.0.3", - "@radix-ui/react-slot": "1.0.2", - "@radix-ui/react-use-controllable-state": "1.0.1", - "aria-hidden": "^1.1.1", - "react-remove-scroll": "2.5.5" + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" }, "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-popper": { - "version": "1.1.3", - "resolved": 
"https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.1.3.tgz", - "integrity": "sha512-cKpopj/5RHZWjrbF2846jBNacjQVwkP068DfmgrNJXpvVWrOvlAmE9xSiy5OqeE+Gi8D9fP+oDhUnPqNMY8/5w==", + "node_modules/@radix-ui/react-select/node_modules/react-remove-scroll/node_modules/use-sidecar": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.2.tgz", + "integrity": "sha512-epTbsLuzZ7lPClpz2TyryBfztm7m+28DlEv2ZCQ3MDr5ssiwyOwGH/e5F9CkfWjJ1t4clvI58yF822/GUkjjhw==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@floating-ui/react-dom": "^2.0.0", - "@radix-ui/react-arrow": "1.0.3", - "@radix-ui/react-compose-refs": "1.0.1", - "@radix-ui/react-context": "1.0.1", - "@radix-ui/react-primitive": "1.0.3", - "@radix-ui/react-use-callback-ref": "1.0.1", - "@radix-ui/react-use-layout-effect": "1.0.1", - "@radix-ui/react-use-rect": "1.0.1", - "@radix-ui/react-use-size": "1.0.1", - "@radix-ui/rect": "1.0.1" + "detect-node-es": "^1.1.0", + "tslib": "^2.0.0" + }, + "engines": { + "node": ">=10" }, "peerDependencies": { - "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "@types/react": "^16.9.0 || ^17.0.0 || ^18.0.0", + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - "node_modules/@radix-ui/react-portal": { - "version": "1.0.4", - "resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.0.4.tgz", - "integrity": "sha512-Qki+C/EuGUVCQTOTD5vzJzJuMUlewbzuKyUy+/iHM2uwGiru9gZeBJtHAPKAEkB5KWGi9mP/CHKcY0wt1aW45Q==", + "node_modules/@radix-ui/react-separator": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-separator/-/react-separator-1.1.0.tgz", + "integrity": "sha512-3uBAs+egzvJBDZAzvb/n4NxxOYpnspmWxO2u5NbZ8Y6FM/NdrGSF9bop3Cf6F6C71z1rTSn8KV0Fo2ZVd79lGA==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-primitive": "1.0.3" + "@radix-ui/react-primitive": "2.0.0" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1428,43 +3302,44 @@ } } }, - "node_modules/@radix-ui/react-presence": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-presence/-/react-presence-1.0.1.tgz", - "integrity": "sha512-UXLW4UAbIY5ZjcvzjfRFo5gxva8QirC9hF7wRE4U5gz+TP0DbRk+//qyuAQ1McDxBt1xNMBTaciFGvEmJvAZCg==", + "node_modules/@radix-ui/react-slot": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.1.0.tgz", + "integrity": "sha512-FUCf5XMfmW4dtYl69pdS4DbxKy8nj4M7SafBgPllysxmdachynNflAdp/gCsnYWNDnge6tI9onzMp5ARYc1KNw==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-compose-refs": "1.0.1", - "@radix-ui/react-use-layout-effect": "1.0.1" + "@radix-ui/react-compose-refs": "1.1.0" }, "peerDependencies": { "@types/react": "*", - "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { "optional": true - }, - "@types/react-dom": { - "optional": true } } }, - 
"node_modules/@radix-ui/react-primitive": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-primitive/-/react-primitive-1.0.3.tgz", - "integrity": "sha512-yi58uVyoAcK/Nq1inRY56ZSjKypBNKTa/1mcL8qdl6oJeEaDbOldlzrGn7P6Q3Id5d+SYNGc5AJgc4vGhjs5+g==", + "node_modules/@radix-ui/react-tabs": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-tabs/-/react-tabs-1.1.1.tgz", + "integrity": "sha512-3GBUDmP2DvzmtYLMsHmpA1GtR46ZDZ+OreXM/N+kkQJOPIgytFWWTfDQmBQKBvaFS0Vno0FktdbVzN28KGrMdw==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-slot": "1.0.2" + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-direction": "1.1.0", + "@radix-ui/react-id": "1.1.0", + "@radix-ui/react-presence": "1.1.1", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-roving-focus": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1475,17 +3350,20 @@ } } }, - "node_modules/@radix-ui/react-slot": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/@radix-ui/react-slot/-/react-slot-1.0.2.tgz", - "integrity": "sha512-YeTpuq4deV+6DusvVUW4ivBgnkHwECUu0BiN43L5UCDFgdhsRUWAghhTF5MbvNTPzmiFOx90asDSUjWuCNapwg==", - "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-compose-refs": "1.0.1" - }, + "node_modules/@radix-ui/react-tabs/node_modules/@radix-ui/primitive": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.0.tgz", + "integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==", + "license": "MIT" + }, + "node_modules/@radix-ui/react-tabs/node_modules/@radix-ui/react-context": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz", + "integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==", + "license": "MIT", "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1494,29 +3372,29 @@ } }, "node_modules/@radix-ui/react-tooltip": { - "version": "1.0.7", - "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.0.7.tgz", - "integrity": "sha512-lPh5iKNFVQ/jav/j6ZrWq3blfDJ0OH9R6FlNUHPMqdLuQ9vwDgFsRxvl8b7Asuy5c8xmoojHUxKHQSOAvMHxyw==", + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/@radix-ui/react-tooltip/-/react-tooltip-1.1.3.tgz", + "integrity": "sha512-Z4w1FIS0BqVFI2c1jZvb/uDVJijJjJ2ZMuPV81oVgTZ7g3BZxobplnMVvXtFWgtozdvYJ+MFWtwkM5S2HnAong==", + "license": "MIT", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/primitive": "1.0.1", - "@radix-ui/react-compose-refs": "1.0.1", - "@radix-ui/react-context": "1.0.1", - "@radix-ui/react-dismissable-layer": "1.0.5", - "@radix-ui/react-id": "1.0.1", - "@radix-ui/react-popper": "1.1.3", - "@radix-ui/react-portal": "1.0.4", - "@radix-ui/react-presence": "1.0.1", - "@radix-ui/react-primitive": "1.0.3", - "@radix-ui/react-slot": "1.0.2", - "@radix-ui/react-use-controllable-state": 
"1.0.1", - "@radix-ui/react-visually-hidden": "1.0.3" + "@radix-ui/primitive": "1.1.0", + "@radix-ui/react-compose-refs": "1.1.0", + "@radix-ui/react-context": "1.1.1", + "@radix-ui/react-dismissable-layer": "1.1.1", + "@radix-ui/react-id": "1.1.0", + "@radix-ui/react-popper": "1.2.0", + "@radix-ui/react-portal": "1.1.2", + "@radix-ui/react-presence": "1.1.1", + "@radix-ui/react-primitive": "2.0.0", + "@radix-ui/react-slot": "1.1.0", + "@radix-ui/react-use-controllable-state": "1.1.0", + "@radix-ui/react-visually-hidden": "1.1.0" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1527,16 +3405,34 @@ } } }, - "node_modules/@radix-ui/react-use-callback-ref": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.0.1.tgz", - "integrity": "sha512-D94LjX4Sp0xJFVaoQOd3OO9k7tpBYNOXdVhkltUbGv2Qb9OXdrg/CpsjlZv7ia14Sylv398LswWBVVu5nqKzAQ==", - "dependencies": { - "@babel/runtime": "^7.13.10" + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/primitive": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.0.tgz", + "integrity": "sha512-4Z8dn6Upk0qk4P74xBhZ6Hd/w0mPEzOOLxy4xiPXOXqjF7jZS0VAKk7/x/H6FyY2zCkYJqePf1G5KmkmNJ4RBA==", + "license": "MIT" + }, + "node_modules/@radix-ui/react-tooltip/node_modules/@radix-ui/react-context": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@radix-ui/react-context/-/react-context-1.1.1.tgz", + "integrity": "sha512-UASk9zi+crv9WteK/NU4PLvOoL3OuE6BWVKNF6hPRBtYBDXQ2u5iu3O59zUlJiTVvkyuycnqrztsHVJwcK9K+Q==", + "license": "MIT", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-callback-ref": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-callback-ref/-/react-use-callback-ref-1.1.0.tgz", + "integrity": "sha512-CasTfvsy+frcFkbXtSJ2Zu9JHpN8TYKxkgJGWbjiZhFivxaeW7rMeZt7QELGVLaYVfFMsKHjb7Ak0nMEe+2Vfw==", "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1545,16 +3441,15 @@ } }, "node_modules/@radix-ui/react-use-controllable-state": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.0.1.tgz", - "integrity": "sha512-Svl5GY5FQeN758fWKrjM6Qb7asvXeiZltlT4U2gVfl8Gx5UAv2sMR0LWo8yhsIZh2oQ0eFdZ59aoOOMV7b47VA==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-controllable-state/-/react-use-controllable-state-1.1.0.tgz", + "integrity": "sha512-MtfMVJiSr2NjzS0Aa90NPTnvTSg6C/JLCV7ma0W6+OMV78vd8OyRpID+Ng9LxzsPbLeuBnWBA1Nq30AtBIDChw==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-use-callback-ref": "1.0.1" + "@radix-ui/react-use-callback-ref": "1.1.0" }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1563,16 +3458,15 @@ } }, 
"node_modules/@radix-ui/react-use-escape-keydown": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.0.3.tgz", - "integrity": "sha512-vyL82j40hcFicA+M4Ex7hVkB9vHgSse1ZWomAqV2Je3RleKGO5iM8KMOEtfoSB0PnIelMd2lATjTGMYqN5ylTg==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-escape-keydown/-/react-use-escape-keydown-1.1.0.tgz", + "integrity": "sha512-L7vwWlR1kTTQ3oh7g1O0CBF3YCyyTj8NmhLR+phShpyA50HCfBFKVJTpshm9PzLiKmehsrQzTYTpX9HvmC9rhw==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-use-callback-ref": "1.0.1" + "@radix-ui/react-use-callback-ref": "1.1.0" }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1581,15 +3475,27 @@ } }, "node_modules/@radix-ui/react-use-layout-effect": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.0.1.tgz", - "integrity": "sha512-v/5RegiJWYdoCvMnITBkNNx6bCj20fiaJnWtRkU18yITptraXjffz5Qbn05uOiQnOvi+dbkznkoaMltz1GnszQ==", - "dependencies": { - "@babel/runtime": "^7.13.10" + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-layout-effect/-/react-use-layout-effect-1.1.0.tgz", + "integrity": "sha512-+FPE0rOdziWSrH9athwI1R0HDVbWlEhd+FR+aSDk4uWGmSJ9Z54sdZVDQPZAinJhJXwfT+qnj969mCsT2gfm5w==", + "peerDependencies": { + "@types/react": "*", + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } + } + }, + "node_modules/@radix-ui/react-use-previous": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-previous/-/react-use-previous-1.1.0.tgz", + "integrity": "sha512-Z/e78qg2YFnnXcW88A4JmTtm4ADckLno6F7OXotmkQfeuCVaKuYzqAATPhVzl3delXE7CxIV8shofPn3jPc5Og==", + "license": "MIT", "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1598,16 +3504,15 @@ } }, "node_modules/@radix-ui/react-use-rect": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.0.1.tgz", - "integrity": "sha512-Cq5DLuSiuYVKNU8orzJMbl15TXilTnJKUCltMVQg53BQOF1/C5toAaGrowkgksdBQ9H+SRL23g0HDmg9tvmxXw==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-rect/-/react-use-rect-1.1.0.tgz", + "integrity": "sha512-0Fmkebhr6PiseyZlYAOtLS+nb7jLmpqTrJyv61Pe68MKYW6OWdRE2kI70TaYY27u7H0lajqM3hSMMLFq18Z7nQ==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/rect": "1.0.1" + "@radix-ui/rect": "1.1.0" }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1616,16 +3521,15 @@ } }, "node_modules/@radix-ui/react-use-size": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.0.1.tgz", - "integrity": "sha512-ibay+VqrgcaI6veAojjofPATwledXiSmX+C0KrBk/xgpX9rBzPV3OsfwlhQdUOFbh+LKQorLYT+xTXW9V8yd0g==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-use-size/-/react-use-size-1.1.0.tgz", + "integrity": 
"sha512-XW3/vWuIXHa+2Uwcc2ABSfcCledmXhhQPlGbfcRXbiUQI5Icjcg19BGCZVKKInYbvUCut/ufbbLLPFC5cbb1hw==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-use-layout-effect": "1.0.1" + "@radix-ui/react-use-layout-effect": "1.1.0" }, "peerDependencies": { "@types/react": "*", - "react": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1634,18 +3538,17 @@ } }, "node_modules/@radix-ui/react-visually-hidden": { - "version": "1.0.3", - "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.0.3.tgz", - "integrity": "sha512-D4w41yN5YRKtu464TLnByKzMDG/JlMPHtfZgQAu9v6mNakUqGUI9vUrfQKz8NK41VMm/xbZbh76NUTVtIYqOMA==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/react-visually-hidden/-/react-visually-hidden-1.1.0.tgz", + "integrity": "sha512-N8MDZqtgCgG5S3aV60INAB475osJousYpZ4cTJ2cFbMpdHS5Y6loLTH8LPtkj2QN0x93J30HT/M3qJXM0+lyeQ==", "dependencies": { - "@babel/runtime": "^7.13.10", - "@radix-ui/react-primitive": "1.0.3" + "@radix-ui/react-primitive": "2.0.0" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", - "react": "^16.8 || ^17.0 || ^18.0", - "react-dom": "^16.8 || ^17.0 || ^18.0" + "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", + "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "peerDependenciesMeta": { "@types/react": { @@ -1657,11 +3560,116 @@ } }, "node_modules/@radix-ui/rect": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.0.1.tgz", - "integrity": "sha512-fyrgCaedtvMg9NK3en0pnOYJdtfwxUcNolezkNPUsoX57X8oQk+NkqcvzHXD2uKNij6GXmWU9NDru2IWjrO4BQ==", + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@radix-ui/rect/-/rect-1.1.0.tgz", + "integrity": "sha512-A9+lCBZoaMJlVKcRBz2YByCG+Cp2t6nAnMnNba+XiWxnj6r4JUFqfsgwocMBZU9LPtdxC6wB56ySYpc7LQIoJg==" + }, + "node_modules/@rollup/plugin-commonjs": { + "version": "26.0.1", + "resolved": "https://registry.npmjs.org/@rollup/plugin-commonjs/-/plugin-commonjs-26.0.1.tgz", + "integrity": "sha512-UnsKoZK6/aGIH6AdkptXhNvhaqftcjq3zZdT+LY5Ftms6JR06nADcDsYp5hTU9E2lbJUEOhdlY5J4DNTneM+jQ==", "dependencies": { - "@babel/runtime": "^7.13.10" + "@rollup/pluginutils": "^5.0.1", + "commondir": "^1.0.1", + "estree-walker": "^2.0.2", + "glob": "^10.4.1", + "is-reference": "1.2.1", + "magic-string": "^0.30.3" + }, + "engines": { + "node": ">=16.0.0 || 14 >= 14.17" + }, + "peerDependencies": { + "rollup": "^2.68.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/glob": { + "version": "10.4.5", + "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", + "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", + "dependencies": { + "foreground-child": "^3.1.0", + "jackspeak": "^3.1.2", + "minimatch": "^9.0.4", + "minipass": "^7.1.2", + "package-json-from-dist": "^1.0.0", + "path-scurry": "^1.11.1" + }, + "bin": { + "glob": "dist/esm/bin.mjs" + }, + "funding": { + "url": 
"https://github.com/sponsors/isaacs" + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/jackspeak": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", + "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", + "dependencies": { + "@isaacs/cliui": "^8.0.2" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + }, + "optionalDependencies": { + "@pkgjs/parseargs": "^0.11.0" + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@rollup/plugin-commonjs/node_modules/minipass": { + "version": "7.1.2", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", + "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", + "engines": { + "node": ">=16 || 14 >=14.17" + } + }, + "node_modules/@rollup/pluginutils": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/@rollup/pluginutils/-/pluginutils-5.1.2.tgz", + "integrity": "sha512-/FIdS3PyZ39bjZlwqFnWqCOVnW7o963LtKMwQOD0NhQqw22gSr2YY1afu3FxRip4ZCZNsD5jq6Aaz6QV3D/Njw==", + "dependencies": { + "@types/estree": "^1.0.0", + "estree-walker": "^2.0.2", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "rollup": "^1.20.0||^2.0.0||^3.0.0||^4.0.0" + }, + "peerDependenciesMeta": { + "rollup": { + "optional": true + } } }, "node_modules/@rushstack/eslint-patch": { @@ -1670,17 +3678,409 @@ "integrity": "sha512-qC/xYId4NMebE6w/V33Fh9gWxLgURiNYgVNObbJl2LZv0GUUItCcCqC5axQSwRaAgaxl2mELq1rMzlswaQ0Zxg==", "dev": true }, + "node_modules/@sentry-internal/browser-utils": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry-internal/browser-utils/-/browser-utils-8.34.0.tgz", + "integrity": "sha512-4AcYOzPzD1tL5eSRQ/GpKv5enquZf4dMVUez99/Bh3va8qiJrNP55AcM7UzZ7WZLTqKygIYruJTU5Zu2SpEAPQ==", + "dependencies": { + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry-internal/feedback": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry-internal/feedback/-/feedback-8.34.0.tgz", + "integrity": "sha512-aYSM2KPUs0FLPxxbJCFSwCYG70VMzlT04xepD1Y/tTlPPOja/02tSv2tyOdZbv8Uw7xslZs3/8Lhj74oYcTBxw==", + "dependencies": { + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry-internal/replay": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry-internal/replay/-/replay-8.34.0.tgz", + "integrity": "sha512-EoMh9NYljNewZK1quY23YILgtNdGgrkzJ9TPsj6jXUG0LZ0Q7N7eFWd0xOEDBvFxrmI3cSXF1i4d1sBb+eyKRw==", + "dependencies": { + "@sentry-internal/browser-utils": "8.34.0", + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry-internal/replay-canvas": { + "version": "8.34.0", + "resolved": 
"https://registry.npmjs.org/@sentry-internal/replay-canvas/-/replay-canvas-8.34.0.tgz", + "integrity": "sha512-x8KhZcCDpbKHqFOykYXiamX6x0LRxv6N1OJHoH+XCrMtiDBZr4Yo30d/MaS6rjmKGMtSRij30v+Uq+YWIgxUrg==", + "dependencies": { + "@sentry-internal/replay": "8.34.0", + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry/babel-plugin-component-annotate": { + "version": "2.22.3", + "resolved": "https://registry.npmjs.org/@sentry/babel-plugin-component-annotate/-/babel-plugin-component-annotate-2.22.3.tgz", + "integrity": "sha512-OlHA+i+vnQHRIdry4glpiS/xTOtgjmpXOt6IBOUqynx5Jd/iK1+fj+t8CckqOx9wRacO/hru2wfW/jFq0iViLg==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/@sentry/browser": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/browser/-/browser-8.34.0.tgz", + "integrity": "sha512-3HHG2NXxzHq1lVmDy2uRjYjGNf9NsJsTPlOC70vbQdOb+S49EdH/XMPy+J3ruIoyv6Cu0LwvA6bMOM6rHZOgNQ==", + "dependencies": { + "@sentry-internal/browser-utils": "8.34.0", + "@sentry-internal/feedback": "8.34.0", + "@sentry-internal/replay": "8.34.0", + "@sentry-internal/replay-canvas": "8.34.0", + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry/bundler-plugin-core": { + "version": "2.22.3", + "resolved": "https://registry.npmjs.org/@sentry/bundler-plugin-core/-/bundler-plugin-core-2.22.3.tgz", + "integrity": "sha512-DeoUl0WffcqZZRl5Wy9aHvX4WfZbbWt0QbJ7NJrcEViq+dRAI2FQTYECFLwdZi5Gtb3oyqZICO+P7k8wDnzsjQ==", + "dependencies": { + "@babel/core": "^7.18.5", + "@sentry/babel-plugin-component-annotate": "2.22.3", + "@sentry/cli": "^2.33.1", + "dotenv": "^16.3.1", + "find-up": "^5.0.0", + "glob": "^9.3.2", + "magic-string": "0.30.8", + "unplugin": "1.0.1" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/@sentry/bundler-plugin-core/node_modules/brace-expansion": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", + "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/@sentry/bundler-plugin-core/node_modules/glob": { + "version": "9.3.5", + "resolved": "https://registry.npmjs.org/glob/-/glob-9.3.5.tgz", + "integrity": "sha512-e1LleDykUz2Iu+MTYdkSsuWX8lvAjAcs0Xef0lNIu0S2wOAzuTxCJtcd9S3cijlwYF18EsU3rzb8jPVobxDh9Q==", + "dependencies": { + "fs.realpath": "^1.0.0", + "minimatch": "^8.0.2", + "minipass": "^4.2.4", + "path-scurry": "^1.6.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@sentry/bundler-plugin-core/node_modules/magic-string": { + "version": "0.30.8", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.8.tgz", + "integrity": "sha512-ISQTe55T2ao7XtlAStud6qwYPZjE4GK1S/BeVPus4jrq6JuOnQ00YKQC581RWhR122W7msZV263KzVeLoqidyQ==", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.4.15" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@sentry/bundler-plugin-core/node_modules/minimatch": { + "version": "8.0.4", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-8.0.4.tgz", + "integrity": "sha512-W0Wvr9HyFXZRGIDgCicunpQ299OKXs9RgZfaukz4qAW/pJhcpUfupc9c+OObPOFueNy8VSrZgEmDtk6Kh4WzDA==", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + 
"engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@sentry/bundler-plugin-core/node_modules/minipass": { + "version": "4.2.8", + "resolved": "https://registry.npmjs.org/minipass/-/minipass-4.2.8.tgz", + "integrity": "sha512-fNzuVyifolSLFL4NzpF+wEF4qrgqaaKX0haXPQEdQ7NKAN+WecoKMHV09YcuL/DHxrUsYQOK3MiuDf7Ip2OXfQ==", + "engines": { + "node": ">=8" + } + }, + "node_modules/@sentry/cli": { + "version": "2.37.0", + "resolved": "https://registry.npmjs.org/@sentry/cli/-/cli-2.37.0.tgz", + "integrity": "sha512-fM3V4gZRJR/s8lafc3O07hhOYRnvkySdPkvL/0e0XW0r+xRwqIAgQ5ECbsZO16A5weUiXVSf03ztDL1FcmbJCQ==", + "hasInstallScript": true, + "dependencies": { + "https-proxy-agent": "^5.0.0", + "node-fetch": "^2.6.7", + "progress": "^2.0.3", + "proxy-from-env": "^1.1.0", + "which": "^2.0.2" + }, + "bin": { + "sentry-cli": "bin/sentry-cli" + }, + "engines": { + "node": ">= 10" + }, + "optionalDependencies": { + "@sentry/cli-darwin": "2.37.0", + "@sentry/cli-linux-arm": "2.37.0", + "@sentry/cli-linux-arm64": "2.37.0", + "@sentry/cli-linux-i686": "2.37.0", + "@sentry/cli-linux-x64": "2.37.0", + "@sentry/cli-win32-i686": "2.37.0", + "@sentry/cli-win32-x64": "2.37.0" + } + }, + "node_modules/@sentry/cli-darwin": { + "version": "2.37.0", + "resolved": "https://registry.npmjs.org/@sentry/cli-darwin/-/cli-darwin-2.37.0.tgz", + "integrity": "sha512-CsusyMvO0eCPSN7H+sKHXS1pf637PWbS4rZak/7giz/z31/6qiXmeMlcL3f9lLZKtFPJmXVFO9uprn1wbBVF8A==", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">=10" + } + }, + "node_modules/@sentry/core": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/core/-/core-8.34.0.tgz", + "integrity": "sha512-adrXCTK/zsg5pJ67lgtZqdqHvyx6etMjQW3P82NgWdj83c8fb+zH+K79Z47pD4zQjX0ou2Ws5nwwi4wJbz4bfA==", + "dependencies": { + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry/nextjs": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/nextjs/-/nextjs-8.34.0.tgz", + "integrity": "sha512-REHE3E21Mnm92B3BfJz3GTMsaZM8vaDJAe7RlAMDltESRECv+ELJ5qVRLgAp8Bd6w4mG8IRNINmK2UwHrAIi9g==", + "dependencies": { + "@opentelemetry/instrumentation-http": "0.53.0", + "@opentelemetry/semantic-conventions": "^1.27.0", + "@rollup/plugin-commonjs": "26.0.1", + "@sentry-internal/browser-utils": "8.34.0", + "@sentry/core": "8.34.0", + "@sentry/node": "8.34.0", + "@sentry/opentelemetry": "8.34.0", + "@sentry/react": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0", + "@sentry/vercel-edge": "8.34.0", + "@sentry/webpack-plugin": "2.22.3", + "chalk": "3.0.0", + "resolve": "1.22.8", + "rollup": "3.29.5", + "stacktrace-parser": "^0.1.10" + }, + "engines": { + "node": ">=14.18" + }, + "peerDependencies": { + "next": "^13.2.0 || ^14.0 || ^15.0.0-rc.0", + "webpack": ">=5.0.0" + }, + "peerDependenciesMeta": { + "webpack": { + "optional": true + } + } + }, + "node_modules/@sentry/nextjs/node_modules/@sentry/react": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/react/-/react-8.34.0.tgz", + "integrity": "sha512-gIgzhj7h67C+Sdq2ul4fOSK142Gf0uV99bqHRdtIiUlXw9yjzZQY5TKTtzbOaevn7qBJ0xrRKtIRUbOBMl0clw==", + "dependencies": { + "@sentry/browser": "8.34.0", + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0", + "hoist-non-react-statics": "^3.3.2" + }, + "engines": { + "node": ">=14.18" + }, + "peerDependencies": { + "react": "^16.14.0 || 17.x || 
18.x || 19.x" + } + }, + "node_modules/@sentry/nextjs/node_modules/chalk": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-3.0.0.tgz", + "integrity": "sha512-4D3B6Wf41KOYRFdszmDqMCGq5VV/uMAB273JILmO+3jAlh8X4qDtdtgCR3fxtbLEMzSx22QdhnDcJvu2u1fVwg==", + "dependencies": { + "ansi-styles": "^4.1.0", + "supports-color": "^7.1.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@sentry/node": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/node/-/node-8.34.0.tgz", + "integrity": "sha512-Q7BPp7Y8yCcwD620xoziWSOuPi/PCIdttkczvB0BGzBRYh2s702h+qNusRijRpVNZmzmYOo9m1x7Y1O/b8/v2A==", + "dependencies": { + "@opentelemetry/api": "^1.9.0", + "@opentelemetry/context-async-hooks": "^1.25.1", + "@opentelemetry/core": "^1.25.1", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/instrumentation-amqplib": "^0.42.0", + "@opentelemetry/instrumentation-connect": "0.39.0", + "@opentelemetry/instrumentation-dataloader": "0.12.0", + "@opentelemetry/instrumentation-express": "0.42.0", + "@opentelemetry/instrumentation-fastify": "0.39.0", + "@opentelemetry/instrumentation-fs": "0.15.0", + "@opentelemetry/instrumentation-generic-pool": "0.39.0", + "@opentelemetry/instrumentation-graphql": "0.43.0", + "@opentelemetry/instrumentation-hapi": "0.41.0", + "@opentelemetry/instrumentation-http": "0.53.0", + "@opentelemetry/instrumentation-ioredis": "0.43.0", + "@opentelemetry/instrumentation-kafkajs": "0.3.0", + "@opentelemetry/instrumentation-koa": "0.43.0", + "@opentelemetry/instrumentation-lru-memoizer": "0.40.0", + "@opentelemetry/instrumentation-mongodb": "0.47.0", + "@opentelemetry/instrumentation-mongoose": "0.42.0", + "@opentelemetry/instrumentation-mysql": "0.41.0", + "@opentelemetry/instrumentation-mysql2": "0.41.0", + "@opentelemetry/instrumentation-nestjs-core": "0.40.0", + "@opentelemetry/instrumentation-pg": "0.44.0", + "@opentelemetry/instrumentation-redis-4": "0.42.0", + "@opentelemetry/instrumentation-undici": "0.6.0", + "@opentelemetry/resources": "^1.26.0", + "@opentelemetry/sdk-trace-base": "^1.26.0", + "@opentelemetry/semantic-conventions": "^1.27.0", + "@prisma/instrumentation": "5.19.1", + "@sentry/core": "8.34.0", + "@sentry/opentelemetry": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0", + "import-in-the-middle": "^1.11.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry/opentelemetry": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/opentelemetry/-/opentelemetry-8.34.0.tgz", + "integrity": "sha512-WS91L+HVKGVIzOgt0szGp+24iKOs86BZsAHGt0HWnMR4kqWP6Ak+TLvqWDCxnuzniZMxdewDGA8p5hrBAPsmsA==", + "dependencies": { + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + }, + "peerDependencies": { + "@opentelemetry/api": "^1.9.0", + "@opentelemetry/core": "^1.25.1", + "@opentelemetry/instrumentation": "^0.53.0", + "@opentelemetry/sdk-trace-base": "^1.26.0", + "@opentelemetry/semantic-conventions": "^1.27.0" + } + }, + "node_modules/@sentry/types": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/types/-/types-8.34.0.tgz", + "integrity": "sha512-zLRc60CzohGCo6zNsNeQ9JF3SiEeRE4aDCP9fDDdIVCOKovS+mn1rtSip0qd0Vp2fidOu0+2yY0ALCz1A3PJSQ==", + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry/utils": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/utils/-/utils-8.34.0.tgz", + "integrity": 
"sha512-W1KoRlFUjprlh3t86DZPFxLfM6mzjRzshVfMY7vRlJFymBelJsnJ3A1lPeBZM9nCraOSiw6GtOWu6k5BAkiGIg==", + "dependencies": { + "@sentry/types": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry/vercel-edge": { + "version": "8.34.0", + "resolved": "https://registry.npmjs.org/@sentry/vercel-edge/-/vercel-edge-8.34.0.tgz", + "integrity": "sha512-yF6043FcVO9GqPawCJZp0psEL8iF9+5bOlAdQydCyaj2BtDgFvAeBVI19qlDeAHhqsXNfTD0JsIox2aJPNupwg==", + "dependencies": { + "@sentry/core": "8.34.0", + "@sentry/types": "8.34.0", + "@sentry/utils": "8.34.0" + }, + "engines": { + "node": ">=14.18" + } + }, + "node_modules/@sentry/webpack-plugin": { + "version": "2.22.3", + "resolved": "https://registry.npmjs.org/@sentry/webpack-plugin/-/webpack-plugin-2.22.3.tgz", + "integrity": "sha512-Sq1S6bL3nuoTP5typkj+HPjQ13dqftIE8kACAq4tKkXOpWO9bf6HtqcruEQCxMekbWDTdljsrknQ17ZBx2q66Q==", + "dependencies": { + "@sentry/bundler-plugin-core": "2.22.3", + "unplugin": "1.0.1", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">= 14" + }, + "peerDependencies": { + "webpack": ">=4.40.0" + } + }, + "node_modules/@stripe/stripe-js": { + "version": "4.6.0", + "resolved": "https://registry.npmjs.org/@stripe/stripe-js/-/stripe-js-4.6.0.tgz", + "integrity": "sha512-ZoK0dMFnVH0J5XUWGqsta8S8xm980qEwJKAIgZcLQxaSsbGRB9CsVvfOjwQFE1JC1q3rPwb/b+gQAmzIESnHnA==", + "engines": { + "node": ">=12.16" + } + }, "node_modules/@swc/counter": { "version": "0.1.3", "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz", "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==" }, "node_modules/@swc/helpers": { - "version": "0.5.5", - "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.5.tgz", - "integrity": "sha512-KGYxvIOXcceOAbEk4bi/dVLEK9z8sZ0uBB3Il5b1rhfClSpcX0yfRO0KmTkqR2cnQDymwLB+25ZyMzICg/cm/A==", + "version": "0.5.13", + "resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.13.tgz", + "integrity": "sha512-UoKGxQ3r5kYI9dALKJapMmuK+1zWM/H17Z1+iwnNmzcJRnfFuevZs375TA5rW31pu4BS4NoSy1fRsexDXfWn5w==", "dependencies": { - "@swc/counter": "^0.1.3", "tslib": "^2.4.0" } }, @@ -1699,48 +4099,21 @@ "tailwindcss": ">=3.0.0 || insiders" } }, - "node_modules/@tanstack/react-virtual": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@tanstack/react-virtual/-/react-virtual-3.5.0.tgz", - "integrity": "sha512-rtvo7KwuIvqK9zb0VZ5IL7fiJAEnG+0EiFZz8FUOs+2mhGqdGmjKIaT1XU7Zq0eFqL0jonLlhbayJI/J2SA/Bw==", - "dependencies": { - "@tanstack/virtual-core": "3.5.0" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/tannerlinsley" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" - } - }, "node_modules/@tanstack/virtual-core": { - "version": "3.5.0", - "resolved": "https://registry.npmjs.org/@tanstack/virtual-core/-/virtual-core-3.5.0.tgz", - "integrity": "sha512-KnPRCkQTyqhanNC0K63GBG3wA8I+D1fQuVnAvcBF8f13akOKeQp1gSbu6f77zCxhEk727iV5oQnbHLYzHrECLg==", + "version": "3.10.8", + "resolved": "https://registry.npmjs.org/@tanstack/virtual-core/-/virtual-core-3.10.8.tgz", + "integrity": "sha512-PBu00mtt95jbKFi6Llk9aik8bnR3tR/oQP1o3TSi+iG//+Q2RTIzCEgKkHG8BB86kxMNW6O8wku+Lmi+QFR6jA==", "funding": { "type": "github", "url": "https://github.com/sponsors/tannerlinsley" } }, - "node_modules/@tremor/react": { - "version": "3.16.3", - "resolved": "https://registry.npmjs.org/@tremor/react/-/react-3.16.3.tgz", - "integrity": 
"sha512-XiufPz4RRdrHrhwL7Cfcd9XoUEPyN/Q4jwj3kw1OQmFD1sYMCS2pAzzSP62k7zq02Z0QIPBuVK5p7/KQ+h4esQ==", + "node_modules/@types/connect": { + "version": "3.4.36", + "resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.36.tgz", + "integrity": "sha512-P63Zd/JUGq+PdrM1lv0Wv5SBYeA2+CORvbrXbngriYY0jzLUWfQMQQxOhjONEz/wlHOAxOdY7CY65rgQdTjq2w==", "dependencies": { - "@floating-ui/react": "^0.19.2", - "@headlessui/react": "^1.7.19", - "@headlessui/tailwindcss": "^0.2.0", - "date-fns": "^3.6.0", - "react-day-picker": "^8.10.1", - "react-transition-state": "^2.1.1", - "recharts": "^2.12.7", - "tailwind-merge": "^1.14.0" - }, - "peerDependencies": { - "react": "^18.0.0", - "react-dom": ">=16.6.0" + "@types/node": "*" } }, "node_modules/@types/d3-array": { @@ -1840,6 +4213,12 @@ "resolved": "https://registry.npmjs.org/@types/js-cookie/-/js-cookie-3.0.6.tgz", "integrity": "sha512-wkw9yd1kEXOPnvEeEV1Go1MmxtBJL0RR79aOTAApecWFVu7w0NNXNqhcWgvw2YgZDYadliXkl14pa3WXw5jlCQ==" }, + "node_modules/@types/json-schema": { + "version": "7.0.15", + "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", + "integrity": "sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==", + "peer": true + }, "node_modules/@types/json5": { "version": "0.0.29", "resolved": "https://registry.npmjs.org/@types/json5/-/json5-0.0.29.tgz", @@ -1864,6 +4243,14 @@ "resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz", "integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g==" }, + "node_modules/@types/mysql": { + "version": "2.15.26", + "resolved": "https://registry.npmjs.org/@types/mysql/-/mysql-2.15.26.tgz", + "integrity": "sha512-DSLCOXhkvfS5WNNPbfn2KdICAmk8lLc+/PNvnPnF7gOdMZCxopXduqv0OQ13y/yA/zXTSikZZqVgybUxOEg6YQ==", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/node": { "version": "18.15.11", "resolved": "https://registry.npmjs.org/@types/node/-/node-18.15.11.tgz", @@ -1874,6 +4261,24 @@ "resolved": "https://registry.npmjs.org/@types/parse-json/-/parse-json-4.0.2.tgz", "integrity": "sha512-dISoDXWWQwUquiKsyZ4Ng+HX2KsPL7LyHKHQwgGFEA3IaKac4Obd+h2a/a6waisAoepJlBcx9paWqjA8/HVjCw==" }, + "node_modules/@types/pg": { + "version": "8.6.1", + "resolved": "https://registry.npmjs.org/@types/pg/-/pg-8.6.1.tgz", + "integrity": "sha512-1Kc4oAGzAl7uqUStZCDvaLFqZrW9qWSjXOmBfdgyBP5La7Us6Mg4GBvRlSoaZMhQF/zSj1C8CtKMBkoiT8eL8w==", + "dependencies": { + "@types/node": "*", + "pg-protocol": "*", + "pg-types": "^2.2.0" + } + }, + "node_modules/@types/pg-pool": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/@types/pg-pool/-/pg-pool-2.0.6.tgz", + "integrity": "sha512-TaAUE5rq2VQYxab5Ts7WZhKNmuN78Q6PiFonTDdpbx8a1H0M1vhy3rhiMjl+e2iHmogyMw7jZF4FrE6eJUy5HQ==", + "dependencies": { + "@types/pg": "*" + } + }, "node_modules/@types/prismjs": { "version": "1.26.4", "resolved": "https://registry.npmjs.org/@types/prismjs/-/prismjs-1.26.4.tgz", @@ -1915,6 +4320,11 @@ "resolved": "https://registry.npmjs.org/@types/scheduler/-/scheduler-0.23.0.tgz", "integrity": "sha512-YIoDCTH3Af6XM5VuwGG/QL/CJqga1Zm3NkU3HZ4ZHK2fRMPYP1VczsTUqtsf43PH/iJNVlPHAo2oWX7BSdB2Hw==" }, + "node_modules/@types/shimmer": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@types/shimmer/-/shimmer-1.2.0.tgz", + "integrity": "sha512-UE7oxhQLLd9gub6JKIAhDq06T0F6FnztwMNRvYgjeQSBeMc1ZG/tA47EwfduvkuQS8apbkM/lpLpWsaCeYsXVg==" + }, "node_modules/@types/unist": { "version": "3.0.2", "resolved": 
"https://registry.npmjs.org/@types/unist/-/unist-3.0.2.tgz", @@ -2057,11 +4467,168 @@ "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==" }, + "node_modules/@webassemblyjs/ast": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ast/-/ast-1.12.1.tgz", + "integrity": "sha512-EKfMUOPRRUTy5UII4qJDGPpqfwjOmZ5jeGFwid9mnoqIFK+e0vqoi1qH56JpmZSzEL53jKnNzScdmftJyG5xWg==", + "peer": true, + "dependencies": { + "@webassemblyjs/helper-numbers": "1.11.6", + "@webassemblyjs/helper-wasm-bytecode": "1.11.6" + } + }, + "node_modules/@webassemblyjs/floating-point-hex-parser": { + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@webassemblyjs/floating-point-hex-parser/-/floating-point-hex-parser-1.11.6.tgz", + "integrity": "sha512-ejAj9hfRJ2XMsNHk/v6Fu2dGS+i4UaXBXGemOfQ/JfQ6mdQg/WXtwleQRLLS4OvfDhv8rYnVwH27YJLMyYsxhw==", + "peer": true + }, + "node_modules/@webassemblyjs/helper-api-error": { + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-api-error/-/helper-api-error-1.11.6.tgz", + "integrity": "sha512-o0YkoP4pVu4rN8aTJgAyj9hC2Sv5UlkzCHhxqWj8butaLvnpdc2jOwh4ewE6CX0txSfLn/UYaV/pheS2Txg//Q==", + "peer": true + }, + "node_modules/@webassemblyjs/helper-buffer": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-buffer/-/helper-buffer-1.12.1.tgz", + "integrity": "sha512-nzJwQw99DNDKr9BVCOZcLuJJUlqkJh+kVzVl6Fmq/tI5ZtEyWT1KZMyOXltXLZJmDtvLCDgwsyrkohEtopTXCw==", + "peer": true + }, + "node_modules/@webassemblyjs/helper-numbers": { + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-numbers/-/helper-numbers-1.11.6.tgz", + "integrity": "sha512-vUIhZ8LZoIWHBohiEObxVm6hwP034jwmc9kuq5GdHZH0wiLVLIPcMCdpJzG4C11cHoQ25TFIQj9kaVADVX7N3g==", + "peer": true, + "dependencies": { + "@webassemblyjs/floating-point-hex-parser": "1.11.6", + "@webassemblyjs/helper-api-error": "1.11.6", + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@webassemblyjs/helper-wasm-bytecode": { + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-bytecode/-/helper-wasm-bytecode-1.11.6.tgz", + "integrity": "sha512-sFFHKwcmBprO9e7Icf0+gddyWYDViL8bpPjJJl0WHxCdETktXdmtWLGVzoHbqUcY4Be1LkNfwTmXOJUFZYSJdA==", + "peer": true + }, + "node_modules/@webassemblyjs/helper-wasm-section": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/helper-wasm-section/-/helper-wasm-section-1.12.1.tgz", + "integrity": "sha512-Jif4vfB6FJlUlSbgEMHUyk1j234GTNG9dBJ4XJdOySoj518Xj0oGsNi59cUQF4RRMS9ouBUxDDdyBVfPTypa5g==", + "peer": true, + "dependencies": { + "@webassemblyjs/ast": "1.12.1", + "@webassemblyjs/helper-buffer": "1.12.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.6", + "@webassemblyjs/wasm-gen": "1.12.1" + } + }, + "node_modules/@webassemblyjs/ieee754": { + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@webassemblyjs/ieee754/-/ieee754-1.11.6.tgz", + "integrity": "sha512-LM4p2csPNvbij6U1f19v6WR56QZ8JcHg3QIJTlSwzFcmx6WSORicYj6I63f9yU1kEUtrpG+kjkiIAkevHpDXrg==", + "peer": true, + "dependencies": { + "@xtuc/ieee754": "^1.2.0" + } + }, + "node_modules/@webassemblyjs/leb128": { + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@webassemblyjs/leb128/-/leb128-1.11.6.tgz", + "integrity": 
"sha512-m7a0FhE67DQXgouf1tbN5XQcdWoNgaAuoULHIfGFIEVKA6tu/edls6XnIlkmS6FrXAquJRPni3ZZKjw6FSPjPQ==", + "peer": true, + "dependencies": { + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@webassemblyjs/utf8": { + "version": "1.11.6", + "resolved": "https://registry.npmjs.org/@webassemblyjs/utf8/-/utf8-1.11.6.tgz", + "integrity": "sha512-vtXf2wTQ3+up9Zsg8sa2yWiQpzSsMyXj0qViVP6xKGCUT8p8YJ6HqI7l5eCnWx1T/FYdsv07HQs2wTFbbof/RA==", + "peer": true + }, + "node_modules/@webassemblyjs/wasm-edit": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-edit/-/wasm-edit-1.12.1.tgz", + "integrity": "sha512-1DuwbVvADvS5mGnXbE+c9NfA8QRcZ6iKquqjjmR10k6o+zzsRVesil54DKexiowcFCPdr/Q0qaMgB01+SQ1u6g==", + "peer": true, + "dependencies": { + "@webassemblyjs/ast": "1.12.1", + "@webassemblyjs/helper-buffer": "1.12.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.6", + "@webassemblyjs/helper-wasm-section": "1.12.1", + "@webassemblyjs/wasm-gen": "1.12.1", + "@webassemblyjs/wasm-opt": "1.12.1", + "@webassemblyjs/wasm-parser": "1.12.1", + "@webassemblyjs/wast-printer": "1.12.1" + } + }, + "node_modules/@webassemblyjs/wasm-gen": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-gen/-/wasm-gen-1.12.1.tgz", + "integrity": "sha512-TDq4Ojh9fcohAw6OIMXqiIcTq5KUXTGRkVxbSo1hQnSy6lAM5GSdfwWeSxpAo0YzgsgF182E/U0mDNhuA0tW7w==", + "peer": true, + "dependencies": { + "@webassemblyjs/ast": "1.12.1", + "@webassemblyjs/helper-wasm-bytecode": "1.11.6", + "@webassemblyjs/ieee754": "1.11.6", + "@webassemblyjs/leb128": "1.11.6", + "@webassemblyjs/utf8": "1.11.6" + } + }, + "node_modules/@webassemblyjs/wasm-opt": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-opt/-/wasm-opt-1.12.1.tgz", + "integrity": "sha512-Jg99j/2gG2iaz3hijw857AVYekZe2SAskcqlWIZXjji5WStnOpVoat3gQfT/Q5tb2djnCjBtMocY/Su1GfxPBg==", + "peer": true, + "dependencies": { + "@webassemblyjs/ast": "1.12.1", + "@webassemblyjs/helper-buffer": "1.12.1", + "@webassemblyjs/wasm-gen": "1.12.1", + "@webassemblyjs/wasm-parser": "1.12.1" + } + }, + "node_modules/@webassemblyjs/wasm-parser": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wasm-parser/-/wasm-parser-1.12.1.tgz", + "integrity": "sha512-xikIi7c2FHXysxXe3COrVUPSheuBtpcfhbpFj4gmu7KRLYOzANztwUU0IbsqvMqzuNK2+glRGWCEqZo1WCLyAQ==", + "peer": true, + "dependencies": { + "@webassemblyjs/ast": "1.12.1", + "@webassemblyjs/helper-api-error": "1.11.6", + "@webassemblyjs/helper-wasm-bytecode": "1.11.6", + "@webassemblyjs/ieee754": "1.11.6", + "@webassemblyjs/leb128": "1.11.6", + "@webassemblyjs/utf8": "1.11.6" + } + }, + "node_modules/@webassemblyjs/wast-printer": { + "version": "1.12.1", + "resolved": "https://registry.npmjs.org/@webassemblyjs/wast-printer/-/wast-printer-1.12.1.tgz", + "integrity": "sha512-+X4WAlOisVWQMikjbcvY2e0rwPsKQ9F688lksZhBcPycBBuii3O7m8FACbDMWDojpAqvjIncrG8J0XHKyQfVeA==", + "peer": true, + "dependencies": { + "@webassemblyjs/ast": "1.12.1", + "@xtuc/long": "4.2.2" + } + }, + "node_modules/@xtuc/ieee754": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@xtuc/ieee754/-/ieee754-1.2.0.tgz", + "integrity": "sha512-DX8nKgqcGwsc0eJSqYt5lwP4DH5FlHnmuWWBRy7X0NcaGR0ZtuyeESgMwTYVEtxmsNGY+qit4QYT/MIYTOTPeA==", + "peer": true + }, + "node_modules/@xtuc/long": { + "version": "4.2.2", + "resolved": "https://registry.npmjs.org/@xtuc/long/-/long-4.2.2.tgz", + "integrity": "sha512-NuHqBY1PB/D8xU6s/thBgOAiAP7HOYDQ32+BFZILJ8ivkUkAHQnWfn6WhL79Owj1qmUnoN/YPhktdIoucipkAQ==", + 
"peer": true + }, "node_modules/acorn": { "version": "8.11.3", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.11.3.tgz", "integrity": "sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg==", - "dev": true, "bin": { "acorn": "bin/acorn" }, @@ -2069,6 +4636,14 @@ "node": ">=0.4.0" } }, + "node_modules/acorn-import-attributes": { + "version": "1.9.5", + "resolved": "https://registry.npmjs.org/acorn-import-attributes/-/acorn-import-attributes-1.9.5.tgz", + "integrity": "sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ==", + "peerDependencies": { + "acorn": "^8" + } + }, "node_modules/acorn-jsx": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", @@ -2078,11 +4653,21 @@ "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, + "node_modules/agent-base": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/agent-base/-/agent-base-6.0.2.tgz", + "integrity": "sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==", + "dependencies": { + "debug": "4" + }, + "engines": { + "node": ">= 6.0.0" + } + }, "node_modules/ajv": { "version": "6.12.6", "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", - "dev": true, "dependencies": { "fast-deep-equal": "^3.1.1", "fast-json-stable-stringify": "^2.0.0", @@ -2094,6 +4679,15 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "peer": true, + "peerDependencies": { + "ajv": "^6.9.1" + } + }, "node_modules/ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", @@ -2415,11 +5009,6 @@ "dequal": "^2.0.3" } }, - "node_modules/b4a": { - "version": "1.6.6", - "resolved": "https://registry.npmjs.org/b4a/-/b4a-1.6.6.tgz", - "integrity": "sha512-5Tk1HLk6b6ctmjIkAcU/Ujv/1WqiDl0F0JdRCR80VsOcUlHcu7pWeWRlOqQLHfDEsVx9YH/aif5AG4ehoCtTmg==" - }, "node_modules/babel-plugin-macros": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/babel-plugin-macros/-/babel-plugin-macros-3.1.0.tgz", @@ -2434,21 +5023,6 @@ "npm": ">=6" } }, - "node_modules/babel-plugin-styled-components": { - "version": "2.1.4", - "resolved": "https://registry.npmjs.org/babel-plugin-styled-components/-/babel-plugin-styled-components-2.1.4.tgz", - "integrity": "sha512-Xgp9g+A/cG47sUyRwwYxGM4bR/jDRg5N6it/8+HxCnbT5XNKSKDT9xm4oag/osgqjC2It/vH0yXsomOG6k558g==", - "dependencies": { - "@babel/helper-annotate-as-pure": "^7.22.5", - "@babel/helper-module-imports": "^7.22.5", - "@babel/plugin-syntax-jsx": "^7.22.5", - "lodash": "^4.17.21", - "picomatch": "^2.3.1" - }, - "peerDependencies": { - "styled-components": ">= 2" - } - }, "node_modules/bail": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/bail/-/bail-2.0.2.tgz", @@ -2463,66 +5037,6 @@ "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" }, - "node_modules/bare-events": { - "version": "2.2.2", - "resolved": "https://registry.npmjs.org/bare-events/-/bare-events-2.2.2.tgz", - "integrity": 
"sha512-h7z00dWdG0PYOQEvChhOSWvOfkIKsdZGkWr083FgN/HyoQuebSew/cgirYqh9SCuy/hRvxc5Vy6Fw8xAmYHLkQ==", - "optional": true - }, - "node_modules/bare-fs": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.0.tgz", - "integrity": "sha512-TNFqa1B4N99pds2a5NYHR15o0ZpdNKbAeKTE/+G6ED/UeOavv8RY3dr/Fu99HW3zU3pXpo2kDNO8Sjsm2esfOw==", - "optional": true, - "dependencies": { - "bare-events": "^2.0.0", - "bare-path": "^2.0.0", - "bare-stream": "^1.0.0" - } - }, - "node_modules/bare-os": { - "version": "2.3.0", - "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.3.0.tgz", - "integrity": "sha512-oPb8oMM1xZbhRQBngTgpcQ5gXw6kjOaRsSWsIeNyRxGed2w/ARyP7ScBYpWR1qfX2E5rS3gBw6OWcSQo+s+kUg==", - "optional": true - }, - "node_modules/bare-path": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-2.1.2.tgz", - "integrity": "sha512-o7KSt4prEphWUHa3QUwCxUI00R86VdjiuxmJK0iNVDHYPGo+HsDaVCnqCmPbf/MiW1ok8F4p3m8RTHlWk8K2ig==", - "optional": true, - "dependencies": { - "bare-os": "^2.1.0" - } - }, - "node_modules/bare-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-1.0.0.tgz", - "integrity": "sha512-KhNUoDL40iP4gFaLSsoGE479t0jHijfYdIcxRn/XtezA2BaUD0NRf/JGRpsMq6dMNM+SrCrB0YSSo/5wBY4rOQ==", - "optional": true, - "dependencies": { - "streamx": "^2.16.1" - } - }, - "node_modules/base64-js": { - "version": "1.5.1", - "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", - "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, "node_modules/binary-extensions": { "version": "2.3.0", "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.3.0.tgz", @@ -2534,16 +5048,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/bl": { - "version": "4.1.0", - "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", - "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", - "dependencies": { - "buffer": "^5.5.0", - "inherits": "^2.0.4", - "readable-stream": "^3.4.0" - } - }, "node_modules/brace-expansion": { "version": "1.1.11", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", @@ -2596,28 +5100,11 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, - "node_modules/buffer": { - "version": "5.7.1", - "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", - "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "dependencies": { - "base64-js": "^1.3.1", - "ieee754": "^1.1.13" - } + "node_modules/buffer-from": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz", + "integrity": "sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==", + "peer": true }, "node_modules/busboy": { "version": "1.6.0", @@ -2634,7 +5121,6 @@ "version": "1.0.7", "resolved": 
"https://registry.npmjs.org/call-bind/-/call-bind-1.0.7.tgz", "integrity": "sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==", - "dev": true, "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", @@ -2787,10 +5273,40 @@ "node": ">= 6" } }, - "node_modules/chownr": { - "version": "1.1.4", - "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", - "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==" + "node_modules/chrome-trace-event": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/chrome-trace-event/-/chrome-trace-event-1.0.4.tgz", + "integrity": "sha512-rNjApaLzuwaOTjCiT8lSDdGN1APCiqkChLMJxJPWLunPAt5fy8xgU9/jNOchV84wfIxrA0lRQB7oCT8jrn/wrQ==", + "peer": true, + "engines": { + "node": ">=6.0" + } + }, + "node_modules/cjs-module-lexer": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.4.1.tgz", + "integrity": "sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==" + }, + "node_modules/class-variance-authority": { + "version": "0.7.0", + "resolved": "https://registry.npmjs.org/class-variance-authority/-/class-variance-authority-0.7.0.tgz", + "integrity": "sha512-jFI8IQw4hczaL4ALINxqLEXQbWcNjoSkloa4IaufXCJr6QawJyw7tuRysRsrE8w2p/4gGaxKIt/hX3qz/IbD1A==", + "license": "Apache-2.0", + "dependencies": { + "clsx": "2.0.0" + }, + "funding": { + "url": "https://joebell.co.uk" + } + }, + "node_modules/class-variance-authority/node_modules/clsx": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.0.0.tgz", + "integrity": "sha512-rQ1+kcj+ttHG0MKVGBUXwayCCF1oh39BF5COIpRzuCEv8Mwjv0XucrI2ExNTOn9IlLifGClWQcU9BrZORvtw6Q==", + "license": "MIT", + "engines": { + "node": ">=6" + } }, "node_modules/client-only": { "version": "0.0.1", @@ -2801,6 +5317,7 @@ "version": "2.1.1", "resolved": "https://registry.npmjs.org/clsx/-/clsx-2.1.1.tgz", "integrity": "sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==", + "license": "MIT", "engines": { "node": ">=6" } @@ -2859,6 +5376,11 @@ "node": ">= 6" } }, + "node_modules/commondir": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/commondir/-/commondir-1.0.1.tgz", + "integrity": "sha512-W9pAhw0ja1Edb5GVdIF1mjZw/ASI0AlShXM83UUGe2DVr5TdAPEA1OA8m/g8zWp9x6On7gqufY+FatDbC3MDQg==" + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -2868,8 +5390,17 @@ "node_modules/convert-source-map": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/convert-source-map/-/convert-source-map-2.0.0.tgz", - "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==", - "peer": true + "integrity": "sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==" + }, + "node_modules/core-js": { + "version": "3.38.1", + "resolved": "https://registry.npmjs.org/core-js/-/core-js-3.38.1.tgz", + "integrity": "sha512-OP35aUorbU3Zvlx7pjsFdu1rGNnD4pgw/CWoYzRY3t2EzoVT7shKHY1dlAy3f41cGIO7ZDPQimhGFTlEYkG/Hw==", + "hasInstallScript": true, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/core-js" + } }, "node_modules/cosmiconfig": { "version": "7.1.0", @@ -3112,17 +5643,18 @@ "version": "3.6.0", "resolved": "https://registry.npmjs.org/date-fns/-/date-fns-3.6.0.tgz", 
"integrity": "sha512-fRHTG8g/Gif+kSh50gaGEdToemgfj74aRX3swtiouboip5JDLAyDE9F11nHMIcvOaXeOC6D7SpNhi7uFyB7Uww==", + "license": "MIT", "funding": { "type": "github", "url": "https://github.com/sponsors/kossnocorp" } }, "node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -3150,28 +5682,6 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/decompress-response": { - "version": "6.0.0", - "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", - "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", - "dependencies": { - "mimic-response": "^3.1.0" - }, - "engines": { - "node": ">=10" - }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" - } - }, - "node_modules/deep-extend": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", - "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", - "engines": { - "node": ">=4.0.0" - } - }, "node_modules/deep-is": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -3190,7 +5700,6 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", "integrity": "sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==", - "dev": true, "dependencies": { "es-define-property": "^1.0.0", "es-errors": "^1.3.0", @@ -3296,6 +5805,17 @@ "csstype": "^3.0.2" } }, + "node_modules/dotenv": { + "version": "16.4.5", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.4.5.tgz", + "integrity": "sha512-ZmdL2rui+eB2YwhsWzjInR8LldtZHGDoQ1ugH85ppHKwpUHL7j7rN0Ti9NCnGiQbhaZ11FpR+7ao1dNsmduNUg==", + "engines": { + "node": ">=12" + }, + "funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/eastasianwidth": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", @@ -3311,19 +5831,10 @@ "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==" }, - "node_modules/end-of-stream": { - "version": "1.4.4", - "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", - "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", - "dependencies": { - "once": "^1.4.0" - } - }, "node_modules/enhanced-resolve": { - "version": "5.16.1", - "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.16.1.tgz", - "integrity": "sha512-4U5pNsuDl0EhuZpq46M5xPslstkviJuhrdobaRDBk2Jy2KO37FDAJl4lb2KlNabxT0m4MTK2UHNrsAcphE8nyw==", - "dev": true, + "version": "5.17.1", + "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.17.1.tgz", + "integrity": "sha512-LMHl3dXhTcfv8gM4kEzIUeTQ+7fpdA0l2tUf34BddXPkz2A5xJ5L/Pchd5BL6rdccM9QGvu0sWZzK1Z1t4wwyg==", "dependencies": { 
"graceful-fs": "^4.2.4", "tapable": "^2.2.0" @@ -3420,7 +5931,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", - "dev": true, "dependencies": { "get-intrinsic": "^1.2.4" }, @@ -3432,7 +5942,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, "engines": { "node": ">= 0.4" } @@ -3462,6 +5971,12 @@ "node": ">= 0.4" } }, + "node_modules/es-module-lexer": { + "version": "1.5.4", + "resolved": "https://registry.npmjs.org/es-module-lexer/-/es-module-lexer-1.5.4.tgz", + "integrity": "sha512-MVNK56NiMrOwitFB7cqDwq0CQutbw+0BvLshJSse0MUNU+y1FC3bUS/AQg7oUng+/wKrrki7JfmwtVHkVfPLlw==", + "peer": true + }, "node_modules/es-object-atoms": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.0.0.tgz", @@ -3919,7 +6434,6 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", - "dev": true, "dependencies": { "estraverse": "^5.2.0" }, @@ -3931,7 +6445,6 @@ "version": "5.3.0", "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", - "dev": true, "engines": { "node": ">=4.0" } @@ -3945,6 +6458,11 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/estree-walker": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/estree-walker/-/estree-walker-2.0.2.tgz", + "integrity": "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==" + }, "node_modules/esutils": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", @@ -3959,12 +6477,13 @@ "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==" }, - "node_modules/expand-template": { - "version": "2.0.3", - "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", - "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "node_modules/events": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", + "integrity": "sha512-mQw+2fkQbALzQ7V0MY0IqdnXNOeTtP4r0lN9z7AAawCXgqea7bDii20AYrIBrFd/Hx0M2Ocz6S111CaFkUcb0Q==", + "peer": true, "engines": { - "node": ">=6" + "node": ">=0.8.x" } }, "node_modules/extend": { @@ -3975,8 +6494,7 @@ "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", - "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" }, "node_modules/fast-equals": { "version": "5.0.1", @@ -3986,11 +6504,6 @@ "node": ">=6.0.0" } }, - "node_modules/fast-fifo": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/fast-fifo/-/fast-fifo-1.3.2.tgz", - "integrity": 
"sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==" - }, "node_modules/fast-glob": { "version": "3.3.2", "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", @@ -4020,8 +6533,7 @@ "node_modules/fast-json-stable-stringify": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", - "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", - "dev": true + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==" }, "node_modules/fast-levenshtein": { "version": "2.0.6", @@ -4037,6 +6549,11 @@ "reusify": "^1.0.4" } }, + "node_modules/fflate": { + "version": "0.4.8", + "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.4.8.tgz", + "integrity": "sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==" + }, "node_modules/file-entry-cache": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", @@ -4080,7 +6597,6 @@ "version": "5.0.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", "integrity": "sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==", - "dev": true, "dependencies": { "locate-path": "^6.0.0", "path-exists": "^4.0.0" @@ -4172,16 +6688,10 @@ "url": "https://github.com/sponsors/rawify" } }, - "node_modules/fs-constants": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", - "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==" - }, "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", - "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==", - "dev": true + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" }, "node_modules/fsevents": { "version": "2.3.3", @@ -4235,7 +6745,6 @@ "version": "1.0.0-beta.2", "resolved": "https://registry.npmjs.org/gensync/-/gensync-1.0.0-beta.2.tgz", "integrity": "sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==", - "peer": true, "engines": { "node": ">=6.9.0" } @@ -4244,7 +6753,6 @@ "version": "1.2.4", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.2.4.tgz", "integrity": "sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==", - "dev": true, "dependencies": { "es-errors": "^1.3.0", "function-bind": "^1.1.2", @@ -4296,11 +6804,6 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, - "node_modules/github-from-package": { - "version": "0.0.0", - "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", - "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==" - }, "node_modules/glob": { "version": "10.3.10", "resolved": "https://registry.npmjs.org/glob/-/glob-10.3.10.tgz", @@ -4333,6 +6836,12 @@ "node": ">=10.13.0" } }, + "node_modules/glob-to-regexp": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz", + "integrity": 
"sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==", + "peer": true + }, "node_modules/glob/node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -4410,7 +6919,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz", "integrity": "sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==", - "dev": true, "dependencies": { "get-intrinsic": "^1.1.3" }, @@ -4442,7 +6950,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", "integrity": "sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==", - "dev": true, "engines": { "node": ">=8" } @@ -4451,7 +6958,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-property-descriptors/-/has-property-descriptors-1.0.2.tgz", "integrity": "sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==", - "dev": true, "dependencies": { "es-define-property": "^1.0.0" }, @@ -4463,7 +6969,6 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/has-proto/-/has-proto-1.0.3.tgz", "integrity": "sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==", - "dev": true, "engines": { "node": ">= 0.4" }, @@ -4475,7 +6980,6 @@ "version": "1.0.3", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.0.3.tgz", "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==", - "dev": true, "engines": { "node": ">= 0.4" }, @@ -4694,24 +7198,17 @@ "url": "https://opencollective.com/unified" } }, - "node_modules/ieee754": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", - "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] + "node_modules/https-proxy-agent": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/https-proxy-agent/-/https-proxy-agent-5.0.1.tgz", + "integrity": "sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==", + "dependencies": { + "agent-base": "6", + "debug": "4" + }, + "engines": { + "node": ">= 6" + } }, "node_modules/ignore": { "version": "5.3.1", @@ -4737,6 +7234,17 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/import-in-the-middle": { + "version": "1.11.2", + "resolved": "https://registry.npmjs.org/import-in-the-middle/-/import-in-the-middle-1.11.2.tgz", + "integrity": "sha512-gK6Rr6EykBcc6cVWRSBR5TWf8nn6hZMYSRYqCcHa0l0d1fPK7JSYo6+Mlmck76jIX9aL/IZ71c06U2VpFwl1zA==", + "dependencies": { + "acorn": "^8.8.2", + "acorn-import-attributes": "^1.9.5", + "cjs-module-lexer": "^1.2.2", + "module-details-from-path": "^1.0.3" + } + }, "node_modules/imurmurhash": { "version": "0.1.4", "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", @@ -4759,12 +7267,8 @@ "node_modules/inherits": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", - "integrity": 
"sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" - }, - "node_modules/ini": { - "version": "1.3.8", - "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", - "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==" + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "dev": true }, "node_modules/inline-style-parser": { "version": "0.2.3", @@ -5090,6 +7594,14 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-reference": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/is-reference/-/is-reference-1.2.1.tgz", + "integrity": "sha512-U82MsXXiFIrjCK4otLT+o2NA2Cd2g5MLoOVXUZjIOhLurrRxpEXzI8O0KZHr3IjLvlAH1kTPYSuqer5T9ZVBKQ==", + "dependencies": { + "@types/estree": "*" + } + }, "node_modules/is-regex": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/is-regex/-/is-regex-1.1.4.tgz", @@ -5259,6 +7771,35 @@ "@pkgjs/parseargs": "^0.11.0" } }, + "node_modules/jest-worker": { + "version": "27.5.1", + "resolved": "https://registry.npmjs.org/jest-worker/-/jest-worker-27.5.1.tgz", + "integrity": "sha512-7vuh85V5cdDofPyxn58nrPjBktZo0u9x1g8WtjQol+jZDaE+fhN+cIvTj11GndBnMnyfrUOG1sZQxCdjKh+DKg==", + "peer": true, + "dependencies": { + "@types/node": "*", + "merge-stream": "^2.0.0", + "supports-color": "^8.0.0" + }, + "engines": { + "node": ">= 10.13.0" + } + }, + "node_modules/jest-worker/node_modules/supports-color": { + "version": "8.1.1", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz", + "integrity": "sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==", + "peer": true, + "dependencies": { + "has-flag": "^4.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/supports-color?sponsor=1" + } + }, "node_modules/jiti": { "version": "1.21.0", "resolved": "https://registry.npmjs.org/jiti/-/jiti-1.21.0.tgz", @@ -5317,8 +7858,7 @@ "node_modules/json-schema-traverse": { "version": "0.4.1", "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", - "dev": true + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" }, "node_modules/json-stable-stringify-without-jsonify": { "version": "1.0.1", @@ -5330,7 +7870,6 @@ "version": "2.2.3", "resolved": "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz", "integrity": "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==", - "peer": true, "bin": { "json5": "lib/cli.js" }, @@ -5406,11 +7945,19 @@ "resolved": "https://registry.npmjs.org/lines-and-columns/-/lines-and-columns-1.2.4.tgz", "integrity": "sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==" }, + "node_modules/loader-runner": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz", + "integrity": "sha512-3R/1M+yS3j5ou80Me59j7F9IMs4PXs3VqRrm0TU3AbKPxlmpoY1TNscJV/oGJXo8qCatFGTfDbY6W6ipGOYXfg==", + "peer": true, + "engines": { + "node": ">=6.11.5" + } + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", "integrity": 
"sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==", - "dev": true, "dependencies": { "p-locate": "^5.0.0" }, @@ -5477,6 +8024,23 @@ "node": "14 || >=16.14" } }, + "node_modules/lucide-react": { + "version": "0.454.0", + "resolved": "https://registry.npmjs.org/lucide-react/-/lucide-react-0.454.0.tgz", + "integrity": "sha512-hw7zMDwykCLnEzgncEEjHeA6+45aeEzRYuKHuyRSOPkhko+J3ySGjGIzu+mmMfDFG1vazHepMaYFYHbTFAZAAQ==", + "license": "ISC", + "peerDependencies": { + "react": "^16.5.1 || ^17.0.0 || ^18.0.0 || ^19.0.0-rc" + } + }, + "node_modules/magic-string": { + "version": "0.30.12", + "resolved": "https://registry.npmjs.org/magic-string/-/magic-string-0.30.12.tgz", + "integrity": "sha512-Ea8I3sQMVXr8JhN4z+H/d8zwo+tYDgHE9+5G4Wnrwhs0gaK9fXTKx0Tw5Xwsd/bCPTTZNRAdpyzvoeORe9LYpw==", + "dependencies": { + "@jridgewell/sourcemap-codec": "^1.5.0" + } + }, "node_modules/markdown-table": { "version": "3.0.3", "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-3.0.3.tgz", @@ -5757,6 +8321,12 @@ "resolved": "https://registry.npmjs.org/memoize-one/-/memoize-one-6.0.0.tgz", "integrity": "sha512-rkpe71W0N0c0Xz6QD0eJETuWAJGnJ9afsl1srmwPrI+yBCkge5EycXXbYRyvL29zZVUWQCY7InPRCv3GDXuZNw==" }, + "node_modules/merge-stream": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==", + "peer": true + }, "node_modules/merge2": { "version": "1.4.1", "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", @@ -6301,26 +8871,36 @@ ] }, "node_modules/micromatch": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.5.tgz", - "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", + "version": "4.0.8", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.8.tgz", + "integrity": "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==", "dependencies": { - "braces": "^3.0.2", + "braces": "^3.0.3", "picomatch": "^2.3.1" }, "engines": { "node": ">=8.6" } }, - "node_modules/mimic-response": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", - "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "node_modules/mime-db": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", + "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", + "peer": true, "engines": { - "node": ">=10" + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "2.1.35", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", + "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", + "peer": true, + "dependencies": { + "mime-db": "1.52.0" }, - "funding": { - "url": "https://github.com/sponsors/sindresorhus" + "engines": { + "node": ">= 0.6" } }, "node_modules/minimatch": { @@ -6339,6 +8919,7 @@ "version": "1.2.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "dev": true, "funding": { "url": "https://github.com/sponsors/ljharb" } 
@@ -6351,15 +8932,15 @@ "node": ">=16 || 14 >=14.17" } }, - "node_modules/mkdirp-classic": { - "version": "0.5.3", - "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", - "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==" + "node_modules/module-details-from-path": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/module-details-from-path/-/module-details-from-path-1.0.3.tgz", + "integrity": "sha512-ySViT69/76t8VhE1xXHK6Ch4NcDd26gx0MzKXLO+F7NOtnqH68d9zF94nT8ZWSxXh8ELOERsnJO/sWt1xZYw5A==" }, "node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==" }, "node_modules/mz": { "version": "2.7.0", @@ -6388,52 +8969,54 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, - "node_modules/napi-build-utils": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-1.0.2.tgz", - "integrity": "sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg==" - }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", "dev": true }, + "node_modules/neo-async": { + "version": "2.6.2", + "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", + "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==", + "peer": true + }, "node_modules/next": { - "version": "14.2.3", - "resolved": "https://registry.npmjs.org/next/-/next-14.2.3.tgz", - "integrity": "sha512-dowFkFTR8v79NPJO4QsBUtxv0g9BrS/phluVpMAt2ku7H+cbcBJlopXjkWlwxrk/xGqMemr7JkGPGemPrLLX7A==", + "version": "15.0.2", + "resolved": "https://registry.npmjs.org/next/-/next-15.0.2.tgz", + "integrity": "sha512-rxIWHcAu4gGSDmwsELXacqAPUk+j8dV/A9cDF5fsiCMpkBDYkO2AEaL1dfD+nNmDiU6QMCFN8Q30VEKapT9UHQ==", "dependencies": { - "@next/env": "14.2.3", - "@swc/helpers": "0.5.5", + "@next/env": "15.0.2", + "@swc/counter": "0.1.3", + "@swc/helpers": "0.5.13", "busboy": "1.6.0", "caniuse-lite": "^1.0.30001579", - "graceful-fs": "^4.2.11", "postcss": "8.4.31", - "styled-jsx": "5.1.1" + "styled-jsx": "5.1.6" }, "bin": { "next": "dist/bin/next" }, "engines": { - "node": ">=18.17.0" + "node": ">=18.18.0" }, "optionalDependencies": { - "@next/swc-darwin-arm64": "14.2.3", - "@next/swc-darwin-x64": "14.2.3", - "@next/swc-linux-arm64-gnu": "14.2.3", - "@next/swc-linux-arm64-musl": "14.2.3", - "@next/swc-linux-x64-gnu": "14.2.3", - "@next/swc-linux-x64-musl": "14.2.3", - "@next/swc-win32-arm64-msvc": "14.2.3", - "@next/swc-win32-ia32-msvc": "14.2.3", - "@next/swc-win32-x64-msvc": "14.2.3" + "@next/swc-darwin-arm64": "15.0.2", + "@next/swc-darwin-x64": "15.0.2", + "@next/swc-linux-arm64-gnu": "15.0.2", + "@next/swc-linux-arm64-musl": "15.0.2", + "@next/swc-linux-x64-gnu": "15.0.2", + "@next/swc-linux-x64-musl": "15.0.2", + "@next/swc-win32-arm64-msvc": "15.0.2", + "@next/swc-win32-x64-msvc": "15.0.2", + "sharp": "^0.33.5" }, "peerDependencies": { "@opentelemetry/api": "^1.1.0", 
"@playwright/test": "^1.41.2", - "react": "^18.2.0", - "react-dom": "^18.2.0", + "babel-plugin-react-compiler": "*", + "react": "^18.2.0 || 19.0.0-rc-02c0e824-20241028", + "react-dom": "^18.2.0 || 19.0.0-rc-02c0e824-20241028", "sass": "^1.3.0" }, "peerDependenciesMeta": { @@ -6443,6 +9026,9 @@ "@playwright/test": { "optional": true }, + "babel-plugin-react-compiler": { + "optional": true + }, "sass": { "optional": true } @@ -6475,22 +9061,25 @@ "node": "^10 || ^12 || >=14" } }, - "node_modules/node-abi": { - "version": "3.62.0", - "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.62.0.tgz", - "integrity": "sha512-CPMcGa+y33xuL1E0TcNIu4YyaZCxnnvkVaEXrsosR3FxN+fV8xvb7Mzpb7IgKler10qeMkE6+Dp8qJhpzdq35g==", + "node_modules/node-fetch": { + "version": "2.7.0", + "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", + "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", "dependencies": { - "semver": "^7.3.5" + "whatwg-url": "^5.0.0" }, "engines": { - "node": ">=10" + "node": "4.x || >=6.0.0" + }, + "peerDependencies": { + "encoding": "^0.1.0" + }, + "peerDependenciesMeta": { + "encoding": { + "optional": true + } } }, - "node_modules/node-addon-api": { - "version": "6.1.0", - "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-6.1.0.tgz", - "integrity": "sha512-+eawOlIgy680F0kBzPUNFhMZGtJ1YmqM6l4+Crf4IkImjYrO/mqPwRMh352g23uIaQKFItcQ64I7KMaJxHgAVA==" - }, "node_modules/node-releases": { "version": "2.0.14", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.14.tgz", @@ -8926,7 +11515,6 @@ "version": "1.13.1", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.1.tgz", "integrity": "sha512-5qoj1RUiKOMsCCNLV1CBiPYE10sziTsnmNxkAI/rZhiD63CF7IqdFGC/XzjWjpSgLf0LxXX3bDFIh0E18f6UhQ==", - "dev": true, "funding": { "url": "https://github.com/sponsors/ljharb" } @@ -9042,6 +11630,7 @@ "version": "1.4.0", "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "dev": true, "dependencies": { "wrappy": "1" } @@ -9067,7 +11656,6 @@ "version": "3.1.0", "resolved": "https://registry.npmjs.org/p-limit/-/p-limit-3.1.0.tgz", "integrity": "sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==", - "dev": true, "dependencies": { "yocto-queue": "^0.1.0" }, @@ -9082,7 +11670,6 @@ "version": "5.0.0", "resolved": "https://registry.npmjs.org/p-locate/-/p-locate-5.0.0.tgz", "integrity": "sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==", - "dev": true, "dependencies": { "p-limit": "^3.0.2" }, @@ -9093,6 +11680,11 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/package-json-from-dist": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", + "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==" + }, "node_modules/parent-module": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", @@ -9165,7 +11757,6 @@ "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", "integrity": "sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==", - "dev": true, "engines": { "node": ">=8" } @@ 
-9215,6 +11806,34 @@ "node": ">=8" } }, + "node_modules/pg-int8": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/pg-int8/-/pg-int8-1.0.1.tgz", + "integrity": "sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==", + "engines": { + "node": ">=4.0.0" + } + }, + "node_modules/pg-protocol": { + "version": "1.7.0", + "resolved": "https://registry.npmjs.org/pg-protocol/-/pg-protocol-1.7.0.tgz", + "integrity": "sha512-hTK/mE36i8fDDhgDFjy6xNOG+LCorxLG3WO17tku+ij6sVHXh1jQUJ8hYAnRhNla4QVD2H8er/FOjc/+EgC6yQ==" + }, + "node_modules/pg-types": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/pg-types/-/pg-types-2.2.0.tgz", + "integrity": "sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==", + "dependencies": { + "pg-int8": "1.0.1", + "postgres-array": "~2.0.0", + "postgres-bytea": "~1.0.0", + "postgres-date": "~1.0.4", + "postgres-interval": "^1.1.0" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/picocolors": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/picocolors/-/picocolors-1.0.1.tgz", @@ -9410,55 +12029,59 @@ "resolved": "https://registry.npmjs.org/postcss-value-parser/-/postcss-value-parser-4.2.0.tgz", "integrity": "sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==" }, - "node_modules/prebuild-install": { - "version": "7.1.2", - "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.2.tgz", - "integrity": "sha512-UnNke3IQb6sgarcZIDU3gbMeTp/9SSU1DAIkil7PrqG1vZlBtY5msYccSKSHDqa3hNg436IXK+SNImReuA1wEQ==", - "dependencies": { - "detect-libc": "^2.0.0", - "expand-template": "^2.0.3", - "github-from-package": "0.0.0", - "minimist": "^1.2.3", - "mkdirp-classic": "^0.5.3", - "napi-build-utils": "^1.0.1", - "node-abi": "^3.3.0", - "pump": "^3.0.0", - "rc": "^1.2.7", - "simple-get": "^4.0.0", - "tar-fs": "^2.0.0", - "tunnel-agent": "^0.6.0" - }, - "bin": { - "prebuild-install": "bin.js" - }, + "node_modules/postgres-array": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/postgres-array/-/postgres-array-2.0.0.tgz", + "integrity": "sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==", "engines": { - "node": ">=10" + "node": ">=4" } }, - "node_modules/prebuild-install/node_modules/tar-fs": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.1.tgz", - "integrity": "sha512-V0r2Y9scmbDRLCNex/+hYzvp/zyYjvFbHPNgVTKfQvVrb6guiE/fxP+XblDNR011utopbkex2nM4dHNV6GDsng==", - "dependencies": { - "chownr": "^1.1.1", - "mkdirp-classic": "^0.5.2", - "pump": "^3.0.0", - "tar-stream": "^2.1.4" + "node_modules/postgres-bytea": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/postgres-bytea/-/postgres-bytea-1.0.0.tgz", + "integrity": "sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==", + "engines": { + "node": ">=0.10.0" } }, - "node_modules/prebuild-install/node_modules/tar-stream": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", - "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "node_modules/postgres-date": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/postgres-date/-/postgres-date-1.0.7.tgz", + "integrity": "sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==", + 
"engines": { + "node": ">=0.10.0" + } + }, + "node_modules/postgres-interval": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/postgres-interval/-/postgres-interval-1.2.0.tgz", + "integrity": "sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==", "dependencies": { - "bl": "^4.0.3", - "end-of-stream": "^1.4.1", - "fs-constants": "^1.0.0", - "inherits": "^2.0.3", - "readable-stream": "^3.1.1" + "xtend": "^4.0.0" }, "engines": { - "node": ">=6" + "node": ">=0.10.0" + } + }, + "node_modules/posthog-js": { + "version": "1.176.0", + "resolved": "https://registry.npmjs.org/posthog-js/-/posthog-js-1.176.0.tgz", + "integrity": "sha512-T5XKNtRzp7q6CGb7Vc7wAI76rWap9fiuDUPxPsyPBPDkreKya91x9RIsSapAVFafwD1AEin1QMczCmt9Le9BWw==", + "dependencies": { + "core-js": "^3.38.1", + "fflate": "^0.4.8", + "preact": "^10.19.3", + "web-vitals": "^4.2.0" + } + }, + "node_modules/preact": { + "version": "10.24.3", + "resolved": "https://registry.npmjs.org/preact/-/preact-10.24.3.tgz", + "integrity": "sha512-Z2dPnBnMUfyQfSQ+GBdsGa16hz35YmLmtTLhM169uW944hYL6xzTYkJjC07j+Wosz733pMWx0fgON3JNw1jJQA==", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/preact" } }, "node_modules/prelude-ls": { @@ -9493,6 +12116,14 @@ "node": ">=6" } }, + "node_modules/progress": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", + "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", + "engines": { + "node": ">=0.4.0" + } + }, "node_modules/prop-types": { "version": "15.8.1", "resolved": "https://registry.npmjs.org/prop-types/-/prop-types-15.8.1.tgz", @@ -9517,24 +12148,33 @@ "url": "https://github.com/sponsors/wooorm" } }, - "node_modules/pump": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", - "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", - "dependencies": { - "end-of-stream": "^1.1.0", - "once": "^1.3.1" - } + "node_modules/proxy-from-env": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", + "integrity": "sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==" }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", - "dev": true, "engines": { "node": ">=6" } }, + "node_modules/qs": { + "version": "6.13.0", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.13.0.tgz", + "integrity": "sha512-+38qI9SOr8tfZ4QmJNplMUxqjbe7LKvvZgWdExBOmd+egZTtjLB67Gu0HRX3u/XOq7UU2Nx6nsjvS16Z9uwfpg==", + "dependencies": { + "side-channel": "^1.0.6" + }, + "engines": { + "node": ">=0.6" + }, + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, "node_modules/queue-microtask": { "version": "1.2.3", "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", @@ -9554,31 +12194,13 @@ } ] }, - "node_modules/queue-tick": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/queue-tick/-/queue-tick-1.0.1.tgz", - "integrity": "sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==" - }, - "node_modules/rc": { - "version": "1.2.8", - "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", - 
"integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "node_modules/randombytes": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", + "integrity": "sha512-vYl3iOX+4CKUWuxGi9Ukhie6fsqXqS9FE2Zaic4tNFD2N2QQaXOMFbuKK4QmDHC0JO6B1Zp41J0LpT0oR68amQ==", + "peer": true, "dependencies": { - "deep-extend": "^0.6.0", - "ini": "~1.3.0", - "minimist": "^1.2.0", - "strip-json-comments": "~2.0.1" - }, - "bin": { - "rc": "cli.js" - } - }, - "node_modules/rc/node_modules/strip-json-comments": { - "version": "2.0.1", - "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", - "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", - "engines": { - "node": ">=0.10.0" + "safe-buffer": "^5.1.0" } }, "node_modules/react": { @@ -9596,6 +12218,7 @@ "version": "8.10.1", "resolved": "https://registry.npmjs.org/react-day-picker/-/react-day-picker-8.10.1.tgz", "integrity": "sha512-TMx7fNbhLk15eqcMt+7Z7S2KF7mfTId/XJDjKE8f+IUcFn0l08/kI4FiYTL/0yuOLmEcbR4Fwe3GJf/NiiMnPA==", + "license": "MIT", "funding": { "type": "individual", "url": "https://github.com/sponsors/gpbl" @@ -9627,49 +12250,107 @@ "prop-types": "^15.8.1" }, "engines": { - "node": ">= 10.13" + "node": ">= 10.13" + }, + "peerDependencies": { + "react": ">= 16.8 || 18.0.0" + } + }, + "node_modules/react-fast-compare": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-2.0.4.tgz", + "integrity": "sha512-suNP+J1VU1MWFKcyt7RtjiSWUjvidmQSlqu+eHslq+342xCbGTYmC0mEhPCOHxlW0CywylOC1u2DFAT+bv4dBw==" + }, + "node_modules/react-icons": { + "version": "4.12.0", + "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-4.12.0.tgz", + "integrity": "sha512-IBaDuHiShdZqmfc/TwHu6+d6k2ltNCf3AszxNmjJc1KUfXdEeRJOKyNvLmAHaarhzGmTSVygNdyu8/opXv2gaw==", + "peerDependencies": { + "react": "*" + } + }, + "node_modules/react-is": { + "version": "19.0.0-rc-fb9a90fa48-20240614", + "resolved": "https://registry.npmjs.org/react-is/-/react-is-19.0.0-rc-fb9a90fa48-20240614.tgz", + "integrity": "sha512-60qI7v1B9RhmZwjTCnAgzcuABOQsIH20vTbETQPaze96s1lY2lSawv9dvXAfF8Z1MIqOppWSKLNOshF0WsZ3OA==" + }, + "node_modules/react-loader-spinner": { + "version": "5.4.5", + "resolved": "https://registry.npmjs.org/react-loader-spinner/-/react-loader-spinner-5.4.5.tgz", + "integrity": "sha512-32f+sb/v2tnNfyvnCCOS4fpyVHsGXjSyNo6QLniHcaj1XjKLxx14L2z0h6szRugOL8IEJ+53GPwNAdbkDqmy4g==", + "dependencies": { + "react-is": "^18.2.0", + "styled-components": "^5.3.5", + "styled-tools": "^1.7.2" + }, + "peerDependencies": { + "react": "^16.0.0 || ^17.0.0 || ^18.0.0", + "react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/react-loader-spinner/node_modules/has-flag": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", + "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", + "engines": { + "node": ">=4" + } + }, + "node_modules/react-loader-spinner/node_modules/styled-components": { + "version": "5.3.11", + "resolved": "https://registry.npmjs.org/styled-components/-/styled-components-5.3.11.tgz", + "integrity": "sha512-uuzIIfnVkagcVHv9nE0VPlHPSCmXIUGKfJ42LNjxCCTDTL5sgnJ8Z7GZBq0EnLYGln77tPpEpExt2+qa+cZqSw==", + "dependencies": { + "@babel/helper-module-imports": "^7.0.0", + "@babel/traverse": "^7.4.5", + "@emotion/is-prop-valid": 
"^1.1.0", + "@emotion/stylis": "^0.8.4", + "@emotion/unitless": "^0.7.4", + "babel-plugin-styled-components": ">= 1.12.0", + "css-to-react-native": "^3.0.0", + "hoist-non-react-statics": "^3.0.0", + "shallowequal": "^1.1.0", + "supports-color": "^5.5.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/styled-components" }, "peerDependencies": { - "react": ">= 16.8 || 18.0.0" + "react": ">= 16.8.0", + "react-dom": ">= 16.8.0", + "react-is": ">= 16.8.0" } }, - "node_modules/react-fast-compare": { - "version": "2.0.4", - "resolved": "https://registry.npmjs.org/react-fast-compare/-/react-fast-compare-2.0.4.tgz", - "integrity": "sha512-suNP+J1VU1MWFKcyt7RtjiSWUjvidmQSlqu+eHslq+342xCbGTYmC0mEhPCOHxlW0CywylOC1u2DFAT+bv4dBw==" - }, - "node_modules/react-icons": { - "version": "4.12.0", - "resolved": "https://registry.npmjs.org/react-icons/-/react-icons-4.12.0.tgz", - "integrity": "sha512-IBaDuHiShdZqmfc/TwHu6+d6k2ltNCf3AszxNmjJc1KUfXdEeRJOKyNvLmAHaarhzGmTSVygNdyu8/opXv2gaw==", + "node_modules/react-loader-spinner/node_modules/styled-components/node_modules/babel-plugin-styled-components": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/babel-plugin-styled-components/-/babel-plugin-styled-components-2.1.4.tgz", + "integrity": "sha512-Xgp9g+A/cG47sUyRwwYxGM4bR/jDRg5N6it/8+HxCnbT5XNKSKDT9xm4oag/osgqjC2It/vH0yXsomOG6k558g==", + "dependencies": { + "@babel/helper-annotate-as-pure": "^7.22.5", + "@babel/helper-module-imports": "^7.22.5", + "@babel/plugin-syntax-jsx": "^7.22.5", + "lodash": "^4.17.21", + "picomatch": "^2.3.1" + }, "peerDependencies": { - "react": "*" + "styled-components": ">= 2" } }, - "node_modules/react-is": { - "version": "16.13.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-16.13.1.tgz", - "integrity": "sha512-24e6ynE2H+OKt4kqsOvNd8kBpV65zoxbA4BVsEOB3ARVWQki/DHzaUoC5KuON/BiccDaCCTZBuOcfZs70kR8bQ==" - }, - "node_modules/react-loader-spinner": { - "version": "5.4.5", - "resolved": "https://registry.npmjs.org/react-loader-spinner/-/react-loader-spinner-5.4.5.tgz", - "integrity": "sha512-32f+sb/v2tnNfyvnCCOS4fpyVHsGXjSyNo6QLniHcaj1XjKLxx14L2z0h6szRugOL8IEJ+53GPwNAdbkDqmy4g==", + "node_modules/react-loader-spinner/node_modules/supports-color": { + "version": "5.5.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", + "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", "dependencies": { - "react-is": "^18.2.0", - "styled-components": "^5.3.5", - "styled-tools": "^1.7.2" + "has-flag": "^3.0.0" }, - "peerDependencies": { - "react": "^16.0.0 || ^17.0.0 || ^18.0.0", - "react-dom": "^16.0.0 || ^17.0.0 || ^18.0.0" + "engines": { + "node": ">=4" } }, - "node_modules/react-loader-spinner/node_modules/react-is": { - "version": "18.3.1", - "resolved": "https://registry.npmjs.org/react-is/-/react-is-18.3.1.tgz", - "integrity": "sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==" - }, "node_modules/react-markdown": { "version": "9.0.1", "resolved": "https://registry.npmjs.org/react-markdown/-/react-markdown-9.0.1.tgz", @@ -9695,51 +12376,6 @@ "react": ">=18" } }, - "node_modules/react-remove-scroll": { - "version": "2.5.5", - "resolved": "https://registry.npmjs.org/react-remove-scroll/-/react-remove-scroll-2.5.5.tgz", - "integrity": "sha512-ImKhrzJJsyXJfBZ4bzu8Bwpka14c/fQt0k+cyFp/PBhTfyDnU5hjOtM4AG/0AMyy8oKzOTR0lDgJIM7pYXI0kw==", - "dependencies": { - 
"react-remove-scroll-bar": "^2.3.3", - "react-style-singleton": "^2.2.1", - "tslib": "^2.1.0", - "use-callback-ref": "^1.3.0", - "use-sidecar": "^1.1.2" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/react-remove-scroll-bar": { - "version": "2.3.6", - "resolved": "https://registry.npmjs.org/react-remove-scroll-bar/-/react-remove-scroll-bar-2.3.6.tgz", - "integrity": "sha512-DtSYaao4mBmX+HDo5YWYdBWQwYIQQshUV/dVxFxK+KM26Wjwp1gZ6rv6OC3oujI6Bfu6Xyg3TwK533AQutsn/g==", - "dependencies": { - "react-style-singleton": "^2.2.1", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, "node_modules/react-select": { "version": "5.8.0", "resolved": "https://registry.npmjs.org/react-select/-/react-select-5.8.0.tgz", @@ -9760,35 +12396,22 @@ "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" } }, - "node_modules/react-smooth": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.1.tgz", - "integrity": "sha512-OE4hm7XqR0jNOq3Qmk9mFLyd6p2+j6bvbPJ7qlB7+oo0eNcL2l7WQzG6MBnT3EXY6xzkLMUBec3AfewJdA0J8w==", - "dependencies": { - "fast-equals": "^5.0.1", - "prop-types": "^15.8.1", - "react-transition-group": "^4.4.5" - }, - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0", - "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" - } - }, - "node_modules/react-style-singleton": { - "version": "2.2.1", - "resolved": "https://registry.npmjs.org/react-style-singleton/-/react-style-singleton-2.2.1.tgz", - "integrity": "sha512-ZWj0fHEMyWkHzKYUr2Bs/4zU6XLmq9HsgBURm7g5pAVfyn49DgUiNgY2d4lXRlYSiCif9YBGpQleewkcqddc7g==", + "node_modules/react-select/node_modules/@emotion/react": { + "version": "11.13.3", + "resolved": "https://registry.npmjs.org/@emotion/react/-/react-11.13.3.tgz", + "integrity": "sha512-lIsdU6JNrmYfJ5EbUCf4xW1ovy5wKQ2CkPRM4xogziOxH1nXxBSjpC9YqbFAP7circxMfYp+6x676BqWcEiixg==", "dependencies": { - "get-nonce": "^1.0.0", - "invariant": "^2.2.4", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" + "@babel/runtime": "^7.18.3", + "@emotion/babel-plugin": "^11.12.0", + "@emotion/cache": "^11.13.0", + "@emotion/serialize": "^1.3.1", + "@emotion/use-insertion-effect-with-fallbacks": "^1.1.0", + "@emotion/utils": "^1.4.0", + "@emotion/weak-memoize": "^0.4.0", + "hoist-non-react-statics": "^3.3.1" }, "peerDependencies": { - "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + "react": ">=16.8.0" }, "peerDependenciesMeta": { "@types/react": { @@ -9796,7 +12419,15 @@ } } }, - "node_modules/react-transition-group": { + "node_modules/react-select/node_modules/@emotion/react/node_modules/@emotion/use-insertion-effect-with-fallbacks": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@emotion/use-insertion-effect-with-fallbacks/-/use-insertion-effect-with-fallbacks-1.1.0.tgz", + "integrity": "sha512-+wBOcIV5snwGgI2ya3u99D7/FJquOIniQT1IKyDsBmEgwvpxMNeS65Oib7OnE2d2aY+3BU4OiH+0Wchf8yk3Hw==", + "peerDependencies": { + "react": ">=16.8.0" + } + }, + "node_modules/react-select/node_modules/react-transition-group": { "version": "4.4.5", "resolved": 
"https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz", "integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==", @@ -9811,13 +12442,17 @@ "react-dom": ">=16.6.0" } }, - "node_modules/react-transition-state": { - "version": "2.1.1", - "resolved": "https://registry.npmjs.org/react-transition-state/-/react-transition-state-2.1.1.tgz", - "integrity": "sha512-kQx5g1FVu9knoz1T1WkapjUgFz08qQ/g1OmuWGi3/AoEFfS0kStxrPlZx81urjCXdz2d+1DqLpU6TyLW/Ro04Q==", + "node_modules/react-select/node_modules/use-isomorphic-layout-effect": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/use-isomorphic-layout-effect/-/use-isomorphic-layout-effect-1.1.2.tgz", + "integrity": "sha512-49L8yCO3iGT/ZF9QttjwLF/ZD9Iwto5LnH5LmEdk/6cFmXddqi2ulF0edxTwjj+7mqvpVVGQWvbXZdn32wRSHA==", "peerDependencies": { - "react": ">=16.8.0", - "react-dom": ">=16.8.0" + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + }, + "peerDependenciesMeta": { + "@types/react": { + "optional": true + } } }, "node_modules/read-cache": { @@ -9828,19 +12463,6 @@ "pify": "^2.3.0" } }, - "node_modules/readable-stream": { - "version": "3.6.2", - "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", - "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", - "dependencies": { - "inherits": "^2.0.3", - "string_decoder": "^1.1.1", - "util-deprecate": "^1.0.1" - }, - "engines": { - "node": ">= 6" - } - }, "node_modules/readdirp": { "version": "3.6.0", "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-3.6.0.tgz", @@ -9853,14 +12475,15 @@ } }, "node_modules/recharts": { - "version": "2.12.7", - "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.12.7.tgz", - "integrity": "sha512-hlLJMhPQfv4/3NBSAyq3gzGg4h2v69RJh6KU7b3pXYNNAELs9kEoXOjbkxdXpALqKBoVmVptGfLpxdaVYqjmXQ==", + "version": "2.13.1", + "resolved": "https://registry.npmjs.org/recharts/-/recharts-2.13.1.tgz", + "integrity": "sha512-87LdsmgK/MHLmWQfTC6yDysno2cOigi/+2KRCwy0D8NDu1IOdtTGS8lMovA0VIvJ7kf3zdp1IiwznHZWSPJhYw==", + "license": "MIT", "dependencies": { "clsx": "^2.0.0", "eventemitter3": "^4.0.1", "lodash": "^4.17.21", - "react-is": "^16.10.2", + "react-is": "^18.3.1", "react-smooth": "^4.0.0", "recharts-scale": "^0.4.4", "tiny-invariant": "^1.3.1", @@ -9882,6 +12505,35 @@ "decimal.js-light": "^2.4.1" } }, + "node_modules/recharts/node_modules/react-smooth": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/react-smooth/-/react-smooth-4.0.1.tgz", + "integrity": "sha512-OE4hm7XqR0jNOq3Qmk9mFLyd6p2+j6bvbPJ7qlB7+oo0eNcL2l7WQzG6MBnT3EXY6xzkLMUBec3AfewJdA0J8w==", + "dependencies": { + "fast-equals": "^5.0.1", + "prop-types": "^15.8.1", + "react-transition-group": "^4.4.5" + }, + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0", + "react-dom": "^16.8.0 || ^17.0.0 || ^18.0.0" + } + }, + "node_modules/recharts/node_modules/react-smooth/node_modules/react-transition-group": { + "version": "4.4.5", + "resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz", + "integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==", + "dependencies": { + "@babel/runtime": "^7.5.5", + "dom-helpers": "^5.0.1", + "loose-envify": "^1.4.0", + "prop-types": "^15.6.2" + }, + "peerDependencies": { + "react": ">=16.6.0", + "react-dom": ">=16.6.0" + } + }, "node_modules/reflect.getprototypeof": { "version": 
"1.0.6", "resolved": "https://registry.npmjs.org/reflect.getprototypeof/-/reflect.getprototypeof-1.0.6.tgz", @@ -10043,6 +12695,19 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/require-in-the-middle": { + "version": "7.4.0", + "resolved": "https://registry.npmjs.org/require-in-the-middle/-/require-in-the-middle-7.4.0.tgz", + "integrity": "sha512-X34iHADNbNDfr6OTStIAHWSAvvKQRYgLO6duASaVf7J2VA3lvmNYboAHOuLC2huav1IwgZJtyEcJCKVzFxOSMQ==", + "dependencies": { + "debug": "^4.3.5", + "module-details-from-path": "^1.0.3", + "resolve": "^1.22.8" + }, + "engines": { + "node": ">=8.6.0" + } + }, "node_modules/resolve": { "version": "1.22.8", "resolved": "https://registry.npmjs.org/resolve/-/resolve-1.22.8.tgz", @@ -10120,6 +12785,21 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/rollup": { + "version": "3.29.5", + "resolved": "https://registry.npmjs.org/rollup/-/rollup-3.29.5.tgz", + "integrity": "sha512-GVsDdsbJzzy4S/v3dqWPJ7EfvZJfCHiDqe80IyrF59LYuP+e6U1LJoUqeuqRbwAWoMNoXivMNeNAOf5E22VA1w==", + "bin": { + "rollup": "dist/bin/rollup" + }, + "engines": { + "node": ">=14.18.0", + "npm": ">=8.0.0" + }, + "optionalDependencies": { + "fsevents": "~2.3.2" + } + }, "node_modules/run-parallel": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", @@ -10177,7 +12857,8 @@ "type": "consulting", "url": "https://feross.org/support" } - ] + ], + "peer": true }, "node_modules/safe-regex-test": { "version": "1.0.3", @@ -10204,10 +12885,28 @@ "loose-envify": "^1.1.0" } }, + "node_modules/schema-utils": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", + "integrity": "sha512-pN/yOAvcC+5rQ5nERGuwrjLlYvLTbCibnZ1I7B1LaiAz9BRBlE9GMgE/eqV30P7aJQUf7Ddimy/RsbYO/GrVGg==", + "peer": true, + "dependencies": { + "@types/json-schema": "^7.0.8", + "ajv": "^6.12.5", + "ajv-keywords": "^3.5.2" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + } + }, "node_modules/semver": { - "version": "7.6.2", - "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.2.tgz", - "integrity": "sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w==", + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", "bin": { "semver": "bin/semver.js" }, @@ -10215,11 +12914,19 @@ "node": ">=10" } }, + "node_modules/serialize-javascript": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.2.tgz", + "integrity": "sha512-Saa1xPByTTq2gdeFZYLLo+RFE35NHZkAbqZeWNd3BpzppeVisAqpDjcp8dyf6uIvEqJRd46jemmyA4iFIeVk8g==", + "peer": true, + "dependencies": { + "randombytes": "^2.1.0" + } + }, "node_modules/set-function-length": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/set-function-length/-/set-function-length-1.2.2.tgz", "integrity": "sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==", - "dev": true, "dependencies": { "define-data-property": "^1.1.4", "es-errors": "^1.3.0", @@ -10253,25 +12960,41 @@ "integrity": "sha512-y0m1JoUZSlPAjXVtPPW70aZWfIL/dSP7AFkRnniLCrK/8MDKog3TySTBmckD+RObVxH0v4Tox67+F14PdED2oQ==" }, "node_modules/sharp": { - "version": "0.32.6", - "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.32.6.tgz", - 
"integrity": "sha512-KyLTWwgcR9Oe4d9HwCwNM2l7+J0dUQwn/yf7S0EnTtb0eVS4RxO0eUSvxPtzT4F3SY+C4K6fqdv/DO27sJ/v/w==", + "version": "0.33.5", + "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.33.5.tgz", + "integrity": "sha512-haPVm1EkS9pgvHrQ/F3Xy+hgcuMV0Wm9vfIBSiwZ05k+xgb0PkBQpGsAA/oWdDobNaZTH5ppvHtzCFbnSEwHVw==", "hasInstallScript": true, "dependencies": { "color": "^4.2.3", - "detect-libc": "^2.0.2", - "node-addon-api": "^6.1.0", - "prebuild-install": "^7.1.1", - "semver": "^7.5.4", - "simple-get": "^4.0.1", - "tar-fs": "^3.0.4", - "tunnel-agent": "^0.6.0" + "detect-libc": "^2.0.3", + "semver": "^7.6.3" }, "engines": { - "node": ">=14.15.0" + "node": "^18.17.0 || ^20.3.0 || >=21.0.0" }, "funding": { "url": "https://opencollective.com/libvips" + }, + "optionalDependencies": { + "@img/sharp-darwin-arm64": "0.33.5", + "@img/sharp-darwin-x64": "0.33.5", + "@img/sharp-libvips-darwin-arm64": "1.0.4", + "@img/sharp-libvips-darwin-x64": "1.0.4", + "@img/sharp-libvips-linux-arm": "1.0.5", + "@img/sharp-libvips-linux-arm64": "1.0.4", + "@img/sharp-libvips-linux-s390x": "1.0.4", + "@img/sharp-libvips-linux-x64": "1.0.4", + "@img/sharp-libvips-linuxmusl-arm64": "1.0.4", + "@img/sharp-libvips-linuxmusl-x64": "1.0.4", + "@img/sharp-linux-arm": "0.33.5", + "@img/sharp-linux-arm64": "0.33.5", + "@img/sharp-linux-s390x": "0.33.5", + "@img/sharp-linux-x64": "0.33.5", + "@img/sharp-linuxmusl-arm64": "0.33.5", + "@img/sharp-linuxmusl-x64": "0.33.5", + "@img/sharp-wasm32": "0.33.5", + "@img/sharp-win32-ia32": "0.33.5", + "@img/sharp-win32-x64": "0.33.5" } }, "node_modules/shebang-command": { @@ -10293,11 +13016,15 @@ "node": ">=8" } }, + "node_modules/shimmer": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/shimmer/-/shimmer-1.2.1.tgz", + "integrity": "sha512-sQTKC1Re/rM6XyFM6fIAGHRPVGvyXfgzIDvzoq608vM+jeyVD0Tu1E6Np0Kc2zAIFWIj963V2800iF/9LPieQw==" + }, "node_modules/side-channel": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.0.6.tgz", "integrity": "sha512-fDW/EZ6Q9RiO8eFG8Hj+7u/oW+XrPTIChwCOM2+th2A6OblDtYYIpve9m+KvI9Z4C9qSEXlaGR6bTEYHReuglA==", - "dev": true, "dependencies": { "call-bind": "^1.0.7", "es-errors": "^1.3.0", @@ -10322,49 +13049,6 @@ "url": "https://github.com/sponsors/isaacs" } }, - "node_modules/simple-concat": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", - "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ] - }, - "node_modules/simple-get": { - "version": "4.0.1", - "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", - "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/feross" - }, - { - "type": "patreon", - "url": "https://www.patreon.com/feross" - }, - { - "type": "consulting", - "url": "https://feross.org/support" - } - ], - "dependencies": { - "decompress-response": "^6.0.0", - "once": "^1.3.1", - "simple-concat": "^1.0.0" - } - }, "node_modules/simple-swizzle": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", @@ -10390,10 +13074,29 @@ "node": ">=0.10.0" 
} }, - "node_modules/source-map-js": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", - "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==", + "node_modules/source-map-js": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.0.tgz", + "integrity": "sha512-itJW8lvSA0TXEphiRoawsCksnlf8SyvmFzIhltqAHluXd88pkCd+cXJVHTDwdCr0IzwptSm035IHQktUu1QUMg==", + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/source-map-support": { + "version": "0.5.21", + "resolved": "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.21.tgz", + "integrity": "sha512-uBHU3L3czsIyYXKX88fdrGovxdSCoTGDRZ6SYXtSRxLZUzHg5P/66Ht6uoUlHu9EZod+inXhKo3qQgwXUT/y1w==", + "peer": true, + "dependencies": { + "buffer-from": "^1.0.0", + "source-map": "^0.6.0" + } + }, + "node_modules/source-map-support/node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "peer": true, "engines": { "node": ">=0.10.0" } @@ -10407,6 +13110,25 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/stacktrace-parser": { + "version": "0.1.10", + "resolved": "https://registry.npmjs.org/stacktrace-parser/-/stacktrace-parser-0.1.10.tgz", + "integrity": "sha512-KJP1OCML99+8fhOHxwwzyWrlUuVX5GQ0ZpJTd1DFXhdkrvg1szxfHhawXUZ3g9TkXORQd4/WG68jMlQZ2p8wlg==", + "dependencies": { + "type-fest": "^0.7.1" + }, + "engines": { + "node": ">=6" + } + }, + "node_modules/stacktrace-parser/node_modules/type-fest": { + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.7.1.tgz", + "integrity": "sha512-Ne2YiiGN8bmrmJJEuTWTLJR32nh/JdL1+PSicowtNb0WFpn59GK8/lfD61bVtzguz7b3PBt74nxpv/Pw5po5Rg==", + "engines": { + "node": ">=8" + } + }, "node_modules/streamsearch": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz", @@ -10415,26 +13137,6 @@ "node": ">=10.0.0" } }, - "node_modules/streamx": { - "version": "2.16.1", - "resolved": "https://registry.npmjs.org/streamx/-/streamx-2.16.1.tgz", - "integrity": "sha512-m9QYj6WygWyWa3H1YY69amr4nVgy61xfjys7xO7kviL5rfIEc2naf+ewFiOA+aEJD7y0JO3h2GoiUv4TDwEGzQ==", - "dependencies": { - "fast-fifo": "^1.1.0", - "queue-tick": "^1.0.1" - }, - "optionalDependencies": { - "bare-events": "^2.2.0" - } - }, - "node_modules/string_decoder": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", - "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", - "dependencies": { - "safe-buffer": "~5.2.0" - } - }, "node_modules/string-width": { "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", @@ -10627,66 +13329,30 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/style-to-object": { - "version": "1.0.6", - "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.6.tgz", - "integrity": "sha512-khxq+Qm3xEyZfKd/y9L3oIWQimxuc4STrQKtQn8aSDRHb8mFgpukgX1hdzfrMEW6JCjyJ8p89x+IUMVnCBI1PA==", - "dependencies": { - "inline-style-parser": "0.2.3" - } - }, - "node_modules/styled-components": { - "version": "5.3.11", - "resolved": "https://registry.npmjs.org/styled-components/-/styled-components-5.3.11.tgz", - 
"integrity": "sha512-uuzIIfnVkagcVHv9nE0VPlHPSCmXIUGKfJ42LNjxCCTDTL5sgnJ8Z7GZBq0EnLYGln77tPpEpExt2+qa+cZqSw==", + "node_modules/stripe": { + "version": "17.0.0", + "resolved": "https://registry.npmjs.org/stripe/-/stripe-17.0.0.tgz", + "integrity": "sha512-URKpnjH2O+OWxhvXLIaEIaAkp2fQvqITm/3zJS0a3nGCREjH3qJYxmGowngA46Qu1x2MumNL3Y/OdY6uzIhpCQ==", "dependencies": { - "@babel/helper-module-imports": "^7.0.0", - "@babel/traverse": "^7.4.5", - "@emotion/is-prop-valid": "^1.1.0", - "@emotion/stylis": "^0.8.4", - "@emotion/unitless": "^0.7.4", - "babel-plugin-styled-components": ">= 1.12.0", - "css-to-react-native": "^3.0.0", - "hoist-non-react-statics": "^3.0.0", - "shallowequal": "^1.1.0", - "supports-color": "^5.5.0" + "@types/node": ">=8.1.0", + "qs": "^6.11.0" }, "engines": { - "node": ">=10" - }, - "funding": { - "type": "opencollective", - "url": "https://opencollective.com/styled-components" - }, - "peerDependencies": { - "react": ">= 16.8.0", - "react-dom": ">= 16.8.0", - "react-is": ">= 16.8.0" - } - }, - "node_modules/styled-components/node_modules/has-flag": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz", - "integrity": "sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==", - "engines": { - "node": ">=4" + "node": ">=12.*" } }, - "node_modules/styled-components/node_modules/supports-color": { - "version": "5.5.0", - "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz", - "integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==", + "node_modules/style-to-object": { + "version": "1.0.6", + "resolved": "https://registry.npmjs.org/style-to-object/-/style-to-object-1.0.6.tgz", + "integrity": "sha512-khxq+Qm3xEyZfKd/y9L3oIWQimxuc4STrQKtQn8aSDRHb8mFgpukgX1hdzfrMEW6JCjyJ8p89x+IUMVnCBI1PA==", "dependencies": { - "has-flag": "^3.0.0" - }, - "engines": { - "node": ">=4" + "inline-style-parser": "0.2.3" } }, "node_modules/styled-jsx": { - "version": "5.1.1", - "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.1.tgz", - "integrity": "sha512-pW7uC1l4mBZ8ugbiZrcIsiIvVx1UmTfw7UkC3Um2tmfUq9Bhk8IiyEIPl6F8agHgjzku6j0xQEZbfA5uSgSaCw==", + "version": "5.1.6", + "resolved": "https://registry.npmjs.org/styled-jsx/-/styled-jsx-5.1.6.tgz", + "integrity": "sha512-qSVyDTeMotdvQYoHWLNGwRFJHC+i+ZvdBRYosOFgC+Wg1vx4frN2/RG/NA7SYqqvKNLf39P2LSRA2pu6n0XYZA==", "dependencies": { "client-only": "0.0.1" }, @@ -10694,7 +13360,7 @@ "node": ">= 12.0.0" }, "peerDependencies": { - "react": ">= 16.8.0 || 17.x.x || ^18.0.0-0" + "react": ">= 16.8.0 || 17.x.x || ^18.0.0-0 || ^19.0.0-0" }, "peerDependenciesMeta": { "@babel/core": { @@ -10740,7 +13406,6 @@ "version": "7.2.0", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "integrity": "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==", - "dev": true, "dependencies": { "has-flag": "^4.0.0" }, @@ -10771,15 +13436,24 @@ "react": "^16.11.0 || ^17.0.0 || ^18.0.0" } }, + "node_modules/swr/node_modules/use-sync-external-store": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.2.2.tgz", + "integrity": "sha512-PElTlVMwpblvbNqQ82d2n6RjStvdSoNe9FG28kNfz3WiXilJm4DdNkEzRhCZuIDwY8U08WVihhGR5iRqAwfDiw==", + "peerDependencies": { + "react": "^16.8.0 || ^17.0.0 || ^18.0.0" + } + }, "node_modules/tabbable": { "version": "6.2.0", "resolved": 
"https://registry.npmjs.org/tabbable/-/tabbable-6.2.0.tgz", "integrity": "sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==" }, "node_modules/tailwind-merge": { - "version": "1.14.0", - "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-1.14.0.tgz", - "integrity": "sha512-3mFKyCo/MBcgyOTlrY8T7odzZFx+w+qKSMAmdFzRvqBfLlSigU6TZnlFHK0lkMwj9Bj8OYU+9yW9lmGuS0QEnQ==", + "version": "2.5.4", + "resolved": "https://registry.npmjs.org/tailwind-merge/-/tailwind-merge-2.5.4.tgz", + "integrity": "sha512-0q8cfZHMu9nuYP/b5Shb7Y7Sh1B7Nnl5GqNr1U+n2p6+mybvRtayrQ+0042Z5byvTA8ihjlP8Odo8/VnHbZu4Q==", + "license": "MIT", "funding": { "type": "github", "url": "https://github.com/sponsors/dcastil" @@ -10821,6 +13495,15 @@ "node": ">=14.0.0" } }, + "node_modules/tailwindcss-animate": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/tailwindcss-animate/-/tailwindcss-animate-1.0.7.tgz", + "integrity": "sha512-bl6mpH3T7I3UFxuvDEXLxy/VuFxBk5bbzplh7tXI68mwMokNYd1t9qPBHlnyTwfa4JGC4zP516I1hYYtQ/vspA==", + "license": "MIT", + "peerDependencies": { + "tailwindcss": ">=3.0.0 || insiders" + } + }, "node_modules/tailwindcss/node_modules/postcss-selector-parser": { "version": "6.0.16", "resolved": "https://registry.npmjs.org/postcss-selector-parser/-/postcss-selector-parser-6.0.16.tgz", @@ -10837,34 +13520,68 @@ "version": "2.2.1", "resolved": "https://registry.npmjs.org/tapable/-/tapable-2.2.1.tgz", "integrity": "sha512-GNzQvQTOIP6RyTfE2Qxb8ZVlNmw0n88vp1szwWRimP02mnTsx3Wtn5qRdqY9w2XduFNUgvOwhNnQsjwCp+kqaQ==", - "dev": true, "engines": { "node": ">=6" } }, - "node_modules/tar-fs": { - "version": "3.0.6", - "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.6.tgz", - "integrity": "sha512-iokBDQQkUyeXhgPYaZxmczGPhnhXZ0CmrqI+MOb/WFGS9DW5wnfrLgtjUJBvz50vQ3qfRwJ62QVoCFu8mPVu5w==", + "node_modules/terser": { + "version": "5.34.1", + "resolved": "https://registry.npmjs.org/terser/-/terser-5.34.1.tgz", + "integrity": "sha512-FsJZ7iZLd/BXkz+4xrRTGJ26o/6VTjQytUk8b8OxkwcD2I+79VPJlz7qss1+zE7h8GNIScFqXcDyJ/KqBYZFVA==", + "peer": true, "dependencies": { - "pump": "^3.0.0", - "tar-stream": "^3.1.5" + "@jridgewell/source-map": "^0.3.3", + "acorn": "^8.8.2", + "commander": "^2.20.0", + "source-map-support": "~0.5.20" }, - "optionalDependencies": { - "bare-fs": "^2.1.1", - "bare-path": "^2.1.0" + "bin": { + "terser": "bin/terser" + }, + "engines": { + "node": ">=10" } }, - "node_modules/tar-stream": { - "version": "3.1.7", - "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", - "integrity": "sha512-qJj60CXt7IU1Ffyc3NJMjh6EkuCFej46zUqJ4J7pqYlThyd9bO0XBTmcOIhSzZJVWfsLks0+nle/j538YAW9RQ==", + "node_modules/terser-webpack-plugin": { + "version": "5.3.10", + "resolved": "https://registry.npmjs.org/terser-webpack-plugin/-/terser-webpack-plugin-5.3.10.tgz", + "integrity": "sha512-BKFPWlPDndPs+NGGCr1U59t0XScL5317Y0UReNrHaw9/FwhPENlq6bfgs+4yPfyP51vqC1bQ4rp1EfXW5ZSH9w==", + "peer": true, "dependencies": { - "b4a": "^1.6.4", - "fast-fifo": "^1.2.0", - "streamx": "^2.15.0" + "@jridgewell/trace-mapping": "^0.3.20", + "jest-worker": "^27.4.5", + "schema-utils": "^3.1.1", + "serialize-javascript": "^6.0.1", + "terser": "^5.26.0" + }, + "engines": { + "node": ">= 10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependencies": { + "webpack": "^5.1.0" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "esbuild": { + "optional": true + }, + "uglify-js": { + 
"optional": true + } } }, + "node_modules/terser/node_modules/commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", + "peer": true + }, "node_modules/text-table": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", @@ -10905,14 +13622,6 @@ "resolved": "https://registry.npmjs.org/tiny-warning/-/tiny-warning-1.0.3.tgz", "integrity": "sha512-lBN9zLN/oAf68o3zNXYrdCt1kP8WsiGW8Oo2ka41b2IM5JL/S1CTyX1rW0mb/zSuJun0ZUrDxx4sqvYS2FWzPA==" }, - "node_modules/to-fast-properties": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/to-fast-properties/-/to-fast-properties-2.0.0.tgz", - "integrity": "sha512-/OaKK0xYrs3DmxRYqL/yDc+FxFUVYhDlXMhRmv3z915w2HF1tnN1omB354j8VUGO/hbRzyD6Y3sA7v7GS/ceog==", - "engines": { - "node": ">=4" - } - }, "node_modules/to-regex-range": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/to-regex-range/-/to-regex-range-5.0.1.tgz", @@ -10929,6 +13638,11 @@ "resolved": "https://registry.npmjs.org/toposort/-/toposort-2.0.2.tgz", "integrity": "sha512-0a5EOkAUp8D4moMi2W8ZF8jcga7BgZd91O/yabJCFY8az+XSzeGyTKs0Aoo897iV1Nj6guFq8orWDS96z91oGg==" }, + "node_modules/tr46": { + "version": "0.0.3", + "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", + "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" + }, "node_modules/trim-lines": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/trim-lines/-/trim-lines-3.0.1.tgz", @@ -10993,17 +13707,6 @@ "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", "integrity": "sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==" }, - "node_modules/tunnel-agent": { - "version": "0.6.0", - "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", - "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", - "dependencies": { - "safe-buffer": "^5.0.1" - }, - "engines": { - "node": "*" - } - }, "node_modules/type-check": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", @@ -11232,6 +13935,17 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/unplugin": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/unplugin/-/unplugin-1.0.1.tgz", + "integrity": "sha512-aqrHaVBWW1JVKBHmGo33T5TxeL0qWzfvjWokObHA9bYmN7eNDkwOxmLjhioHl9878qDFMAaT51XNroRyuz7WxA==", + "dependencies": { + "acorn": "^8.8.1", + "chokidar": "^3.5.3", + "webpack-sources": "^3.2.3", + "webpack-virtual-modules": "^0.5.0" + } + }, "node_modules/update-browserslist-db": { "version": "1.0.16", "resolved": "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.16.tgz", @@ -11265,73 +13979,10 @@ "version": "4.4.1", "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", - "dev": true, "dependencies": { "punycode": "^2.1.0" } }, - "node_modules/use-callback-ref": { - "version": "1.3.2", - "resolved": "https://registry.npmjs.org/use-callback-ref/-/use-callback-ref-1.3.2.tgz", - "integrity": "sha512-elOQwe6Q8gqZgDA8mrh44qRTQqpIHDcZ3hXTLjBe1i4ph8XpNJnO+aQf3NaG+lriLopI4HMx9VjQLfPQ6vhnoA==", - "dependencies": { - "tslib": "^2.0.0" - }, - 
"engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "^16.8.0 || ^17.0.0 || ^18.0.0", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-isomorphic-layout-effect": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/use-isomorphic-layout-effect/-/use-isomorphic-layout-effect-1.1.2.tgz", - "integrity": "sha512-49L8yCO3iGT/ZF9QttjwLF/ZD9Iwto5LnH5LmEdk/6cFmXddqi2ulF0edxTwjj+7mqvpVVGQWvbXZdn32wRSHA==", - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sidecar": { - "version": "1.1.2", - "resolved": "https://registry.npmjs.org/use-sidecar/-/use-sidecar-1.1.2.tgz", - "integrity": "sha512-epTbsLuzZ7lPClpz2TyryBfztm7m+28DlEv2ZCQ3MDr5ssiwyOwGH/e5F9CkfWjJ1t4clvI58yF822/GUkjjhw==", - "dependencies": { - "detect-node-es": "^1.1.0", - "tslib": "^2.0.0" - }, - "engines": { - "node": ">=10" - }, - "peerDependencies": { - "@types/react": "^16.9.0 || ^17.0.0 || ^18.0.0", - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" - }, - "peerDependenciesMeta": { - "@types/react": { - "optional": true - } - } - }, - "node_modules/use-sync-external-store": { - "version": "1.2.2", - "resolved": "https://registry.npmjs.org/use-sync-external-store/-/use-sync-external-store-1.2.2.tgz", - "integrity": "sha512-PElTlVMwpblvbNqQ82d2n6RjStvdSoNe9FG28kNfz3WiXilJm4DdNkEzRhCZuIDwY8U08WVihhGR5iRqAwfDiw==", - "peerDependencies": { - "react": "^16.8.0 || ^17.0.0 || ^18.0.0" - } - }, "node_modules/util-deprecate": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", @@ -11410,6 +14061,19 @@ "d3-timer": "^3.0.1" } }, + "node_modules/watchpack": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/watchpack/-/watchpack-2.4.2.tgz", + "integrity": "sha512-TnbFSbcOCcDgjZ4piURLCbJ3nJhznVh9kw6F6iokjiFPl8ONxe9A6nMDVXDiNbrSfLILs6vB07F7wLBrwPYzJw==", + "peer": true, + "dependencies": { + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.1.2" + }, + "engines": { + "node": ">=10.13.0" + } + }, "node_modules/web-namespaces": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/web-namespaces/-/web-namespaces-2.0.1.tgz", @@ -11419,6 +14083,106 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/web-vitals": { + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/web-vitals/-/web-vitals-4.2.4.tgz", + "integrity": "sha512-r4DIlprAGwJ7YM11VZp4R884m0Vmgr6EAKe3P+kO0PPj3Unqyvv59rczf6UiGcb9Z8QxZVcqKNwv/g0WNdWwsw==" + }, + "node_modules/webidl-conversions": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", + "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" + }, + "node_modules/webpack": { + "version": "5.95.0", + "resolved": "https://registry.npmjs.org/webpack/-/webpack-5.95.0.tgz", + "integrity": "sha512-2t3XstrKULz41MNMBF+cJ97TyHdyQ8HCt//pqErqDvNjU9YQBnZxIHa11VXsi7F3mb5/aO2tuDxdeTPdU7xu9Q==", + "peer": true, + "dependencies": { + "@types/estree": "^1.0.5", + "@webassemblyjs/ast": "^1.12.1", + "@webassemblyjs/wasm-edit": "^1.12.1", + "@webassemblyjs/wasm-parser": "^1.12.1", + "acorn": "^8.7.1", + "acorn-import-attributes": "^1.9.5", + "browserslist": "^4.21.10", + "chrome-trace-event": "^1.0.2", + "enhanced-resolve": "^5.17.1", + "es-module-lexer": "^1.2.1", + "eslint-scope": 
"5.1.1", + "events": "^3.2.0", + "glob-to-regexp": "^0.4.1", + "graceful-fs": "^4.2.11", + "json-parse-even-better-errors": "^2.3.1", + "loader-runner": "^4.2.0", + "mime-types": "^2.1.27", + "neo-async": "^2.6.2", + "schema-utils": "^3.2.0", + "tapable": "^2.1.1", + "terser-webpack-plugin": "^5.3.10", + "watchpack": "^2.4.1", + "webpack-sources": "^3.2.3" + }, + "bin": { + "webpack": "bin/webpack.js" + }, + "engines": { + "node": ">=10.13.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/webpack" + }, + "peerDependenciesMeta": { + "webpack-cli": { + "optional": true + } + } + }, + "node_modules/webpack-sources": { + "version": "3.2.3", + "resolved": "https://registry.npmjs.org/webpack-sources/-/webpack-sources-3.2.3.tgz", + "integrity": "sha512-/DyMEOrDgLKKIG0fmvtz+4dUX/3Ghozwgm6iPp8KRhvn+eQf9+Q7GWxVNMk3+uCPWfdXYC4ExGBckIXdFEfH1w==", + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/webpack-virtual-modules": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/webpack-virtual-modules/-/webpack-virtual-modules-0.5.0.tgz", + "integrity": "sha512-kyDivFZ7ZM0BVOUteVbDFhlRt7Ah/CSPwJdi8hBpkK7QLumUqdLtVfm/PX/hkcnrvr0i77fO5+TjZ94Pe+C9iw==" + }, + "node_modules/webpack/node_modules/eslint-scope": { + "version": "5.1.1", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", + "integrity": "sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw==", + "peer": true, + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^4.1.1" + }, + "engines": { + "node": ">=8.0.0" + } + }, + "node_modules/webpack/node_modules/estraverse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-4.3.0.tgz", + "integrity": "sha512-39nnKffWz8xN1BU/2c79n9nB9HDzo0niYUqx6xyqUnyoAnQyyWpOTdZEeiCch8BBu515t4wp9ZmgVfVhn9EBpw==", + "peer": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/whatwg-url": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", + "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", + "dependencies": { + "tr46": "~0.0.3", + "webidl-conversions": "^3.0.0" + } + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", @@ -11611,13 +14375,21 @@ "node_modules/wrappy": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", - "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "dev": true + }, + "node_modules/xtend": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/xtend/-/xtend-4.0.2.tgz", + "integrity": "sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==", + "engines": { + "node": ">=0.4" + } }, "node_modules/yallist": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/yallist/-/yallist-3.1.1.tgz", - "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==", - "peer": true + "integrity": "sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==" }, "node_modules/yaml": { "version": "2.4.2", @@ -11634,7 +14406,6 @@ "version": "0.1.0", "resolved": 
"https://registry.npmjs.org/yocto-queue/-/yocto-queue-0.1.0.tgz", "integrity": "sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==", - "dev": true, "engines": { "node": ">=10" }, @@ -11646,6 +14417,7 @@ "version": "1.4.0", "resolved": "https://registry.npmjs.org/yup/-/yup-1.4.0.tgz", "integrity": "sha512-wPbgkJRCqIf+OHyiTBQoJiP5PFuAXaWiJK6AmYkzQAh5/c2K9hzSApBZG5wV9KoKSePF7sAxmNSvh/13YHkFDg==", + "license": "MIT", "dependencies": { "property-expr": "^2.0.5", "tiny-case": "^1.0.3", diff --git a/web/package.json b/web/package.json index 190ec9b1aa6..6f8aed2e738 100644 --- a/web/package.json +++ b/web/package.json @@ -3,7 +3,7 @@ "version": "0.2-dev", "private": true, "scripts": { - "dev": "next dev", + "dev": "next dev --turbopack", "build": "next build", "start": "next start", "lint": "next lint" @@ -12,11 +12,19 @@ "@dnd-kit/core": "^6.1.0", "@dnd-kit/modifiers": "^7.0.0", "@dnd-kit/sortable": "^8.0.0", + "@dnd-kit/utilities": "^3.2.2", + "@headlessui/react": "^2.2.0", + "@headlessui/tailwindcss": "^0.2.1", "@phosphor-icons/react": "^2.0.8", "@radix-ui/react-dialog": "^1.0.5", - "@radix-ui/react-popover": "^1.0.7", - "@radix-ui/react-tooltip": "^1.0.7", - "@tremor/react": "^3.9.2", + "@radix-ui/react-popover": "^1.1.2", + "@radix-ui/react-select": "^2.1.2", + "@radix-ui/react-separator": "^1.1.0", + "@radix-ui/react-slot": "^1.1.0", + "@radix-ui/react-tabs": "^1.1.1", + "@radix-ui/react-tooltip": "^1.1.3", + "@sentry/nextjs": "^8.34.0", + "@stripe/stripe-js": "^4.6.0", "@types/js-cookie": "^3.0.3", "@types/lodash": "^4.17.0", "@types/node": "18.15.11", @@ -25,35 +33,48 @@ "@types/react-dom": "18.0.11", "@types/uuid": "^9.0.8", "autoprefixer": "^10.4.14", + "class-variance-authority": "^0.7.0", + "clsx": "^2.1.1", + "date-fns": "^3.6.0", "formik": "^2.2.9", "js-cookie": "^3.0.5", "lodash": "^4.17.21", + "lucide-react": "^0.454.0", "mdast-util-find-and-replace": "^3.0.1", - "next": "^14.2.3", + "next": "^15.0.2", "npm": "^10.8.0", "postcss": "^8.4.31", + "posthog-js": "^1.176.0", "prismjs": "^1.29.0", "react": "^18.3.1", + "react-day-picker": "^8.10.1", "react-dom": "^18.3.1", "react-dropzone": "^14.2.3", "react-icons": "^4.8.0", "react-loader-spinner": "^5.4.5", "react-markdown": "^9.0.1", "react-select": "^5.8.0", + "recharts": "^2.13.1", "rehype-prism-plus": "^2.0.0", "remark-gfm": "^4.0.0", "semver": "^7.5.4", - "sharp": "^0.32.6", + "sharp": "^0.33.5", + "stripe": "^17.0.0", "swr": "^2.1.5", + "tailwind-merge": "^2.5.4", "tailwindcss": "^3.3.1", + "tailwindcss-animate": "^1.0.7", "typescript": "5.0.3", "uuid": "^9.0.1", - "yup": "^1.1.1" + "yup": "^1.4.0" }, "devDependencies": { "@tailwindcss/typography": "^0.5.10", "eslint": "^8.48.0", "eslint-config-next": "^14.1.0", "prettier": "2.8.8" + }, + "overrides": { + "react-is": "^19.0.0-rc-69d4b800-20241021" } } diff --git a/web/public/Freshdesk.png b/web/public/Freshdesk.png new file mode 100644 index 00000000000..a3343ceb01d Binary files /dev/null and b/web/public/Freshdesk.png differ diff --git a/web/public/RequestTracker.png b/web/public/RequestTracker.png deleted file mode 100644 index 95d6680e95c..00000000000 Binary files a/web/public/RequestTracker.png and /dev/null differ diff --git a/web/public/Wikipedia.svg b/web/public/Wikipedia.svg index dc32f9848f4..ee4a3caa55f 100644 --- a/web/public/Wikipedia.svg +++ b/web/public/Wikipedia.svg @@ -1 +1,535 @@ -]>Wikipedia logo version 2 \ No newline at end of file + + + Wikipedia logo version 2 + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/web/sentry.client.config.ts b/web/sentry.client.config.ts new file mode 100644 index 00000000000..0a0fe92c4ce --- /dev/null +++ b/web/sentry.client.config.ts @@ -0,0 +1,14 @@ +import * as Sentry from "@sentry/nextjs"; + +if (process.env.NEXT_PUBLIC_SENTRY_DSN) { + Sentry.init({ + dsn: process.env.NEXT_PUBLIC_SENTRY_DSN, + // Only capture unhandled exceptions + enableTracing: false, + integrations: [], + tracesSampleRate: 0, + replaysSessionSampleRate: 0, + replaysOnErrorSampleRate: 0, + autoSessionTracking: false, + }); +} diff --git a/web/sentry.edge.config.ts b/web/sentry.edge.config.ts new file mode 100644 index 00000000000..6a412ea21e6 --- /dev/null +++ b/web/sentry.edge.config.ts @@ -0,0 +1,11 @@ +import * as Sentry from "@sentry/nextjs"; + +if (process.env.NEXT_PUBLIC_SENTRY_DSN) { + Sentry.init({ + dsn: process.env.NEXT_PUBLIC_SENTRY_DSN, + // Only capture unhandled exceptions + tracesSampleRate: 0, + enableTracing: false, + autoSessionTracking: false, + }); +} diff --git a/web/sentry.server.config.ts b/web/sentry.server.config.ts new file mode 100644 index 00000000000..3cc7749a7ed --- /dev/null +++ b/web/sentry.server.config.ts @@ -0,0 +1,18 @@ +import * as Sentry from "@sentry/nextjs"; + +if (process.env.NEXT_PUBLIC_SENTRY_DSN) { + Sentry.init({ + dsn: process.env.NEXT_PUBLIC_SENTRY_DSN, + + // Only capture unhandled exceptions and errors + tracesSampleRate: 0, + enableTracing: false, + + // Disable performance monitoring and only capture errors + profilesSampleRate: 0, + + // Note: if you want to override the automatic release value, do not set a + // `release` value here - use the environment variable `SENTRY_RELEASE`, so + // that it will also get attached to your source maps + }); +} diff --git a/web/src/app/PostHogPageView.tsx b/web/src/app/PostHogPageView.tsx new file mode 100644 index 00000000000..e75c09731d1 --- /dev/null +++ b/web/src/app/PostHogPageView.tsx @@ -0,0 +1,30 @@ +"use client"; + +import { usePathname, useSearchParams } from "next/navigation"; +import { useEffect } from "react"; +import { usePostHog } from "posthog-js/react"; + +export default function PostHogPageView(): null { + const pathname = usePathname(); + const searchParams = useSearchParams(); + const posthog = usePostHog(); + + useEffect(() => { + if (!posthog) { + return; + } + + // Track pageviews + if (pathname) { + let url = window.origin + pathname; + if (searchParams.toString()) { + url = url + `?${searchParams.toString()}`; + } + posthog.capture("$pageview", { + $current_url: url, + }); + } + 
}, [pathname, searchParams, posthog]); + + return null; +} diff --git a/web/src/app/admin/add-connector/page.tsx b/web/src/app/admin/add-connector/page.tsx index 8d73131e69a..dc1e0721f3f 100644 --- a/web/src/app/admin/add-connector/page.tsx +++ b/web/src/app/admin/add-connector/page.tsx @@ -4,9 +4,10 @@ import { AdminPageTitle } from "@/components/admin/Title"; import { ConnectorIcon } from "@/components/icons/icons"; import { SourceCategory, SourceMetadata } from "@/lib/search/interfaces"; import { listSourceMetadata } from "@/lib/sources"; -import { Title, Text, Button } from "@tremor/react"; +import Title from "@/components/ui/title"; +import { Button } from "@/components/ui/button"; import Link from "next/link"; -import { useEffect, useMemo, useRef, useState } from "react"; +import { useCallback, useEffect, useMemo, useRef, useState } from "react"; function SourceTile({ sourceMetadata, @@ -32,9 +33,7 @@ function SourceTile({ href={sourceMetadata.adminUrl} > - - {sourceMetadata.displayName} - +

{sourceMetadata.displayName}

); } @@ -49,15 +48,18 @@ export default function Page() { searchInputRef.current.focus(); } }, []); - const filterSources = (sources: SourceMetadata[]) => { - if (!searchTerm) return sources; - const lowerSearchTerm = searchTerm.toLowerCase(); - return sources.filter( - (source) => - source.displayName.toLowerCase().includes(lowerSearchTerm) || - source.category.toLowerCase().includes(lowerSearchTerm) - ); - }; + const filterSources = useCallback( + (sources: SourceMetadata[]) => { + if (!searchTerm) return sources; + const lowerSearchTerm = searchTerm.toLowerCase(); + return sources.filter( + (source) => + source.displayName.toLowerCase().includes(lowerSearchTerm) || + source.category.toLowerCase().includes(lowerSearchTerm) + ); + }, + [searchTerm] + ); const categorizedSources = useMemo(() => { const filtered = filterSources(sources); @@ -98,9 +100,7 @@ export default function Page() { title="Add Connector" farRightElement={ - + } /> @@ -122,7 +122,7 @@ export default function Page() {
{category}
- {getCategoryDescription(category as SourceCategory)} +

{getCategoryDescription(category as SourceCategory)}

{sources.map((source, sourceInd) => ( tool.in_code_tool_id === "SearchTool"); @@ -105,7 +102,9 @@ export function AssistantEditor({ shouldAddAssistantToUserPreferences?: boolean; admin?: boolean; }) { + const { refreshAssistants, isImageGenerationAvailable } = useAssistants(); const router = useRouter(); + const { popup, setPopup } = usePopup(); const colorOptions = [ @@ -131,46 +130,17 @@ export function AssistantEditor({ if (defaultIconShape === null) { setDefaultIconShape(generateRandomIconShape().encodedGrid); } - }, []); + }, [defaultIconShape]); const [isIconDropdownOpen, setIsIconDropdownOpen] = useState(false); - const [finalPrompt, setFinalPrompt] = useState(""); - const [finalPromptError, setFinalPromptError] = useState(""); const [removePersonaImage, setRemovePersonaImage] = useState(false); - const triggerFinalPromptUpdate = async ( - systemPrompt: string, - taskPrompt: string, - retrievalDisabled: boolean - ) => { - const response = await buildFinalPrompt( - systemPrompt, - taskPrompt, - retrievalDisabled - ); - if (response.ok) { - setFinalPrompt((await response.json()).final_prompt_template); - } - }; - const isUpdate = existingPersona !== undefined && existingPersona !== null; const existingPrompt = existingPersona?.prompts[0] ?? null; - - useEffect(() => { - if (isUpdate && existingPrompt) { - triggerFinalPromptUpdate( - existingPrompt.system_prompt, - existingPrompt.task_prompt, - existingPersona.num_chunks === 0 - ); - } - }, []); - const defaultProvider = llmProviders.find( (llmProvider) => llmProvider.is_default_provider ); - const defaultProviderName = defaultProvider?.provider; const defaultModelName = defaultProvider?.default_model_name; const providerDisplayNameToProviderName = new Map(); llmProviders.forEach((llmProvider) => { @@ -191,15 +161,11 @@ export function AssistantEditor({ modelOptionsByProvider.set(llmProvider.name, providerOptions); }); - const providerSupportingImageGenerationExists = - providersContainImageGeneratingSupport(llmProviders); - const personaCurrentToolIds = existingPersona?.tools.map((tool) => tool.id) || []; + const searchTool = findSearchTool(tools); - const imageGenerationTool = providerSupportingImageGenerationExists - ? findImageGenerationTool(tools) - : undefined; + const imageGenerationTool = findImageGenerationTool(tools); const internetSearchTool = findInternetSearchTool(tools); const customTools = tools.filter( @@ -315,14 +281,6 @@ export function AssistantEditor({ } )} onSubmit={async (values, formikHelpers) => { - if (finalPromptError) { - setPopup({ - type: "error", - message: "Cannot submit while there are errors in the form", - }); - return; - } - if ( values.llm_model_provider_override && !values.llm_model_version_override @@ -348,12 +306,9 @@ export function AssistantEditor({ if (imageGenerationToolEnabled) { if ( - !checkLLMSupportsImageOutput( - providerDisplayNameToProviderName.get( - values.llm_model_provider_override || "" - ) || - defaultProviderName || - "", + // model must support image input for image generation + // to work + !checkLLMSupportsImageInput( values.llm_model_version_override || defaultModelName || "" ) ) { @@ -439,6 +394,7 @@ export function AssistantEditor({ }); } } + await refreshAssistants(); router.push( redirectType === SuccessfulPersonaUpdateRedirectType.ADMIN ? 
`/admin/assistants?u=${Date.now()}` @@ -468,12 +424,9 @@ export function AssistantEditor({ : false; } - const currentLLMSupportsImageOutput = checkLLMSupportsImageOutput( - providerDisplayNameToProviderName.get( - values.llm_model_provider_override || "" - ) || - defaultProviderName || - "", + // model must support image input for image generation + // to work + const currentLLMSupportsImageOutput = checkLLMSupportsImageInput( values.llm_model_version_override || defaultModelName || "" ); @@ -612,15 +565,13 @@ export function AssistantEditor({ align="start" side="bottom" /> - + -

- This icon will visually represent your Assistant -

+ This icon will visually represent your Assistant
@@ -648,13 +599,7 @@ export function AssistantEditor({ placeholder="e.g. 'You are a professional email writing assistant that always uses a polite enthusiastic tone, emphasizes action items, and leaves blanks for the human to fill in when you have unknowns'" onChange={(e) => { setFieldValue("system_prompt", e.target.value); - triggerFinalPromptUpdate( - e.target.value, - values.task_prompt, - searchToolEnabled() - ); }} - error={finalPromptError} />
@@ -662,16 +607,14 @@ export function AssistantEditor({
Default AI Model{" "}
- + -

- Select a Large Language Model (Generative AI model) to - power this Assistant -

+ Select a Large Language Model (Generative AI model) to + power this Assistant
@@ -681,7 +624,10 @@ export function AssistantEditor({ otherwise specified below. {admin && user?.preferences.default_model && - ` Your current (user-specific) default model is ${getDisplayNameForModel(destructureValue(user?.preferences?.default_model!).modelName)}`} + ` Your current (user-specific) default model is ${getDisplayNameForModel( + destructureValue(user?.preferences?.default_model!) + .modelName + )}`}

{admin ? (
@@ -756,16 +702,14 @@ export function AssistantEditor({
Capabilities{" "}
- + -

- You can give your assistant advanced capabilities like - image generation -

+ You can give your assistant advanced capabilities like + image generation
@@ -776,12 +720,13 @@ export function AssistantEditor({
{imageGenerationTool && ( - +
{ toggleToolInValues(imageGenerationTool.id); }} - disabled={!currentLLMSupportsImageOutput} + disabled={ + !currentLLMSupportsImageOutput || + !isImageGenerationAvailable + } />
- {!currentLLMSupportsImageOutput && ( + {!currentLLMSupportsImageOutput ? (

To use Image Generation, select GPT-4o or another @@ -805,13 +753,22 @@ export function AssistantEditor({ this Assistant.

+ ) : ( + !isImageGenerationAvailable && ( + +

+ Image Generation requires an OpenAI or Azure + Dalle configuration. +

+
+ ) )}
)} {searchTool && ( - +
) : ( - +

No Document Sets available.{" "} {user?.role !== "admin" && ( <> @@ -919,7 +876,7 @@ export function AssistantEditor({ Danswer for assistance. )} - +

)}
@@ -1002,7 +959,7 @@ export function AssistantEditor({ alignTop={tool.description != null} key={tool.id} name={`enabled_tools_map.${tool.id}`} - label={tool.name} + label={tool.display_name} subtext={tool.description} onChange={() => { toggleToolInValues(tool.id); @@ -1013,7 +970,7 @@ export function AssistantEditor({ )}
- + { setFieldValue("task_prompt", e.target.value); - triggerFinalPromptUpdate( - values.system_prompt, - e.target.value, - searchToolEnabled() - ); }} explanationText="Learn about prompting in our docs!" explanationLink="https://docs.danswer.dev/guides/assistants" @@ -1048,6 +1000,10 @@ export function AssistantEditor({ Starter Messages (Optional){" "}
+ + Add pre-defined messages to help users get started. Only + the first 4 will be displayed. + Add New @@ -1208,9 +1162,7 @@ export function AssistantEditor({
diff --git a/web/src/app/admin/assistants/[id]/page.tsx b/web/src/app/admin/assistants/[id]/page.tsx index fab6f9f038b..249ded66d65 100644 --- a/web/src/app/admin/assistants/[id]/page.tsx +++ b/web/src/app/admin/assistants/[id]/page.tsx @@ -1,14 +1,17 @@ import { ErrorCallout } from "@/components/ErrorCallout"; import { AssistantEditor } from "../AssistantEditor"; import { BackButton } from "@/components/BackButton"; -import { Card, Title } from "@tremor/react"; + import { DeletePersonaButton } from "./DeletePersonaButton"; import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS"; import { SuccessfulPersonaUpdateRedirectType } from "../enums"; import { RobotIcon } from "@/components/icons/icons"; import { AdminPageTitle } from "@/components/admin/Title"; +import CardSection from "@/components/admin/CardSection"; +import Title from "@/components/ui/title"; -export default async function Page({ params }: { params: { id: string } }) { +export default async function Page(props: { params: Promise<{ id: string }> }) { + const params = await props.params; const [values, error] = await fetchAssistantEditorInfoSS(params.id); let body; @@ -19,16 +22,17 @@ export default async function Page({ params }: { params: { id: string } }) { } else { body = ( <> - + - +
Delete Assistant +
provider.provider === "openai"); } + +// Default fallback persona for when we must display a persona +// but assistant has access to none +export const defaultPersona: Persona = { + id: 0, + name: "Default Assistant", + description: "A default assistant", + is_visible: true, + is_public: true, + builtin_persona: false, + is_default_persona: true, + users: [], + groups: [], + document_sets: [], + prompts: [], + tools: [], + starter_messages: null, + display_priority: null, + search_start_date: null, + owner: null, + icon_shape: 50910, + icon_color: "#FF6F6F", +}; diff --git a/web/src/app/admin/assistants/new/page.tsx b/web/src/app/admin/assistants/new/page.tsx index c770056321f..a61d9ab54bf 100644 --- a/web/src/app/admin/assistants/new/page.tsx +++ b/web/src/app/admin/assistants/new/page.tsx @@ -2,7 +2,7 @@ import { AssistantEditor } from "../AssistantEditor"; import { ErrorCallout } from "@/components/ErrorCallout"; import { RobotIcon } from "@/components/icons/icons"; import { BackButton } from "@/components/BackButton"; -import { Card } from "@tremor/react"; +import CardSection from "@/components/admin/CardSection"; import { AdminPageTitle } from "@/components/admin/Title"; import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS"; import { SuccessfulPersonaUpdateRedirectType } from "../enums"; @@ -17,14 +17,14 @@ export default async function Page() { ); } else { body = ( - + - + ); } diff --git a/web/src/app/admin/assistants/page.tsx b/web/src/app/admin/assistants/page.tsx index 15909470582..66139dbd030 100644 --- a/web/src/app/admin/assistants/page.tsx +++ b/web/src/app/admin/assistants/page.tsx @@ -1,34 +1,13 @@ import { PersonasTable } from "./PersonaTable"; import { FiPlusSquare } from "react-icons/fi"; import Link from "next/link"; -import { Divider, Text, Title } from "@tremor/react"; -import { fetchSS } from "@/lib/utilsSS"; -import { ErrorCallout } from "@/components/ErrorCallout"; -import { Persona } from "./interfaces"; -import { AssistantsIcon, RobotIcon } from "@/components/icons/icons"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; +import { Separator } from "@/components/ui/separator"; +import { AssistantsIcon } from "@/components/icons/icons"; import { AdminPageTitle } from "@/components/admin/Title"; export default async function Page() { - const allPersonaResponse = await fetchSS("/admin/persona"); - const editablePersonaResponse = await fetchSS( - "/admin/persona?get_editable=true" - ); - - if (!allPersonaResponse.ok || !editablePersonaResponse.ok) { - return ( - - ); - } - - const allPersonas = (await allPersonaResponse.json()) as Persona[]; - const editablePersonas = (await editablePersonaResponse.json()) as Persona[]; - return (
} title="Assistants" /> @@ -48,7 +27,7 @@ export default async function Page() {
- + Create an Assistant - + Existing Assistants - +
); diff --git a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx index 4f79c79936a..e7113717949 100644 --- a/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx +++ b/web/src/app/admin/bot/SlackBotConfigCreationForm.tsx @@ -16,11 +16,12 @@ import { isPersonaASlackBotPersona, updateSlackBotConfig, } from "./lib"; -import { Button, Card, Divider } from "@tremor/react"; +import { Separator } from "@/components/ui/separator"; +import CardSection from "@/components/admin/CardSection"; +import { Button } from "@/components/ui/button"; import { useRouter } from "next/navigation"; import { Persona } from "../assistants/interfaces"; import { useState } from "react"; -import MultiSelectDropdown from "@/components/MultiSelectDropdown"; import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; import { DocumentSetSelectable } from "@/components/documentSet/DocumentSetSelectable"; import CollapsibleSection from "../assistants/CollapsibleSection"; @@ -53,7 +54,7 @@ export const SlackBotCreationForm = ({ return (
- + {popup}
- +
); }; diff --git a/web/src/app/admin/bot/SlackBotTokensForm.tsx b/web/src/app/admin/bot/SlackBotTokensForm.tsx index ed6b9d11f24..f290787ce3d 100644 --- a/web/src/app/admin/bot/SlackBotTokensForm.tsx +++ b/web/src/app/admin/bot/SlackBotTokensForm.tsx @@ -4,7 +4,8 @@ import { PopupSpec } from "@/components/admin/connectors/Popup"; import { SlackBotTokens } from "@/lib/types"; import { TextFormField } from "@/components/admin/connectors/Field"; import { setSlackBotTokens } from "./lib"; -import { Button, Card } from "@tremor/react"; +import CardSection from "@/components/admin/CardSection"; +import { Button } from "@/components/ui/button"; interface SlackBotTokensFormProps { onClose: () => void; @@ -18,7 +19,7 @@ export const SlackBotTokensForm = ({ existingTokens, }: SlackBotTokensFormProps) => { return ( - +
-
)}
-
+ ); }; diff --git a/web/src/app/admin/bot/[id]/page.tsx b/web/src/app/admin/bot/[id]/page.tsx index 61fb6ee2e0d..cdffb4e8ff6 100644 --- a/web/src/app/admin/bot/[id]/page.tsx +++ b/web/src/app/admin/bot/[id]/page.tsx @@ -4,7 +4,7 @@ import { SlackBotCreationForm } from "../SlackBotConfigCreationForm"; import { fetchSS } from "@/lib/utilsSS"; import { ErrorCallout } from "@/components/ErrorCallout"; import { DocumentSet, SlackBotConfig } from "@/lib/types"; -import { Text } from "@tremor/react"; +import Text from "@/components/ui/text"; import { BackButton } from "@/components/BackButton"; import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; import { @@ -13,7 +13,8 @@ import { } from "@/lib/assistants/fetchAssistantsSS"; import { getStandardAnswerCategoriesIfEE } from "@/components/standardAnswers/getStandardAnswerCategoriesIfEE"; -async function Page({ params }: { params: { id: string } }) { +async function Page(props: { params: Promise<{ id: string }> }) { + const params = await props.params; const tasks = [ fetchSS("/manage/admin/slack-bot/config"), fetchSS("/manage/document-set"), diff --git a/web/src/app/admin/bot/new/page.tsx b/web/src/app/admin/bot/new/page.tsx index 094682c3696..79761934023 100644 --- a/web/src/app/admin/bot/new/page.tsx +++ b/web/src/app/admin/bot/new/page.tsx @@ -3,9 +3,8 @@ import { CPUIcon } from "@/components/icons/icons"; import { SlackBotCreationForm } from "../SlackBotConfigCreationForm"; import { fetchSS } from "@/lib/utilsSS"; import { ErrorCallout } from "@/components/ErrorCallout"; -import { DocumentSet, StandardAnswerCategory } from "@/lib/types"; +import { DocumentSet } from "@/lib/types"; import { BackButton } from "@/components/BackButton"; -import { Text } from "@tremor/react"; import { FetchAssistantsResponse, fetchAssistantsSS, diff --git a/web/src/app/admin/bot/page.tsx b/web/src/app/admin/bot/page.tsx index c3ef70ccbf6..af1f12d56bc 100644 --- a/web/src/app/admin/bot/page.tsx +++ b/web/src/app/admin/bot/page.tsx @@ -2,12 +2,7 @@ import { ThreeDotsLoader } from "@/components/Loading"; import { PageSelector } from "@/components/PageSelector"; -import { - CPUIcon, - EditIcon, - SlackIcon, - TrashIcon, -} from "@/components/icons/icons"; +import { EditIcon, SlackIcon, TrashIcon } from "@/components/icons/icons"; import { SlackBotConfig } from "@/lib/types"; import { useState } from "react"; import { useSlackBotConfigs, useSlackBotTokens } from "./hooks"; @@ -16,25 +11,20 @@ import { deleteSlackBotConfig, isPersonaASlackBotPersona } from "./lib"; import { SlackBotTokensForm } from "./SlackBotTokensForm"; import { AdminPageTitle } from "@/components/admin/Title"; import { - Button, Table, TableBody, TableCell, TableHead, - TableHeaderCell, + TableHeader, TableRow, - Text, - Title, -} from "@tremor/react"; -import { - FiArrowUpRight, - FiChevronDown, - FiChevronUp, - FiSlack, -} from "react-icons/fi"; +} from "@/components/ui/table"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; +import { FiArrowUpRight, FiChevronDown, FiChevronUp } from "react-icons/fi"; import Link from "next/link"; import { InstantSSRAutoRefresh } from "@/components/SSRAutoRefresh"; import { ErrorCallout } from "@/components/ErrorCallout"; +import { Button } from "@/components/ui/button"; const numToDisplay = 50; @@ -63,14 +53,14 @@ const SlackBotConfigsTable = ({ return (
- + - Channels - Assistant - Document Sets - Delete + Channels + Assistant + Document Sets + Delete - + {slackBotConfigs .slice(numToDisplay * (page - 1), numToDisplay * page) @@ -223,6 +213,7 @@ const Main = () => { className="text-blue-500" href="https://docs.danswer.dev/slack_bot_setup" target="_blank" + rel="noreferrer" > guide{" "} @@ -244,8 +235,7 @@ const Main = () => { onClick={() => { setSlackBotTokensModalIsOpen(!slackBotTokensModalIsOpen); }} - color="blue" - size="xs" + variant="outline" className="mt-2" icon={slackBotTokensModalIsOpen ? FiChevronUp : FiChevronDown} > @@ -277,7 +267,7 @@ const Main = () => {
- diff --git a/web/src/app/admin/configuration/document-processing/page.tsx b/web/src/app/admin/configuration/document-processing/page.tsx new file mode 100644 index 00000000000..1381e8c4061 --- /dev/null +++ b/web/src/app/admin/configuration/document-processing/page.tsx @@ -0,0 +1,137 @@ +"use client"; + +import { useState } from "react"; +import CardSection from "@/components/admin/CardSection"; +import { Button } from "@/components/ui/button"; +import { DocumentIcon2 } from "@/components/icons/icons"; +import useSWR from "swr"; +import { ThreeDotsLoader } from "@/components/Loading"; +import { AdminPageTitle } from "@/components/admin/Title"; +import { Lock } from "@phosphor-icons/react"; + +function Main() { + const { + data: isApiKeySet, + error, + mutate, + isLoading, + } = useSWR<{ + unstructured_api_key: string | null; + }>("/api/search-settings/unstructured-api-key-set", (url: string) => + fetch(url).then((res) => res.json()) + ); + + const [apiKey, setApiKey] = useState(""); + + const handleSave = async () => { + try { + await fetch( + `/api/search-settings/upsert-unstructured-api-key?unstructured_api_key=${apiKey}`, + { + method: "PUT", + } + ); + } catch (error) { + console.error("Failed to save API key:", error); + } + mutate(); + }; + + const handleDelete = async () => { + try { + await fetch("/api/search-settings/delete-unstructured-api-key", { + method: "DELETE", + }); + setApiKey(""); + } catch (error) { + console.error("Failed to delete API key:", error); + } + mutate(); + }; + + if (isLoading) { + return ; + } + return ( +
+ +

+ Process with Unstructured API +

+ +
+

+ Unstructured extracts and transforms complex data from formats like + .pdf, .docx, .png, .pptx, etc. into clean text for Danswer to + ingest. Provide an API key to enable Unstructured document + processing. +
+
Note: this will send documents to + Unstructured servers for processing. +

+

+ Learn more about Unstructured{" "} + + here + + . +

+
+ {isApiKeySet ? ( +
+ •••••••••••••••• + +
+ ) : ( + setApiKey(e.target.value)} + className="w-full p-3 border rounded-md bg-background text-text focus:ring-2 focus:ring-blue-500 transition duration-200" + /> + )} +
+
+ {isApiKeySet ? ( + <> + +

+ Delete the current API key before updating. +

+ + ) : ( + + )} +
+
+
+
+ ); +} + +function Page() { + return ( +
+ } + /> +
+
+ ); +} + +export default Page; diff --git a/web/src/app/admin/configuration/llm/ConfiguredLLMProviderDisplay.tsx b/web/src/app/admin/configuration/llm/ConfiguredLLMProviderDisplay.tsx index aa8c0f9725d..5921a9a2ada 100644 --- a/web/src/app/admin/configuration/llm/ConfiguredLLMProviderDisplay.tsx +++ b/web/src/app/admin/configuration/llm/ConfiguredLLMProviderDisplay.tsx @@ -6,7 +6,8 @@ import { CustomLLMProviderUpdateForm } from "./CustomLLMProviderUpdateForm"; import { useState } from "react"; import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; import { mutate } from "swr"; -import { Badge, Button } from "@tremor/react"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; import isEqual from "lodash/isEqual"; function LLMProviderUpdateModal({ @@ -112,27 +113,23 @@ function LLMProviderDisplay({ {existingLlmProvider && (
{existingLlmProvider.is_default_provider ? ( - - Default - + Default ) : ( - - Enabled - + Enabled )}
)}
+ {formIsVisible && ( void; existingLlmProvider?: FullLLMProvider; shouldMarkAsDefault?: boolean; setPopup?: (popup: PopupSpec) => void; + hideSuccess?: boolean; }) { const { mutate } = useSWRConfig(); @@ -72,6 +71,7 @@ export function CustomLLMProviderUpdateForm({ : [], is_public: existingLlmProvider?.is_public ?? true, groups: existingLlmProvider?.groups ?? [], + deployment_name: existingLlmProvider?.deployment_name ?? null, }; // Setup validation schema if required @@ -88,6 +88,7 @@ export function CustomLLMProviderUpdateForm({ // EE Only is_public: Yup.boolean().required(), groups: Yup.array().of(Yup.number()), + deployment_name: Yup.string().nullable(), }); return ( @@ -111,9 +112,6 @@ export function CustomLLMProviderUpdateForm({ return; } - // don't set groups if marked as public - const groups = values.is_public ? [] : values.groups; - // test the configuration if (!isEqual(values, initialValues)) { setIsTesting(true); @@ -193,7 +191,7 @@ export function CustomLLMProviderUpdateForm({ const successMsg = existingLlmProvider ? "Provider updated successfully!" : "Provider enabled successfully!"; - if (setPopup) { + if (!hideSuccess && setPopup) { setPopup({ type: "success", message: successMsg, @@ -213,6 +211,7 @@ export function CustomLLMProviderUpdateForm({ label="Display Name" subtext="A name which you can use to identify this provider when selecting it in the UI." placeholder="Display Name" + disabled={existingLlmProvider ? true : false} /> https://docs.litellm.ai/docs/providers @@ -234,7 +234,7 @@ export function CustomLLMProviderUpdateForm({ placeholder="Name of the custom provider" /> - + Fill in the following as is needed. Refer to the LiteLLM @@ -249,6 +249,14 @@ export function CustomLLMProviderUpdateForm({ type="password" /> + {existingLlmProvider?.deployment_name && ( + + )} + @@ -362,31 +369,34 @@ export function CustomLLMProviderUpdateForm({ )} /> - - - - List the individual models that you want to make available as - a part of this provider. At least one must be specified. For - the best experience your [Provider Name]/[Model Name] should - match one of the pairs listed{" "} - - here - - . - - } - /> + + + {!existingLlmProvider?.deployment_name && ( + + List the individual models that you want to make available + as a part of this provider. At least one must be specified. + For the best experience your [Provider Name]/[Model Name] + should match one of the pairs listed{" "} + + here + + . + + } + /> + )} - + - + label="[Optional] Fast Model" + placeholder="E.g. gpt-4" + /> + )} - + - @@ -117,7 +120,7 @@ function AddCustomLLMProvider({ } return ( - ); @@ -154,7 +157,7 @@ export function LLMConfiguration() { /> ) : ( - + Please set one up below in order to start using Danswer! 
)} diff --git a/web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx b/web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx index 11c84825232..59c0bb0fd1a 100644 --- a/web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx +++ b/web/src/app/admin/configuration/llm/LLMProviderUpdateForm.tsx @@ -1,27 +1,21 @@ import { LoadingAnimation } from "@/components/Loading"; import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; -import { Button, Divider, Text } from "@tremor/react"; +import Text from "@/components/ui/text"; +import { Separator } from "@/components/ui/separator"; +import { Button } from "@/components/ui/button"; import { Form, Formik } from "formik"; import { FiTrash } from "react-icons/fi"; import { LLM_PROVIDERS_ADMIN_URL } from "./constants"; import { SelectorFormField, TextFormField, - BooleanFormField, MultiSelectField, } from "@/components/admin/connectors/Field"; import { useState } from "react"; -import { Bubble } from "@/components/Bubble"; -import { GroupsIcon } from "@/components/icons/icons"; import { useSWRConfig } from "swr"; -import { - defaultModelsByProvider, - getDisplayNameForModel, - useUserGroups, -} from "@/lib/hooks"; +import { defaultModelsByProvider, getDisplayNameForModel } from "@/lib/hooks"; import { FullLLMProvider, WellKnownLLMProviderDescriptor } from "./interfaces"; import { PopupSpec } from "@/components/admin/connectors/Popup"; -import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; import * as Yup from "yup"; import isEqual from "lodash/isEqual"; import { IsPublicGroupSelector } from "@/components/IsPublicGroupSelector"; @@ -33,6 +27,7 @@ export function LLMProviderUpdateForm({ shouldMarkAsDefault, setPopup, hideAdvanced, + hideSuccess, }: { llmProviderDescriptor: WellKnownLLMProviderDescriptor; onClose: () => void; @@ -40,14 +35,10 @@ export function LLMProviderUpdateForm({ shouldMarkAsDefault?: boolean; hideAdvanced?: boolean; setPopup?: (popup: PopupSpec) => void; + hideSuccess?: boolean; }) { const { mutate } = useSWRConfig(); - const isPaidEnterpriseFeaturesEnabled = usePaidEnterpriseFeaturesEnabled(); - - // EE only - const { data: userGroups, isLoading: userGroupsIsLoading } = useUserGroups(); - const [isTesting, setIsTesting] = useState(false); const [testError, setTestError] = useState(""); @@ -81,6 +72,7 @@ export function LLMProviderUpdateForm({ existingLlmProvider?.display_model_names || defaultModelsByProvider[llmProviderDescriptor.name] || [], + deployment_name: existingLlmProvider?.deployment_name, }; // Setup validation schema if required @@ -112,6 +104,9 @@ export function LLMProviderUpdateForm({ ), } : {}), + deployment_name: llmProviderDescriptor.deployment_name_required + ? Yup.string().required("Deployment Name is required") + : Yup.string().nullable(), default_model_name: Yup.string().required("Model name is required"), fast_default_model_name: Yup.string().nullable(), // EE Only @@ -151,7 +146,9 @@ export function LLMProviderUpdateForm({ } const response = await fetch( - `${LLM_PROVIDERS_ADMIN_URL}${existingLlmProvider ? "" : "?is_creation=true"}`, + `${LLM_PROVIDERS_ADMIN_URL}${ + existingLlmProvider ? "" : "?is_creation=true" + }`, { method: "PUT", headers: { @@ -211,7 +208,7 @@ export function LLMProviderUpdateForm({ const successMsg = existingLlmProvider ? "Provider updated successfully!" 
: "Provider enabled successfully!"; - if (setPopup) { + if (!hideSuccess && setPopup) { setPopup({ type: "success", message: successMsg, @@ -278,9 +275,9 @@ export function LLMProviderUpdateForm({ ))} - {!hideAdvanced && ( + {!(hideAdvanced && llmProviderDescriptor.name != "azure") && ( <> - + {llmProviderDescriptor.llm_names.length > 0 ? ( )} - {llmProviderDescriptor.llm_names.length > 0 ? ( - ({ - name: getDisplayNameForModel(name), - value: name, - }))} - includeDefault - maxHeight="max-h-56" - /> - ) : ( + {llmProviderDescriptor.deployment_name_required && ( )} - + {!llmProviderDescriptor.single_model_supported && + (llmProviderDescriptor.llm_names.length > 0 ? ( + ({ + name: getDisplayNameForModel(name), + value: name, + }))} + includeDefault + maxHeight="max-h-56" + /> + ) : ( + + ))} {llmProviderDescriptor.name != "azure" && ( - + <> + + + + )} {showAdvancedOptions && ( @@ -378,7 +387,7 @@ export function LLMProviderUpdateForm({ {testError && {testError}}
-
-
@@ -122,8 +131,7 @@ export default function UpgradingPage({ + ) : ( diff --git a/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx b/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx index 7bd42947116..f03b74ba848 100644 --- a/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx +++ b/web/src/app/admin/connector/[ccPairId]/ConfigDisplay.tsx @@ -1,6 +1,7 @@ +import CardSection from "@/components/admin/CardSection"; import { getNameFromPath } from "@/lib/fileUtils"; import { ValidSources } from "@/lib/types"; -import { List, ListItem, Card, Title } from "@tremor/react"; +import Title from "@/components/ui/title"; function convertObjectToString(obj: any): string | any { // Check if obj is an object and not an array or null @@ -73,28 +74,37 @@ export function AdvancedConfigDisplay({ return ( <> Advanced Configuration - - + +
    {pruneFreq && ( - +
  • Pruning Frequency {formatPruneFrequency(pruneFreq)} - +
  • )} {refreshFreq && ( - +
  • Refresh Frequency {formatRefreshFrequency(refreshFreq)} - +
  • )} {indexingStart && ( - +
  • Indexing Start {formatDate(indexingStart)} - +
  • )} - - +
+
); } @@ -116,16 +126,19 @@ export function ConfigDisplay({ return ( <> Configuration - - + +
    {configEntries.map(([key, value]) => ( - +
  • {key} {convertObjectToString(value) || "-"} - +
  • ))} - - +
+
); } diff --git a/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx b/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx index 7ea03747bd3..ccef14b5a35 100644 --- a/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx +++ b/web/src/app/admin/connector/[ccPairId]/DeletionButton.tsx @@ -1,6 +1,6 @@ "use client"; -import { Button } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { CCPairFullInfo, ConnectorCredentialPairStatus } from "./types"; import { usePopup } from "@/components/admin/connectors/Popup"; import { FiTrash } from "react-icons/fi"; @@ -30,8 +30,7 @@ export function DeletionButton({ ccPair }: { ccPair: CCPairFullInfo }) {
{popup}
- + - Time Started - Status - New Doc Cnt - + Time Started + Status + New Doc Cnt +
- - - Total Doc Cnt - - - + + + + + Total Doc Cnt + + + + + Total number of documents replaced in the index during + this indexing attempt + + +
-
- Error Message +
+ Error Message - + {currentPageData.index_attempts.map((indexAttempt) => { const docsPerMinute = @@ -223,7 +275,6 @@ export function IndexingAttemptsTable({ ccPair }: { ccPair: CCPairFullInfo }) { {docsPerMinute ? (
diff --git a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx index 10460459e32..71d26a8eb47 100644 --- a/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx +++ b/web/src/app/admin/connector/[ccPairId]/ModifyStatusButtonCluster.tsx @@ -1,6 +1,6 @@ "use client"; -import { Button } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { CCPairFullInfo, ConnectorCredentialPairStatus } from "./types"; import { usePopup } from "@/components/admin/connectors/Popup"; import { mutate } from "swr"; @@ -19,8 +19,7 @@ export function ModifyStatusButtonCluster({ {popup} {ccPair.status === ConnectorCredentialPairStatus.PAUSED ? ( ) : ( -
@@ -173,7 +171,9 @@ function Main({ ccPairId }: { ccPairId: number }) { onClick={() => ccPair.is_editable_for_current_user && startEditing() } - className={`group flex ${ccPair.is_editable_for_current_user ? "cursor-pointer" : ""} text-3xl text-emphasis gap-x-2 items-center font-bold`} + className={`group flex ${ + ccPair.is_editable_for_current_user ? "cursor-pointer" : "" + } text-3xl text-emphasis gap-x-2 items-center font-bold`} > {ccPair.name} {ccPair.is_editable_for_current_user && ( @@ -192,8 +192,10 @@ function Main({ ccPairId }: { ccPairId: number }) { connectorId={ccPair.connector.id} credentialId={ccPair.credential.id} isDisabled={ + ccPair.indexing || ccPair.status === ConnectorCredentialPairStatus.PAUSED } + isIndexing={ccPair.indexing} isDeleting={isDeleting} /> )} @@ -231,7 +233,7 @@ function Main({ ccPairId }: { ccPairId: number }) { {credentialTemplates[ccPair.connector.source] && ccPair.is_editable_for_current_user && ( <> - + Credentials @@ -242,7 +244,7 @@ function Main({ ccPairId }: { ccPairId: number }) { /> )} - + - +
{ccPair.is_editable_for_current_user && ( @@ -276,7 +278,8 @@ function Main({ ccPairId }: { ccPairId: number }) { ); } -export default function Page({ params }: { params: { ccPairId: string } }) { +export default function Page(props: { params: Promise<{ ccPairId: string }> }) { + const params = use(props.params); const ccPairId = parseInt(params.ccPairId); return ( diff --git a/web/src/app/admin/connector/[ccPairId]/types.ts b/web/src/app/admin/connector/[ccPairId]/types.ts index 55bbe955730..5e9cec428c1 100644 --- a/web/src/app/admin/connector/[ccPairId]/types.ts +++ b/web/src/app/admin/connector/[ccPairId]/types.ts @@ -25,6 +25,7 @@ export interface CCPairFullInfo { is_public: boolean; is_editable_for_current_user: boolean; deletion_failure_message: string | null; + indexing: boolean; } export interface PaginatedIndexAttempts { diff --git a/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx b/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx index 8294b6ca0d0..9ea868c9389 100644 --- a/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx +++ b/web/src/app/admin/connectors/[connector]/AddConnectorPage.tsx @@ -1,10 +1,10 @@ "use client"; -import { FetchError, errorHandlingFetcher } from "@/lib/fetcher"; +import { errorHandlingFetcher } from "@/lib/fetcher"; import useSWR, { mutate } from "swr"; import { HealthCheckBanner } from "@/components/health/healthcheck"; -import { Card, Title } from "@tremor/react"; +import Title from "@/components/ui/title"; import { AdminPageTitle } from "@/components/admin/Title"; import { buildSimilarCredentialInfoURL } from "@/app/admin/connector/[ccPairId]/lib"; import { usePopup } from "@/components/admin/connectors/Popup"; @@ -29,6 +29,8 @@ import { defaultPruneFreqDays, defaultRefreshFreqMinutes, isLoadState, + Connector, + ConnectorBase, } from "@/lib/connectors/connectors"; import { Modal } from "@/components/Modal"; import GDriveMain from "./pages/gdrive/GoogleDrivePage"; @@ -38,16 +40,14 @@ import { useGoogleDriveCredentials, } from "./pages/utils/hooks"; import { Formik } from "formik"; -import { AccessTypeForm } from "@/components/admin/connectors/AccessTypeForm"; -import { AccessTypeGroupSelector } from "@/components/admin/connectors/AccessTypeGroupSelector"; import NavigationRow from "./NavigationRow"; - +import { useRouter } from "next/navigation"; +import CardSection from "@/components/admin/CardSection"; export interface AdvancedConfig { refreshFreq: number; pruneFreq: number; indexingStart: string; } -import { Connector, ConnectorBase } from "@/lib/connectors/connectors"; const BASE_CONNECTOR_URL = "/api/manage/admin/connector"; @@ -111,6 +111,8 @@ export default function AddConnector({ }: { connector: ConfigurableSources; }) { + const router = useRouter(); + // State for managing credentials and files const [currentCredential, setCurrentCredential] = useState | null>(null); @@ -140,8 +142,8 @@ export default function AddConnector({ const { popup, setPopup } = usePopup(); // Hooks for Google Drive and Gmail credentials - const { liveGDriveCredential } = useGoogleDriveCredentials(); - const { liveGmailCredential } = useGmailCredentials(); + const { liveGDriveCredential } = useGoogleDriveCredentials(connector); + const { liveGmailCredential } = useGmailCredentials(connector); // Check if credential is activated const credentialActivated = @@ -201,18 +203,20 @@ export default function AddConnector({ }; const onSuccess = () => { - setPopup({ - message: "Connector created! 
Redirecting to connector home page", - type: "success", - }); - setTimeout(() => { - window.open("/admin/indexing/status", "_self"); - }, 1000); + router.push("/admin/indexing/status?message=connector-created"); }; return ( [ + field.name, + field.default || "", + ]) + ), + }} validationSchema={createConnectorValidationSchema(connector)} onSubmit={async (values) => { const { @@ -250,9 +254,9 @@ export default function AddConnector({ // Apply advanced configuration-specific transforms. const advancedConfiguration: any = { - pruneFreq: (pruneFreq || defaultPruneFreqDays) * 60 * 60 * 24, + pruneFreq: (pruneFreq ?? defaultPruneFreqDays) * 60 * 60 * 24, indexingStart: convertStringToDateTime(indexingStart), - refreshFreq: (refreshFreq || defaultRefreshFreqMinutes) * 60, + refreshFreq: (refreshFreq ?? defaultRefreshFreqMinutes) * 60, }; // Google sites-specific handling @@ -265,6 +269,7 @@ export default function AddConnector({ advancedConfiguration.pruneFreq, advancedConfiguration.indexingStart, values.access_type == "public", + groups, name ); if (response) { @@ -359,7 +364,7 @@ export default function AddConnector({ /> {formStep == 0 && ( - + Select a credential {connector == "google_drive" ? ( @@ -416,27 +421,31 @@ export default function AddConnector({ )} )} - + )} {formStep == 1 && ( - + - - - - + )} {formStep === 2 && ( - + - + )} + + +

{buttonText}

+ +
+ ); +} export default function Sidebar() { const { formStep, setFormStep, connector, allowAdvanced, allowCreate } = useFormContext(); const combinedSettings = useContext(SettingsContext); - const { isLoadingUser, isAdmin } = useUser(); + const { isCurator, isAdmin, user } = useUser(); if (!combinedSettings) { return null; } @@ -55,17 +90,7 @@ export default function Sidebar() {
-
- - -

- {isAdmin ? "Admin Page" : "Curator Page"} -

- -
+
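For reference, the admin pages in this change adopt the Next.js 15 convention in which a route's `params` prop is delivered as a Promise: client components unwrap it with React's `use()` hook, while async server components simply `await` it. A minimal sketch of the client-side pattern, reusing the `[ccPairId]` segment from the hunk above (the rendered markup is illustrative only):

```tsx
"use client";

import { use } from "react";

// Sketch: in Next.js 15-style routes, `params` arrives as a Promise, so a
// client page unwraps it with `use()` before reading route segments.
export default function Page(props: { params: Promise<{ ccPairId: string }> }) {
  const params = use(props.params); // suspends until the params promise resolves
  const ccPairId = parseInt(params.ccPairId);

  return <div>Connector pair {ccPairId}</div>;
}
```

Server components that are already `async` (for example the `[connector]` page later in this diff) can skip `use()` and write `const params = await props.params;` instead.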
diff --git a/web/src/app/admin/connectors/[connector]/auth/callback/route.ts b/web/src/app/admin/connectors/[connector]/auth/callback/route.ts index 9d80e1b2fd2..79d831cfba6 100644 --- a/web/src/app/admin/connectors/[connector]/auth/callback/route.ts +++ b/web/src/app/admin/connectors/[connector]/auth/callback/route.ts @@ -9,6 +9,7 @@ import { import { processCookies } from "@/lib/userSS"; export const GET = async (request: NextRequest) => { + const requestCookies = await cookies(); const connector = request.url.includes("gmail") ? "gmail" : "google-drive"; const callbackEndpoint = `/manage/connector/${connector}/callback`; const url = new URL(buildUrl(callbackEndpoint)); @@ -16,7 +17,7 @@ export const GET = async (request: NextRequest) => { const response = await fetch(url.toString(), { headers: { - cookie: processCookies(cookies()), + cookie: processCookies(requestCookies), }, }); @@ -33,7 +34,7 @@ export const GET = async (request: NextRequest) => { ? GMAIL_AUTH_IS_ADMIN_COOKIE_NAME : GOOGLE_DRIVE_AUTH_IS_ADMIN_COOKIE_NAME; - if (cookies().get(authCookieName)?.value?.toLowerCase() === "true") { + if (requestCookies.get(authCookieName)?.value?.toLowerCase() === "true") { return NextResponse.redirect( new URL(`/admin/connectors/${connector}`, getDomain(request)) ); diff --git a/web/src/app/admin/connectors/[connector]/page.tsx b/web/src/app/admin/connectors/[connector]/page.tsx index 1e5a690f93d..f9f737a0e3a 100644 --- a/web/src/app/admin/connectors/[connector]/page.tsx +++ b/web/src/app/admin/connectors/[connector]/page.tsx @@ -1,11 +1,10 @@ import { ConfigurableSources } from "@/lib/types"; import ConnectorWrapper from "./ConnectorWrapper"; -export default async function Page({ - params, -}: { - params: { connector: string }; +export default async function Page(props: { + params: Promise<{ connector: string }>; }) { + const params = await props.params; return ( void; } export default function FileInput({ @@ -15,9 +14,9 @@ export default function FileInput({ label, optional = false, description, - selectedFiles, - setSelectedFiles, }: FileInputProps) { + const [field, meta, helpers] = useField(name); + return ( <>
- + - Document Name - Is Searchable? - Score + Document Name + Is Searchable? + Score - + {documents .slice((page - 1) * numToDisplay, page * numToDisplay) diff --git a/web/src/app/admin/documents/feedback/page.tsx b/web/src/app/admin/documents/feedback/page.tsx index 30f425bc725..d66d3f60a99 100644 --- a/web/src/app/admin/documents/feedback/page.tsx +++ b/web/src/app/admin/documents/feedback/page.tsx @@ -6,7 +6,7 @@ import { useMostReactedToDocuments } from "@/lib/hooks"; import { DocumentFeedbackTable } from "./DocumentFeedbackTable"; import { numPages, numToDisplay } from "./constants"; import { AdminPageTitle } from "@/components/admin/Title"; -import { Title } from "@tremor/react"; +import Title from "@/components/ui/title"; const Main = () => { const { diff --git a/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx b/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx index fb7e56cc60c..1691b9c46d8 100644 --- a/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx +++ b/web/src/app/admin/documents/sets/DocumentSetCreationForm.tsx @@ -16,7 +16,8 @@ import { } from "@/lib/types"; import { TextFormField } from "@/components/admin/connectors/Field"; import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle"; -import { Button, Divider } from "@tremor/react"; +import { Separator } from "@/components/ui/separator"; +import { Button } from "@/components/ui/button"; import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; import { IsPublicGroupSelector } from "@/components/IsPublicGroupSelector"; import React, { useEffect, useState } from "react"; @@ -125,6 +126,7 @@ export const DocumentSetCreationForm = ({ placeholder="Describe what the document set represents" autoCompleteDisabled={true} /> + {isPaidEnterpriseFeaturesEnabled && ( )} - + {user?.role === UserRole.CURATOR ? ( <> @@ -177,7 +179,7 @@ export const DocumentSetCreationForm = ({ const ind = props.values.cc_pair_ids.indexOf( ccPair.cc_pair_id ); - let isSelected = ind !== -1; + const isSelected = ind !== -1; return (
0 ? ( <> - +

These connectors are not available to the{" "} {userGroups && userGroups.length > 1 - ? `group${props.values.groups.length > 1 ? "s" : ""} you have selected` + ? `group${ + props.values.groups.length > 1 ? "s" : "" + } you have selected` : "group you curate"} :

@@ -291,7 +295,7 @@ export const DocumentSetCreationForm = ({ const ind = props.values.cc_pair_ids.indexOf( ccPair.cc_pair_id ); - let isSelected = ind !== -1; + const isSelected = ind !== -1; return (
); } -export default function Page({ - params, -}: { - params: { documentSetId: string }; +export default function Page(props: { + params: Promise<{ documentSetId: string }>; }) { + const params = use(props.params); const documentSetId = parseInt(params.documentSetId); return ( diff --git a/web/src/app/admin/documents/sets/new/page.tsx b/web/src/app/admin/documents/sets/new/page.tsx index 24b337460b8..828f5128c4b 100644 --- a/web/src/app/admin/documents/sets/new/page.tsx +++ b/web/src/app/admin/documents/sets/new/page.tsx @@ -9,12 +9,11 @@ import { } from "@/lib/hooks"; import { ThreeDotsLoader } from "@/components/Loading"; import { usePopup } from "@/components/admin/connectors/Popup"; -import { Card } from "@tremor/react"; import { BackButton } from "@/components/BackButton"; import { ErrorCallout } from "@/components/ErrorCallout"; import { useRouter } from "next/navigation"; -import { UserGroup } from "@/lib/types"; import { refreshDocumentSets } from "../hooks"; +import CardSection from "@/components/admin/CardSection"; function Main() { const { popup, setPopup } = usePopup(); @@ -46,7 +45,7 @@ function Main() { <> {popup} - + - + ); } diff --git a/web/src/app/admin/documents/sets/page.tsx b/web/src/app/admin/documents/sets/page.tsx index 41104f9c343..5d443cd01ce 100644 --- a/web/src/app/admin/documents/sets/page.tsx +++ b/web/src/app/admin/documents/sets/page.tsx @@ -7,24 +7,21 @@ import { Table, TableHead, TableRow, - TableHeaderCell, TableBody, TableCell, - Title, - Divider, - Badge, -} from "@tremor/react"; +} from "@/components/ui/table"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; +import { Separator } from "@/components/ui/separator"; +import { Button } from "@/components/ui/button"; import { useConnectorCredentialIndexingStatus } from "@/lib/hooks"; import { ConnectorIndexingStatus, DocumentSet } from "@/lib/types"; -import { useState, useEffect } from "react"; -import { getCurrentUser } from "@/lib/user"; -import { User, UserRole } from "@/lib/types"; +import { useState } from "react"; import { useDocumentSets } from "./hooks"; import { ConnectorTitle } from "@/components/admin/connectors/ConnectorTitle"; import { deleteDocumentSet } from "./lib"; import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; import { AdminPageTitle } from "@/components/admin/Title"; -import { Button, Text } from "@tremor/react"; import { FiAlertTriangle, FiCheckCircle, @@ -36,6 +33,14 @@ import { import { DeleteButton } from "@/components/DeleteButton"; import Link from "next/link"; import { useRouter } from "next/navigation"; +import { TableHeader } from "@/components/ui/table"; +import { Badge } from "@/components/ui/badge"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; const numToDisplay = 50; @@ -48,8 +53,6 @@ const EditRow = ({ }) => { const router = useRouter(); - const [isSyncingTooltipOpen, setIsSyncingTooltipOpen] = useState(false); - if (!isEditable) { return (
@@ -60,37 +63,36 @@ const EditRow = ({ return (
- {isSyncingTooltipOpen && ( -
- Cannot update - while syncing! Wait for the sync to finish, then try again. -
- )} -
{ - if (documentSet.is_up_to_date) { - router.push(`/admin/documents/sets/${documentSet.id}`); - } - }} - onMouseEnter={() => { - if (!documentSet.is_up_to_date) { - setIsSyncingTooltipOpen(true); - } - }} - onMouseLeave={() => { - if (!documentSet.is_up_to_date) { - setIsSyncingTooltipOpen(false); - } - }} - > - - {documentSet.name} -
+ + + +
{ + if (documentSet.is_up_to_date) { + router.push(`/admin/documents/sets/${documentSet.id}`); + } + }} + > + + {documentSet.name} +
+
+ {!documentSet.is_up_to_date && ( + +
+ + Cannot update while syncing! Wait for the sync to finish, then + try again. +
+
+ )} +
+
); }; @@ -135,15 +137,15 @@ const DocumentSetTable = ({
Existing Document Sets
- + - Name - Connectors - Status - Public - Delete + Name + Connectors + Status + Public + Delete - + {sortedDocumentSets .slice((page - 1) * numToDisplay, page * numToDisplay) @@ -189,15 +191,19 @@ const DocumentSetTable = ({ {documentSet.is_up_to_date ? ( - + Up to Date ) : documentSet.cc_pair_descriptors.length > 0 ? ( - + Syncing ) : ( - + Deleting )} @@ -206,7 +212,7 @@ const DocumentSetTable = ({ {documentSet.is_public ? ( Public @@ -214,7 +220,7 @@ const DocumentSetTable = ({ ) : ( Private @@ -323,15 +329,13 @@ const Main = () => {
- +
{documentSets.length > 0 && ( <> - + >; @@ -50,6 +54,11 @@ export function EmbeddingModelSelection({ updateSelectedProvider: ( model: CloudEmbeddingModel | HostedEmbeddingModel ) => void; + updateCurrentModel: ( + newModel: string, + provider_type: EmbeddingProvider + ) => void; + advancedEmbeddingDetails: AdvancedSearchConfiguration; }) { // Cloud Provider based modals const [showTentativeProvider, setShowTentativeProvider] = @@ -73,12 +82,6 @@ export function EmbeddingModelSelection({ const [showTentativeOpenProvider, setShowTentativeOpenProvider] = useState(null); - // Enabled / unenabled providers - const [newEnabledProviders, setNewEnabledProviders] = useState([]); - const [newUnenabledProviders, setNewUnenabledProviders] = useState( - [] - ); - const [showDeleteCredentialsModal, setShowDeleteCredentialsModal] = useState(false); @@ -91,7 +94,10 @@ export function EmbeddingModelSelection({ { refreshInterval: 5000 } // 5 seconds ); - const { data: embeddingProviderDetails } = useSWR( + const { + data: embeddingProviderDetails, + mutate: mutateEmbeddingProviderDetails, + } = useSWR( EMBEDDING_PROVIDERS_ADMIN_URL, errorHandlingFetcher, { refreshInterval: 5000 } // 5 seconds @@ -133,32 +139,6 @@ export function EmbeddingModelSelection({ } }; - const clientsideAddProvider = (provider: CloudEmbeddingProvider) => { - const providerType = provider.provider_type; - setNewEnabledProviders((newEnabledProviders) => [ - ...newEnabledProviders, - providerType, - ]); - setNewUnenabledProviders((newUnenabledProviders) => - newUnenabledProviders.filter( - (givenProviderType) => givenProviderType != providerType - ) - ); - }; - - const clientsideRemoveProvider = (provider: CloudEmbeddingProvider) => { - const providerType = provider.provider_type; - setNewEnabledProviders((newEnabledProviders) => - newEnabledProviders.filter( - (givenProviderType) => givenProviderType != providerType - ) - ); - setNewUnenabledProviders((newUnenabledProviders) => [ - ...newUnenabledProviders, - providerType, - ]); - }; - return (
{alreadySelectedModel && ( @@ -187,14 +167,16 @@ export function EmbeddingModelSelection({ {showTentativeProvider && ( { setShowTentativeProvider(showUnconfiguredProvider); - clientsideAddProvider(showTentativeProvider); if (showModelInQueue) { setShowTentativeModel(showModelInQueue); } + mutateEmbeddingProviderDetails(); }} onCancel={() => { setShowModelInQueue(null); @@ -206,10 +188,11 @@ export function EmbeddingModelSelection({ {changeCredentialsProvider && ( { - clientsideRemoveProvider(changeCredentialsProvider); setChangeCredentialsProvider(null); + mutateEmbeddingProviderDetails(); }} provider={changeCredentialsProvider} onConfirm={() => setChangeCredentialsProvider(null)} @@ -237,12 +220,13 @@ export function EmbeddingModelSelection({ modelProvider={showTentativeProvider!} onConfirm={() => { setShowDeleteCredentialsModal(false); + mutateEmbeddingProviderDetails(); }} onCancel={() => setShowDeleteCredentialsModal(false)} /> )} -

+

Select from cloud, self-hosted models, or continue with your current embedding model.

@@ -292,14 +276,13 @@ export function EmbeddingModelSelection({ {modelTab == "cloud" && ( diff --git a/web/src/app/admin/embeddings/RerankingFormPage.tsx b/web/src/app/admin/embeddings/RerankingFormPage.tsx index 5425fc89329..6bf887bc230 100644 --- a/web/src/app/admin/embeddings/RerankingFormPage.tsx +++ b/web/src/app/admin/embeddings/RerankingFormPage.tsx @@ -20,7 +20,7 @@ import { MixedBreadIcon, } from "@/components/icons/icons"; import { Modal } from "@/components/Modal"; -import { Button } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { TextFormField } from "@/components/admin/connectors/Field"; import { SettingsContext } from "@/components/settings/SettingsProvider"; @@ -79,15 +79,21 @@ const RerankingDetailsForm = forwardRef< > {({ values, setFieldValue, resetForm }) => { const resetRerankingValues = () => { - setRerankingDetails(originalRerankingDetails); + setRerankingDetails({ + rerank_api_key: null, + rerank_provider_type: null, + rerank_model_name: null, + rerank_api_url: null, + }); resetForm(); }; return (
-

- Post-processing -

+

+ Select from cloud, self-hosted models, or use no reranking + model. +

{originalRerankingDetails.rerank_model_name && ( @@ -342,8 +349,7 @@ const RerankingDetailsForm = forwardRef< onClick={() => { setShowLiteLLMConfigurationModal(false); }} - color="blue" - size="xs" + variant="submit" > Update @@ -391,8 +397,7 @@ const RerankingDetailsForm = forwardRef<
diff --git a/web/src/app/admin/embeddings/interfaces.ts b/web/src/app/admin/embeddings/interfaces.ts index 2fc328eab86..3f7549fa741 100644 --- a/web/src/app/admin/embeddings/interfaces.ts +++ b/web/src/app/admin/embeddings/interfaces.ts @@ -21,11 +21,6 @@ export enum RerankerProvider { } export interface AdvancedSearchConfiguration { - model_name: string; - model_dim: number; - normalize: boolean; - query_prefix: string; - passage_prefix: string; index_name: string | null; multipass_indexing: boolean; multilingual_expansion: string[]; diff --git a/web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx b/web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx index d6e29424114..8aebcdce0d8 100644 --- a/web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx +++ b/web/src/app/admin/embeddings/modals/AlreadyPickedModal.tsx @@ -1,6 +1,7 @@ import React from "react"; import { Modal } from "@/components/Modal"; -import { Button, Text } from "@tremor/react"; +import { Button } from "@/components/ui/button"; +import Text from "@/components/ui/text"; import { CloudEmbeddingModel } from "../../../../components/embedding/interfaces"; @@ -22,7 +23,7 @@ export function AlreadyPickedModal({ You can select a different one if you want!
-
diff --git a/web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx b/web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx index 636aa562474..044155fce3a 100644 --- a/web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx +++ b/web/src/app/admin/embeddings/modals/ChangeCredentialsModal.tsx @@ -1,7 +1,10 @@ import React, { useRef, useState } from "react"; import { Modal } from "@/components/Modal"; -import { Button, Text, Callout, Subtitle, Divider } from "@tremor/react"; -import { Label, TextFormField } from "@/components/admin/connectors/Field"; +import { Callout } from "@/components/ui/callout"; +import Text from "@/components/ui/text"; +import { Separator } from "@/components/ui/separator"; +import { Button } from "@/components/ui/button"; +import { Label } from "@/components/admin/connectors/Field"; import { CloudEmbeddingProvider } from "../../../../components/embedding/interfaces"; import { EMBEDDING_PROVIDERS_ADMIN_URL, @@ -16,6 +19,7 @@ export function ChangeCredentialsModal({ onDeleted, useFileUpload, isProxy = false, + isAzure = false, }: { provider: CloudEmbeddingProvider; onConfirm: () => void; @@ -23,6 +27,7 @@ export function ChangeCredentialsModal({ onDeleted: () => void; useFileUpload: boolean; isProxy?: boolean; + isAzure?: boolean; }) { const [apiKey, setApiKey] = useState(""); const [apiUrl, setApiUrl] = useState(""); @@ -140,7 +145,9 @@ export function ChangeCredentialsModal({ const errorData = await updateResponse.json(); throw new Error( errorData.detail || - `Failed to update provider- check your ${isProxy ? "API URL" : "API key"}` + `Failed to update provider- check your ${ + isProxy ? "API URL" : "API key" + }` ); } @@ -151,142 +158,145 @@ export function ChangeCredentialsModal({ ); } }; - return ( <> -

- You can modify your configuration by providing a new API key - {isProxy ? " or API URL." : "."} -

+ {!isAzure && ( + <> +

+ You can modify your configuration by providing a new API key + {isProxy ? " or API URL." : "."} +

-
- - {useFileUpload ? ( - <> - - - {fileName &&

Uploaded file: {fileName}

} - - ) : ( - <> - setApiKey(e.target.value)} - placeholder="Paste your API key here" - /> - - )} +
+ + {useFileUpload ? ( + <> + + + {fileName &&

Uploaded file: {fileName}

} + + ) : ( + <> + setApiKey(e.target.value)} + placeholder="Paste your API key here" + /> + + )} - {isProxy && ( - <> - + {isProxy && ( + <> + - setApiUrl(e.target.value)} - placeholder="Paste your API URL here" - /> + setApiUrl(e.target.value)} + placeholder="Paste your API URL here" + /> - {deletionError && ( - - {deletionError} - - )} + {deletionError && ( + + {deletionError} + + )} -
- -

- Since you are using a liteLLM proxy, we'll need a model - name to test the connection with. -

-
- setModelName(e.target.value)} - placeholder="Paste your API URL here" - /> +
+ +

+ Since you are using a liteLLM proxy, we'll need a + model name to test the connection with. +

+
+ setModelName(e.target.value)} + placeholder="Paste your model name here" + /> + + )} - {deletionError && ( - - {deletionError} + {testError && ( + + {testError} )} - - )} - - {testError && ( - - {testError} - - )} - + - + +
+ + )} - - You can also delete your configuration. - - - This is only possible if you have already switched to a different - embedding type! - + + You can delete your configuration. + + + This is only possible if you have already switched to a different + embedding type! + - - {deletionError && ( - - {deletionError} - - )} -
+ + {deletionError && ( + + {deletionError} + + )}
); diff --git a/web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx b/web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx index 2d77d0febd3..608ce8239f6 100644 --- a/web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx +++ b/web/src/app/admin/embeddings/modals/DeleteCredentialsModal.tsx @@ -1,6 +1,8 @@ import React from "react"; import { Modal } from "@/components/Modal"; -import { Button, Text, Callout } from "@tremor/react"; +import Text from "@/components/ui/text"; +import { Button } from "@/components/ui/button"; +import { Callout } from "@/components/ui/callout"; import { CloudEmbeddingProvider } from "../../../../components/embedding/interfaces"; export function DeleteCredentialsModal({ @@ -23,16 +25,12 @@ export function DeleteCredentialsModal({ You're about to delete your {modelProvider.provider_type}{" "} credentials. Are you sure? - +
- -
diff --git a/web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx b/web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx index 3f59de11657..ca341a9b308 100644 --- a/web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx +++ b/web/src/app/admin/embeddings/modals/ModelSelectionModal.tsx @@ -1,9 +1,8 @@ import { Modal } from "@/components/Modal"; -import { Button, Text, Callout } from "@tremor/react"; -import { - EmbeddingModelDescriptor, - HostedEmbeddingModel, -} from "../../../../components/embedding/interfaces"; +import Text from "@/components/ui/text"; +import { Callout } from "@/components/ui/callout"; +import { Button } from "@/components/ui/button"; +import { HostedEmbeddingModel } from "../../../../components/embedding/interfaces"; export function ModelSelectionConfirmationModal({ selectedModel, @@ -41,7 +40,7 @@ export function ModelSelectionConfirmationModal({ {isCustom && ( - + We've detected that this is a custom-specified embedding model. Since we have to download the model files before verifying the configuration's correctness, we won't be able to let @@ -53,7 +52,7 @@ export function ModelSelectionConfirmationModal({ )}
-
diff --git a/web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx b/web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx index 2377d5d16d5..4ca22e2501e 100644 --- a/web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx +++ b/web/src/app/admin/embeddings/modals/ProviderCreationModal.tsx @@ -1,10 +1,15 @@ import React, { useRef, useState } from "react"; -import { Text, Button, Callout } from "@tremor/react"; +import Text from "@/components/ui/text"; +import { Callout } from "@/components/ui/callout"; +import { Button } from "@/components/ui/button"; import { Formik, Form } from "formik"; import * as Yup from "yup"; import { Label, TextFormField } from "@/components/admin/connectors/Field"; import { LoadingAnimation } from "@/components/Loading"; -import { CloudEmbeddingProvider } from "../../../../components/embedding/interfaces"; +import { + CloudEmbeddingProvider, + EmbeddingProvider, +} from "../../../../components/embedding/interfaces"; import { EMBEDDING_PROVIDERS_ADMIN_URL } from "../../configuration/llm/constants"; import { Modal } from "@/components/Modal"; @@ -14,12 +19,19 @@ export function ProviderCreationModal({ onCancel, existingProvider, isProxy, + isAzure, + updateCurrentModel, }: { + updateCurrentModel: ( + newModel: string, + provider_type: EmbeddingProvider + ) => void; selectedProvider: CloudEmbeddingProvider; onConfirm: () => void; onCancel: () => void; existingProvider?: CloudEmbeddingProvider; isProxy?: boolean; + isAzure?: boolean; }) { const useFileUpload = selectedProvider.provider_type == "Google"; @@ -41,16 +53,24 @@ export function ProviderCreationModal({ const validationSchema = Yup.object({ provider_type: Yup.string().required("Provider type is required"), - api_key: isProxy - ? Yup.string() - : useFileUpload + api_key: + isProxy || isAzure ? Yup.string() - : Yup.string().required("API Key is required"), + : useFileUpload + ? Yup.string() + : Yup.string().required("API Key is required"), model_name: isProxy ? Yup.string().required("Model name is required") : Yup.string().nullable(), - api_url: isProxy - ? Yup.string().required("API URL is required") + api_url: + isProxy || isAzure + ? Yup.string().required("API URL is required") + : Yup.string(), + deployment_name: isAzure + ? Yup.string().required("Deployment name is required") + : Yup.string(), + api_version: isAzure + ? 
Yup.string().required("API Version is required") : Yup.string(), custom_config: Yup.array().of(Yup.array().of(Yup.string()).length(2)), }); @@ -101,6 +121,8 @@ export function ProviderCreationModal({ api_key: values.api_key, api_url: values.api_url, model_name: values.model_name, + api_version: values.api_version, + deployment_name: values.deployment_name, }), } ); @@ -118,6 +140,8 @@ export function ProviderCreationModal({ headers: { "Content-Type": "application/json" }, body: JSON.stringify({ ...values, + api_version: values.api_version, + deployment_name: values.deployment_name, provider_type: values.provider_type.toLowerCase().split(" ")[0], custom_config: customConfig, is_default_provider: false, @@ -125,6 +149,10 @@ export function ProviderCreationModal({ }), }); + if (isAzure) { + updateCurrentModel(values.model_name, EmbeddingProvider.AZURE); + } + if (!response.ok) { const errorData = await response.json(); throw new Error( @@ -147,7 +175,6 @@ export function ProviderCreationModal({ return ( here {" "} @@ -175,27 +203,47 @@ export function ProviderCreationModal({ className="cursor-pointer underline" target="_blank" href={selectedProvider.apiLink} + rel="noreferrer" > - {isProxy ? "API URL" : "API KEY"} + {isProxy || isAzure ? "API URL" : "API KEY"}
+ {(isProxy || isAzure) && ( + + )} + {isProxy && ( - <> - - - + + )} + + {isAzure && ( + + )} + + {isAzure && ( + )} {useFileUpload ? ( @@ -213,7 +261,9 @@ export function ProviderCreationModal({ ) : ( @@ -223,20 +273,21 @@ export function ProviderCreationModal({ href={selectedProvider.apiLink} target="_blank" className="underline cursor-pointer" + rel="noreferrer" > Learn more here
{errorMsg && ( - + {errorMsg} )}
diff --git a/web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx b/web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx index c965bdfabf8..bea80322e1a 100644 --- a/web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx +++ b/web/src/app/admin/embeddings/pages/AdvancedEmbeddingFormPage.tsx @@ -1,11 +1,14 @@ -import React, { Dispatch, forwardRef, SetStateAction } from "react"; +import React, { forwardRef } from "react"; import { Formik, Form, FormikProps, FieldArray, Field } from "formik"; import * as Yup from "yup"; -import CredentialSubText from "@/components/credentials/CredentialFields"; import { TrashIcon } from "@/components/icons/icons"; import { FaPlus } from "react-icons/fa"; import { AdvancedSearchConfiguration } from "../interfaces"; -import { BooleanFormField } from "@/components/admin/connectors/Field"; +import { + BooleanFormField, + Label, + SubLabel, +} from "@/components/admin/connectors/Field"; import NumberInput from "../../connectors/[connector]/pages/ConnectorInput/NumberInput"; interface AdvancedEmbeddingFormPageProps { @@ -22,9 +25,6 @@ const AdvancedEmbeddingFormPage = forwardRef< >(({ updateAdvancedEmbeddingDetails, advancedEmbeddingDetails }, ref) => { return (
-

- Advanced Configuration -

{({ push, remove }) => (
+ + + Add additional languages to the search. {values.multilingual_expansion.map( (_: any, index: number) => (
diff --git a/web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx b/web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx index a6c71530f24..309c10a2a9e 100644 --- a/web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx +++ b/web/src/app/admin/embeddings/pages/CloudEmbeddingPage.tsx @@ -1,7 +1,7 @@ "use client"; -import { Button, Card, Text, Title } from "@tremor/react"; - +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; import { CloudEmbeddingProvider, CloudEmbeddingModel, @@ -10,42 +10,43 @@ import { EmbeddingModelDescriptor, EmbeddingProvider, LITELLM_CLOUD_PROVIDER, + AZURE_CLOUD_PROVIDER, } from "../../../../components/embedding/interfaces"; import { EmbeddingDetails } from "../EmbeddingModelSelectionForm"; import { FiExternalLink, FiInfo, FiTrash } from "react-icons/fi"; import { HoverPopup } from "@/components/HoverPopup"; import { Dispatch, SetStateAction, useEffect, useState } from "react"; -import { LiteLLMModelForm } from "@/components/embedding/LiteLLMModelForm"; +import { CustomEmbeddingModelForm } from "@/components/embedding/CustomEmbeddingModelForm"; import { deleteSearchSettings } from "./utils"; import { usePopup } from "@/components/admin/connectors/Popup"; import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal"; +import { AdvancedSearchConfiguration } from "../interfaces"; +import CardSection from "@/components/admin/CardSection"; export default function CloudEmbeddingPage({ currentModel, embeddingProviderDetails, embeddingModelDetails, - newEnabledProviders, - newUnenabledProviders, setShowTentativeProvider, setChangeCredentialsProvider, setAlreadySelectedModel, setShowTentativeModel, setShowModelInQueue, + advancedEmbeddingDetails, }: { setShowModelInQueue: Dispatch>; setShowTentativeModel: Dispatch>; currentModel: EmbeddingModelDescriptor | CloudEmbeddingModel; setAlreadySelectedModel: Dispatch>; - newUnenabledProviders: string[]; embeddingModelDetails?: CloudEmbeddingModel[]; embeddingProviderDetails?: EmbeddingDetails[]; - newEnabledProviders: string[]; setShowTentativeProvider: React.Dispatch< React.SetStateAction >; setChangeCredentialsProvider: React.Dispatch< React.SetStateAction >; + advancedEmbeddingDetails: AdvancedSearchConfiguration; }) { function hasProviderTypeinArray( arr: Array<{ provider_type: string }>, @@ -56,31 +57,42 @@ export default function CloudEmbeddingPage({ ); } - let providers: CloudEmbeddingProviderFull[] = AVAILABLE_CLOUD_PROVIDERS.map( + const providers: CloudEmbeddingProviderFull[] = AVAILABLE_CLOUD_PROVIDERS.map( (model) => ({ ...model, configured: - !newUnenabledProviders.includes(model.provider_type) && - (newEnabledProviders.includes(model.provider_type) || - (embeddingProviderDetails && - hasProviderTypeinArray( - embeddingProviderDetails, - model.provider_type - ))!), + embeddingProviderDetails && + hasProviderTypeinArray(embeddingProviderDetails, model.provider_type), }) ); const [liteLLMProvider, setLiteLLMProvider] = useState< EmbeddingDetails | undefined >(undefined); + const [azureProvider, setAzureProvider] = useState< + EmbeddingDetails | undefined + >(undefined); + useEffect(() => { - const foundProvider = embeddingProviderDetails?.find( + const liteLLMProvider = embeddingProviderDetails?.find( (provider) => provider.provider_type === EmbeddingProvider.LITELLM.toLowerCase() ); - setLiteLLMProvider(foundProvider); + setLiteLLMProvider(liteLLMProvider); + const azureProvider = embeddingProviderDetails?.find( + (provider) => + provider.provider_type === 
EmbeddingProvider.AZURE.toLowerCase() + ); + setAzureProvider(azureProvider); }, [embeddingProviderDetails]); + const isAzureConfigured = azureProvider !== undefined; + + // Get details of the configured Azure provider + const azureProviderDetails = embeddingProviderDetails?.find( + (provider) => provider.provider_type.toLowerCase() === "azure" + ); + return (
@@ -198,7 +210,7 @@ export default function CloudEmbeddingPage({ )} {!liteLLMProvider && ( - <Card className="mt-2 w-full max-w-4xl bg-gray-50 border border-gray-200"> + <CardSection className="mt-2 w-full max-w-4xl bg-gray-50 border border-gray-200"> <div className="p-4"> <Text className="text-lg font-semibold mb-2"> API URL Required @@ -216,7 +228,7 @@ export default function CloudEmbeddingPage({ </Text> </div> </div> - </Card> + </CardSection> )} {liteLLMProvider && ( <> @@ -241,14 +253,15 @@ export default function CloudEmbeddingPage({ ))} </div> - <Card + <CardSection className={`mt-2 w-full max-w-4xl ${ currentModel.provider_type === EmbeddingProvider.LITELLM ? "border-2 border-blue-500" : "" }`} > - <LiteLLMModelForm + <CustomEmbeddingModelForm + embeddingType={EmbeddingProvider.LITELLM} provider={liteLLMProvider} currentValues={ currentModel.provider_type === EmbeddingProvider.LITELLM @@ -257,11 +270,131 @@ export default function CloudEmbeddingPage({ } setShowTentativeModel={setShowTentativeModel} /> - </Card> + </CardSection> </> )} </div> </div> + + <Text className="mt-6"> + You can also use Azure OpenAI models for embeddings. Azure requires + separate configuration for each model. + </Text> + + <div key={AZURE_CLOUD_PROVIDER.provider_type} className="mt-4 w-full"> + <div className="flex items-center mb-2"> + {AZURE_CLOUD_PROVIDER.icon({ size: 40 })} + <h2 className="ml-2 mt-2 text-xl font-bold"> + {AZURE_CLOUD_PROVIDER.provider_type}{" "} + </h2> + <HoverPopup + mainContent={ + <FiInfo className="ml-2 mt-2 cursor-pointer" size={18} /> + } + popupContent={ + <div className="text-sm text-text-800 w-52"> + <div className="my-auto"> + {AZURE_CLOUD_PROVIDER.description} + </div> + </div> + } + style="dark" + /> + </div> + </div> + + <div className="w-full flex flex-col items-start"> + {!isAzureConfigured ? ( + <> + <button + onClick={() => setShowTentativeProvider(AZURE_CLOUD_PROVIDER)} + className="mb-2 px-4 py-2 bg-blue-500 text-white rounded hover:bg-blue-600 text-sm cursor-pointer" + > + Configure Azure OpenAI + </button> + <div className="mt-2 w-full max-w-4xl"> + <CardSection className="p-4 border border-gray-200 rounded-lg shadow-sm"> + <Text className="text-base font-medium mb-2"> + Configure Azure OpenAI for Embeddings + </Text> + <Text className="text-sm text-gray-600 mb-3"> + Click "Configure Azure OpenAI" to set up Azure + OpenAI for embeddings. + </Text> + <div className="flex items-center text-sm text-gray-700"> + <FiInfo className="text-gray-400 mr-2" size={16} /> + <Text> + You'll need: API version, base URL, API key, model + name, and deployment name. + </Text> + </div> + </CardSection> + </div> + </> + ) : ( + <> + <div className="mb-6 w-full"> + <Text className="text-lg font-semibold mb-3"> + Current Azure Configuration + </Text> + + {azureProviderDetails ? 
( + <CardSection className="bg-white shadow-sm border border-gray-200 rounded-lg"> + <div className="p-4 space-y-3"> + <div className="flex justify-between"> + <span className="font-medium">API Version:</span> + <span>{azureProviderDetails.api_version}</span> + </div> + <div className="flex justify-between"> + <span className="font-medium">Base URL:</span> + <span>{azureProviderDetails.api_url}</span> + </div> + <div className="flex justify-between"> + <span className="font-medium">Deployment Name:</span> + <span>{azureProviderDetails.deployment_name}</span> + </div> + </div> + <button + onClick={() => + setChangeCredentialsProvider(AZURE_CLOUD_PROVIDER) + } + className="mt-2 px-4 py-2 bg-red-500 text-white rounded hover:bg-red-600 text-sm" + > + Delete Current Azure Provider + </button> + </CardSection> + ) : ( + <CardSection className="bg-gray-50 border border-gray-200 rounded-lg"> + <div className="p-4 text-gray-500 text-center"> + No Azure provider has been configured yet. + </div> + </CardSection> + )} + </div> + + <CardSection + className={`mt-2 w-full max-w-4xl ${ + currentModel.provider_type === EmbeddingProvider.AZURE + ? "border-2 border-blue-500" + : "" + }`} + > + {azureProvider && ( + <CustomEmbeddingModelForm + embeddingType={EmbeddingProvider.AZURE} + provider={azureProvider} + currentValues={ + currentModel.provider_type === EmbeddingProvider.AZURE + ? (currentModel as CloudEmbeddingModel) + : null + } + setShowTentativeModel={setShowTentativeModel} + /> + )} + </CardSection> + </> + )} + </div> </div> </div> ); diff --git a/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx b/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx index d6a241400e8..8b4f2955789 100644 --- a/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx +++ b/web/src/app/admin/embeddings/pages/EmbeddingFormPage.tsx @@ -3,8 +3,9 @@ import { usePopup } from "@/components/admin/connectors/Popup"; import { HealthCheckBanner } from "@/components/health/healthcheck"; import { EmbeddingModelSelection } from "../EmbeddingModelSelectionForm"; -import { useEffect, useState } from "react"; -import { Button, Card, Text } from "@tremor/react"; +import { useEffect, useMemo, useState } from "react"; +import Text from "@/components/ui/text"; +import { Button } from "@/components/ui/button"; import { ArrowLeft, ArrowRight, WarningCircle } from "@phosphor-icons/react"; import { CloudEmbeddingModel, @@ -13,7 +14,7 @@ import { } from "@/components/embedding/interfaces"; import { errorHandlingFetcher } from "@/lib/fetcher"; import { ErrorCallout } from "@/components/ErrorCallout"; -import useSWR, { mutate } from "swr"; +import useSWR from "swr"; import { ThreeDotsLoader } from "@/components/Loading"; import AdvancedEmbeddingFormPage from "./AdvancedEmbeddingFormPage"; import { @@ -25,17 +26,16 @@ import RerankingDetailsForm from "../RerankingFormPage"; import { useEmbeddingFormContext } from "@/components/context/EmbeddingContext"; import { Modal } from "@/components/Modal"; +import { useRouter } from "next/navigation"; +import CardSection from "@/components/admin/CardSection"; +import { CardDescription } from "@/components/ui/card"; export default function EmbeddingForm() { const { formStep, nextFormStep, prevFormStep } = useEmbeddingFormContext(); const { popup, setPopup } = usePopup(); + const router = useRouter(); const [advancedEmbeddingDetails, setAdvancedEmbeddingDetails] = useState<AdvancedSearchConfiguration>({ - model_name: "", - model_dim: 0, - normalize: false, - query_prefix: "", - passage_prefix: 
"", index_name: "", multipass_indexing: true, multilingual_expansion: [], @@ -107,11 +107,6 @@ export default function EmbeddingForm() { useEffect(() => { if (searchSettings) { setAdvancedEmbeddingDetails({ - model_name: searchSettings.model_name, - model_dim: searchSettings.model_dim, - normalize: searchSettings.normalize, - query_prefix: searchSettings.query_prefix, - passage_prefix: searchSettings.passage_prefix, index_name: searchSettings.index_name, multipass_indexing: searchSettings.multipass_indexing, multilingual_expansion: searchSettings.multilingual_expansion, @@ -150,11 +145,68 @@ export default function EmbeddingForm() { } }, [currentEmbeddingModel]); - useEffect(() => { - if (currentEmbeddingModel) { - setSelectedProvider(currentEmbeddingModel); + const handleReindex = async () => { + const update = await updateSearch(); + if (update) { + await onConfirm(); } - }, [currentEmbeddingModel]); + }; + + const needsReIndex = + currentEmbeddingModel != selectedProvider || + searchSettings?.multipass_indexing != + advancedEmbeddingDetails.multipass_indexing; + + const ReIndexingButton = useMemo(() => { + const ReIndexingButtonComponent = ({ + needsReIndex, + }: { + needsReIndex: boolean; + }) => { + return needsReIndex ? ( + <div className="flex mx-auto gap-x-1 ml-auto items-center"> + <button + className="enabled:cursor-pointer disabled:bg-accent/50 disabled:cursor-not-allowed bg-accent flex gap-x-1 items-center text-white py-2.5 px-3.5 text-sm font-regular rounded-sm" + onClick={handleReindex} + > + Re-index + </button> + <div className="relative group"> + <WarningCircle + className="text-text-800 cursor-help" + size={20} + weight="fill" + /> + <div className="absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64"> + <p className="font-semibold mb-2">Needs re-indexing due to:</p> + <ul className="list-disc pl-5"> + {currentEmbeddingModel != selectedProvider && ( + <li>Changed embedding provider</li> + )} + {searchSettings?.multipass_indexing != + advancedEmbeddingDetails.multipass_indexing && ( + <li>Multipass indexing modification</li> + )} + </ul> + </div> + </div> + </div> + ) : ( + <button + className="enabled:cursor-pointer ml-auto disabled:bg-accent/50 disabled:cursor-not-allowed bg-accent flex mx-auto gap-x-1 items-center text-white py-2.5 px-3.5 text-sm font-regular rounded-sm" + onClick={async () => { + updateSearch(); + navigateToEmbeddingPage("search settings"); + }} + > + Update Search + </button> + ); + }; + ReIndexingButtonComponent.displayName = "ReIndexingButton"; + return ReIndexingButtonComponent; + }, [needsReIndex]); + if (!selectedProvider) { return <ThreeDotsLoader />; } @@ -162,21 +214,24 @@ export default function EmbeddingForm() { return <ErrorCallout errorTitle="Failed to fetch embedding model status" />; } + const updateCurrentModel = (newModel: string) => { + setAdvancedEmbeddingDetails((values) => ({ + ...values, + model_name: newModel, + })); + }; + const updateSearch = async () => { - let values: SavedSearchSettings = { + const values: SavedSearchSettings = { ...rerankingDetails, ...advancedEmbeddingDetails, + ...selectedProvider, provider_type: selectedProvider.provider_type?.toLowerCase() as EmbeddingProvider | null, }; const response = await updateSearchSettings(values); if (response.ok) { - setPopup({ - message: "Updated search settings succesffuly", - type: "success", - }); - mutate("/api/search-settings/get-current-search-settings"); return true; } else { setPopup({ message: 
"Failed to update search settings", type: "error" }); @@ -184,6 +239,10 @@ export default function EmbeddingForm() { } }; + const navigateToEmbeddingPage = (changedResource: string) => { + router.push("/admin/configuration/search?message=search-settings"); + }; + const onConfirm = async () => { if (!selectedProvider) { return; @@ -192,12 +251,13 @@ export default function EmbeddingForm() { // We use a spread operation to merge properties from multiple objects into a single object. // Advanced embedding details may update default values. + // Do NOT modify the order unless you are positive the new hierarchy is correct. if (selectedProvider.provider_type != null) { // This is a cloud model newModel = { - ...rerankingDetails, - ...advancedEmbeddingDetails, ...selectedProvider, + ...advancedEmbeddingDetails, + ...rerankingDetails, provider_type: (selectedProvider.provider_type ?.toLowerCase() @@ -207,12 +267,12 @@ export default function EmbeddingForm() { // This is a locally hosted model newModel = { ...selectedProvider, - ...rerankingDetails, ...advancedEmbeddingDetails, - ...selectedProvider, + ...rerankingDetails, provider_type: null, }; } + newModel.index_name = null; const response = await fetch( @@ -227,14 +287,7 @@ export default function EmbeddingForm() { ); if (response.ok) { - setPopup({ - message: "Changed provider successfully. Redirecting to embedding page", - type: "success", - }); - mutate("/api/search-settings/get-secondary-search-settings"); - setTimeout(() => { - window.open("/admin/configuration/search", "_self"); - }, 2000); + navigateToEmbeddingPage("embedding model"); } else { setPopup({ message: "Failed to update embedding model", type: "error" }); @@ -242,57 +295,6 @@ export default function EmbeddingForm() { } }; - const needsReIndex = - currentEmbeddingModel != selectedProvider || - searchSettings?.multipass_indexing != - advancedEmbeddingDetails.multipass_indexing; - - const ReIndexingButton = ({ needsReIndex }: { needsReIndex: boolean }) => { - return needsReIndex ? ( - <div className="flex mx-auto gap-x-1 ml-auto items-center"> - <button - className="enabled:cursor-pointer disabled:bg-accent/50 disabled:cursor-not-allowed bg-accent flex gap-x-1 items-center text-white py-2.5 px-3.5 text-sm font-regular rounded-sm" - onClick={async () => { - const update = await updateSearch(); - if (update) { - await onConfirm(); - } - }} - > - Re-index - </button> - <div className="relative group"> - <WarningCircle - className="text-text-800 cursor-help" - size={20} - weight="fill" - /> - <div className="absolute z-10 invisible group-hover:visible bg-background-800 text-text-200 text-sm rounded-md shadow-md p-2 right-0 mt-1 w-64"> - <p className="font-semibold mb-2">Needs re-indexing due to:</p> - <ul className="list-disc pl-5"> - {currentEmbeddingModel != selectedProvider && ( - <li>Changed embedding provider</li> - )} - {searchSettings?.multipass_indexing != - advancedEmbeddingDetails.multipass_indexing && ( - <li>Multipass indexing modification</li> - )} - </ul> - </div> - </div> - </div> - ) : ( - <button - className="enabled:cursor-pointer ml-auto disabled:bg-accent/50 disabled:cursor-not-allowed bg-accent flex mx-auto gap-x-1 items-center text-white py-2.5 px-3.5 text-sm font-regular rounded-sm" - onClick={async () => { - updateSearch(); - }} - > - Update Search - </button> - ); - }; - return ( <div className="mx-auto mb-8 w-full"> {popup} @@ -314,15 +316,17 @@ export default function EmbeddingForm() { take hours or days. 
You can monitor the progress of the re-indexing on this page while the models are being switched. </Text> - <Card> + <CardSection> <EmbeddingModelSelection + updateCurrentModel={updateCurrentModel} setModelTab={setModelTab} modelTab={modelTab} selectedProvider={selectedProvider} currentEmbeddingModel={currentEmbeddingModel} updateSelectedProvider={updateSelectedProvider} + advancedEmbeddingDetails={advancedEmbeddingDetails} /> - </Card> + </CardSection> <div className="mt-4 flex w-full justify-end"> <button className="enabled:cursor-pointer disabled:cursor-not-allowed disabled:bg-blue-200 bg-blue-400 flex gap-x-1 items-center text-white py-2.5 px-3.5 text-sm font-regular rounded-sm" @@ -359,7 +363,10 @@ export default function EmbeddingForm() { <li>Nomic nomic-embed-text-v1 for self-hosted</li> </div> <div className="flex mt-4 justify-between"> - <Button color="green" onClick={() => setShowPoorModel(false)}> + <Button + variant="secondary" + onClick={() => setShowPoorModel(false)} + > Cancel update </Button> <Button @@ -377,7 +384,17 @@ export default function EmbeddingForm() { {formStep == 1 && ( <> - <Card> + <h2 className="text-2xl font-bold mb-4 text-text-800"> + Select a Reranking Model + </h2> + <Text className="mb-4"> + Updating the reranking model does not require re-indexing + documents. The reranker helps improve search quality by reordering + results after the initial embedding search. Changes will take + effect immediately for all new searches. + </Text> + + <CardSection> <RerankingDetailsForm setModelTab={setModelTab} modelTab={ @@ -389,9 +406,9 @@ export default function EmbeddingForm() { originalRerankingDetails={originalRerankingDetails} setRerankingDetails={setRerankingDetails} /> - </Card> + </CardSection> - <div className={` mt-4 w-full grid grid-cols-3`}> + <div className={`mt-4 w-full grid grid-cols-3`}> <button className="border-border-dark mr-auto border flex gap-x-1 items-center text-text p-2.5 text-sm font-regular rounded-sm " onClick={() => prevFormStep()} @@ -405,7 +422,6 @@ export default function EmbeddingForm() { <div className="flex w-full justify-end"> <button className={`enabled:cursor-pointer enabled:hover:underline disabled:cursor-not-allowed mt-auto enabled:text-text-600 disabled:text-text-400 ml-auto flex gap-x-1 items-center py-2.5 px-3.5 text-sm font-regular rounded-sm`} - // disabled={!isFormValid} onClick={() => { nextFormStep(); }} @@ -419,12 +435,20 @@ export default function EmbeddingForm() { )} {formStep == 2 && ( <> - <Card> + <h2 className="text-2xl font-bold mb-4 text-text-800"> + Advanced Search Configuration + </h2> + <Text className="mb-4"> + Configure advanced embedding and search settings. Changes will + require re-indexing documents. 
+ </Text> + + <CardSection> <AdvancedEmbeddingFormPage advancedEmbeddingDetails={advancedEmbeddingDetails} updateAdvancedEmbeddingDetails={updateAdvancedEmbeddingDetails} /> - </Card> + </CardSection> <div className={`mt-4 grid grid-cols-3 w-full `}> <button diff --git a/web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx b/web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx index 2e28ce8e4b8..54317605541 100644 --- a/web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx +++ b/web/src/app/admin/embeddings/pages/OpenEmbeddingPage.tsx @@ -1,5 +1,8 @@ "use client"; -import { Button, Card, Text } from "@tremor/react"; + +import { Button } from "@/components/ui/button"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; import { ModelSelector } from "../../../../components/embedding/ModelSelector"; import { AVAILABLE_MODELS, @@ -8,7 +11,7 @@ import { } from "../../../../components/embedding/interfaces"; import { CustomModelForm } from "../../../../components/embedding/CustomModelForm"; import { useState } from "react"; -import { Title } from "@tremor/react"; +import CardSection from "@/components/admin/CardSection"; export default function OpenEmbeddingPage({ onSelectOpenSource, selectedProvider, @@ -34,7 +37,12 @@ export default function OpenEmbeddingPage({ <Text className="mt-6"> Alternatively, (if you know what you're doing) you can specify a{" "} - <a target="_blank" href="https://www.sbert.net/" className="text-link"> + <a + target="_blank" + href="https://www.sbert.net/" + className="text-link" + rel="noreferrer" + > SentenceTransformers </a> -compatible model of your choice below. The rough list of supported @@ -43,6 +51,7 @@ export default function OpenEmbeddingPage({ target="_blank" href="https://huggingface.co/models?library=sentence-transformers&sort=trending" className="text-link" + rel="noreferrer" > here </a> @@ -53,15 +62,19 @@ export default function OpenEmbeddingPage({ to the Danswer team. 
</Text> {!configureModel && ( - <Button onClick={() => setConfigureModel(true)} className="mt-4"> + <Button + onClick={() => setConfigureModel(true)} + className="mt-4" + variant="secondary" + > Configure custom model </Button> )} {configureModel && ( <div className="w-full flex"> - <Card className="mt-4 2xl:w-4/6 mx-auto"> + <CardSection className="mt-4 2xl:w-4/6 mx-auto"> <CustomModelForm onSubmit={onSelectOpenSource} /> - </Card> + </CardSection> </div> )} </div> diff --git a/web/src/app/admin/indexing/[id]/IndexAttemptErrorsTable.tsx b/web/src/app/admin/indexing/[id]/IndexAttemptErrorsTable.tsx index 6ee8efef511..de68a46069b 100644 --- a/web/src/app/admin/indexing/[id]/IndexAttemptErrorsTable.tsx +++ b/web/src/app/admin/indexing/[id]/IndexAttemptErrorsTable.tsx @@ -9,12 +9,12 @@ import { TableBody, TableCell, TableHead, - TableHeaderCell, TableRow, - Text, -} from "@tremor/react"; +} from "@/components/ui/table"; +import Text from "@/components/ui/text"; import { useState } from "react"; import { IndexAttemptError } from "./types"; +import { TableHeader } from "@/components/ui/table"; const NUM_IN_PAGE = 8; @@ -100,14 +100,14 @@ export function IndexAttemptErrorsTable({ )} <Table> - <TableHead> + <TableHeader> <TableRow> - <TableHeaderCell>Timestamp</TableHeaderCell> - <TableHeaderCell>Batch Number</TableHeaderCell> - <TableHeaderCell>Document Summaries</TableHeaderCell> - <TableHeaderCell>Error Message</TableHeaderCell> + <TableHead>Timestamp</TableHead> + <TableHead>Batch Number</TableHead> + <TableHead>Document Summaries</TableHead> + <TableHead>Error Message</TableHead> </TableRow> - </TableHead> + </TableHeader> <TableBody> {indexAttemptErrors .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page) diff --git a/web/src/app/admin/indexing/[id]/page.tsx b/web/src/app/admin/indexing/[id]/page.tsx index 51fe694541c..75aa482a00f 100644 --- a/web/src/app/admin/indexing/[id]/page.tsx +++ b/web/src/app/admin/indexing/[id]/page.tsx @@ -1,11 +1,11 @@ "use client"; +import { use } from "react"; import { BackButton } from "@/components/BackButton"; import { ErrorCallout } from "@/components/ErrorCallout"; import { ThreeDotsLoader } from "@/components/Loading"; import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ValidSources } from "@/lib/types"; -import { Title } from "@tremor/react"; +import Title from "@/components/ui/title"; import useSWR from "swr"; import { IndexAttemptErrorsTable } from "./IndexAttemptErrorsTable"; import { buildIndexingErrorsUrl } from "./lib"; @@ -47,7 +47,8 @@ function Main({ id }: { id: number }) { ); } -export default function Page({ params }: { params: { id: string } }) { +export default function Page(props: { params: Promise<{ id: string }> }) { + const params = use(props.params); const id = parseInt(params.id); return ( diff --git a/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx b/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx index 2b78e11de2d..3b04e4a65d7 100644 --- a/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx +++ b/web/src/app/admin/indexing/status/CCPairIndexingStatusTable.tsx @@ -2,12 +2,13 @@ import React, { useState, useMemo, useEffect, useRef } from "react"; import { Table, TableRow, - TableHeaderCell, + TableHead, TableBody, TableCell, - Badge, - Button, -} from "@tremor/react"; + TableHeader, +} from "@/components/ui/table"; +import { Badge } from "@/components/ui/badge"; +import { Button } from "@/components/ui/button"; import { IndexAttemptStatus } from "@/components/Status"; import { 
timeAgo } from "@/lib/time"; import { @@ -23,8 +24,15 @@ import { FiSettings, FiLock, FiUnlock, + FiRefreshCw, + FiPauseCircle, } from "react-icons/fi"; -import { Tooltip } from "@/components/tooltip/Tooltip"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; import { SourceIcon } from "@/components/SourceIcon"; import { getSourceDisplayName } from "@/lib/sources"; import { CustomTooltip } from "@/components/tooltip/CustomTooltip"; @@ -74,21 +82,26 @@ function SummaryRow({ <TableCell> <div className="text-sm text-gray-500">Active Connectors</div> - <Tooltip - content={`${summary.active} out of ${summary.count} connectors are active`} - > - <div className="flex items-center mt-1"> - <div className="w-full bg-gray-200 rounded-full h-2 mr-2"> - <div - className="bg-green-500 h-2 rounded-full" - style={{ width: `${activePercentage}%` }} - ></div> - </div> - <span className="text-sm font-medium whitespace-nowrap"> - {summary.active} ({activePercentage.toFixed(0)}%) - </span> - </div> - </Tooltip> + <TooltipProvider> + <Tooltip> + <TooltipTrigger asChild> + <div className="flex items-center mt-1"> + <div className="w-full bg-gray-200 rounded-full h-2 mr-2"> + <div + className="bg-green-500 h-2 rounded-full" + style={{ width: `${activePercentage}%` }} + ></div> + </div> + <span className="text-sm font-medium whitespace-nowrap"> + {summary.active} ({activePercentage.toFixed(0)}%) + </span> + </div> + </TooltipTrigger> + <TooltipContent> + {summary.active} out of {summary.count} connectors are active + </TooltipContent> + </Tooltip> + </TooltipProvider> </TableCell> {isPaidEnterpriseFeaturesEnabled && ( @@ -143,30 +156,14 @@ function ConnectorRow({ ccPairsIndexingStatus.cc_pair_status === ConnectorCredentialPairStatus.DELETING ) { - return ( - <Badge - color="red" - className="w-fit px-2 py-1 rounded-full border border-red-500" - > - <div className="flex text-xs items-center gap-x-1"> - <div className="w-3 h-3 rounded-full bg-red-500"></div> - Deleting - </div> - </Badge> - ); + return <Badge variant="destructive">Deleting</Badge>; } else if ( ccPairsIndexingStatus.cc_pair_status === ConnectorCredentialPairStatus.PAUSED ) { return ( - <Badge - color="yellow" - className="w-fit px-2 py-1 rounded-full border border-yellow-500" - > - <div className="flex text-xs items-center gap-x-1"> - <div className="w-3 h-3 rounded-full bg-yellow-500"></div> - Paused - </div> + <Badge icon={FiPauseCircle} variant="paused"> + Paused </Badge> ); } @@ -175,38 +172,20 @@ function ConnectorRow({ switch (ccPairsIndexingStatus.last_status) { case "in_progress": return ( - <Badge - color="green" - className="w-fit px-2 py-1 rounded-full border border-green-500" - > - <div className="flex text-xs items-center gap-x-1"> - <div className="w-3 h-3 rounded-full bg-green-500"></div> - Indexing - </div> + <Badge circle variant="success"> + Indexing </Badge> ); case "not_started": return ( - <Badge - color="purple" - className="w-fit px-2 py-1 rounded-full border border-purple-500" - > - <div className="flex text-xs items-center gap-x-1"> - <div className="w-3 h-3 rounded-full bg-purple-500"></div> - Scheduled - </div> + <Badge circle variant="purple"> + Scheduled </Badge> ); default: return ( - <Badge - color="green" - className="w-fit px-2 py-1 rounded-full border border-green-500" - > - <div className="flex text-xs items-center gap-x-1"> - <div className="w-3 h-3 rounded-full bg-green-500"></div> - Active - </div> + <Badge circle variant="success"> + Active </Badge> 
); } @@ -215,8 +194,8 @@ function ConnectorRow({ return ( <TableRow className={`hover:bg-hover-light ${ - invisible ? "invisible h-0 !-mb-10" : "border border-border !border-b" - } w-full cursor-pointer relative`} + invisible ? "invisible !h-0 !-mb-10" : "!border !border-border" + } w-full cursor-pointer relative `} onClick={() => { router.push(`/admin/connector/${ccPairsIndexingStatus.cc_pair_id}`); }} @@ -233,15 +212,21 @@ function ConnectorRow({ {isPaidEnterpriseFeaturesEnabled && ( <TableCell> {ccPairsIndexingStatus.access_type === "public" ? ( + <Badge variant={isEditable ? "success" : "default"} icon={FiUnlock}> + Public + </Badge> + ) : ccPairsIndexingStatus.access_type === "sync" ? ( <Badge - size="md" - color={isEditable ? "green" : "gray"} - icon={FiUnlock} + variant={isEditable ? "orange" : "default"} + icon={FiRefreshCw} > - Public + Sync </Badge> ) : ( - <Badge size="md" color={isEditable ? "blue" : "gray"} icon={FiLock}> + <Badge + variant={isEditable ? "in_progress" : "default"} + icon={FiLock} + > Private </Badge> )} @@ -252,7 +237,6 @@ function ConnectorRow({ <IndexAttemptStatus status={ccPairsIndexingStatus.last_finished_status || null} errorMsg={ccPairsIndexingStatus?.latest_index_attempt?.error_msg} - size="xs" /> </TableCell> <TableCell> @@ -384,11 +368,11 @@ export function CCPairIndexingStatusTable({ }; const shouldExpand = Object.values(connectorsToggled).filter(Boolean).length < - sortedSources.length / 2; + sortedSources.length; return ( - <div className="-mt-20"> - <Table> + <Table> + <TableHeader> <ConnectorRow invisible ccPairsIndexingStatus={{ @@ -430,91 +414,88 @@ export function CCPairIndexingStatusTable({ error_msg: "", deletion_attempt: null, is_deletable: true, + in_progress: false, groups: [], // Add this line }} isEditable={false} /> - <div className="flex items-center w-0 mt-4 gap-x-2"> - <input - type="text" - ref={searchInputRef} - placeholder="Search connectors..." - value={searchTerm} - onChange={(e) => setSearchTerm(e.target.value)} - className="ml-1 w-96 h-9 flex-none rounded-md border border-border bg-background-50 px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring" - /> - - <Button className="h-9" onClick={() => toggleSources()}> - {!shouldExpand ? "Collapse All" : "Expand All"} - </Button> - </div> + </TableHeader> + <div className="flex -mt-12 items-center w-0 m4 gap-x-2"> + <input + type="text" + ref={searchInputRef} + placeholder="Search connectors..." 
+ value={searchTerm} + onChange={(e) => setSearchTerm(e.target.value)} + className="ml-1 w-96 h-9 flex-none rounded-md bg-background-50 px-3 py-1 text-sm shadow-sm transition-colors placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring" + /> - <TableBody> - {sortedSources - .filter( - (source) => - source != "not_applicable" && source != "ingestion_api" - ) - .map((source, ind) => { - const sourceMatches = source - .toLowerCase() - .includes(searchTerm.toLowerCase()); - const matchingConnectors = groupedStatuses[source].filter( - (status) => - (status.name || "") - .toLowerCase() - .includes(searchTerm.toLowerCase()) - ); - if (sourceMatches || matchingConnectors.length > 0) { - return ( - <React.Fragment key={ind}> - <br className="mt-4" /> - - <SummaryRow - source={source} - summary={groupSummaries[source]} - isOpen={connectorsToggled[source] || false} - onToggle={() => toggleSource(source)} - /> - - {connectorsToggled[source] && ( - <> - <TableRow className="border border-border"> - <TableHeaderCell>Name</TableHeaderCell> - <TableHeaderCell>Last Indexed</TableHeaderCell> - <TableHeaderCell>Activity</TableHeaderCell> - {isPaidEnterpriseFeaturesEnabled && ( - <TableHeaderCell>Permissions</TableHeaderCell> + <Button className="h-9" onClick={() => toggleSources()}> + {!shouldExpand ? "Collapse All" : "Expand All"} + </Button> + </div> + + <TableBody> + {sortedSources + .filter( + (source) => source != "not_applicable" && source != "ingestion_api" + ) + .map((source, ind) => { + const sourceMatches = source + .toLowerCase() + .includes(searchTerm.toLowerCase()); + const matchingConnectors = groupedStatuses[source].filter( + (status) => + (status.name || "") + .toLowerCase() + .includes(searchTerm.toLowerCase()) + ); + if (sourceMatches || matchingConnectors.length > 0) { + return ( + <React.Fragment key={ind}> + <br className="mt-4" /> + + <SummaryRow + source={source} + summary={groupSummaries[source]} + isOpen={connectorsToggled[source] || false} + onToggle={() => toggleSource(source)} + /> + + {connectorsToggled[source] && ( + <> + <TableRow className="border border-border"> + <TableHead>Name</TableHead> + <TableHead>Last Indexed</TableHead> + <TableHead>Activity</TableHead> + {isPaidEnterpriseFeaturesEnabled && ( + <TableHead>Permissions</TableHead> + )} + <TableHead>Total Docs</TableHead> + <TableHead>Last Status</TableHead> + <TableHead></TableHead> + </TableRow> + {(sourceMatches + ? groupedStatuses[source] + : matchingConnectors + ).map((ccPairsIndexingStatus) => ( + <ConnectorRow + key={ccPairsIndexingStatus.cc_pair_id} + ccPairsIndexingStatus={ccPairsIndexingStatus} + isEditable={editableCcPairsIndexingStatuses.some( + (e) => + e.cc_pair_id === ccPairsIndexingStatus.cc_pair_id )} - <TableHeaderCell>Total Docs</TableHeaderCell> - <TableHeaderCell>Last Status</TableHeaderCell> - <TableHeaderCell></TableHeaderCell> - </TableRow> - {(sourceMatches - ? 
groupedStatuses[source] - : matchingConnectors - ).map((ccPairsIndexingStatus) => ( - <ConnectorRow - key={ccPairsIndexingStatus.cc_pair_id} - ccPairsIndexingStatus={ccPairsIndexingStatus} - isEditable={editableCcPairsIndexingStatuses.some( - (e) => - e.cc_pair_id === - ccPairsIndexingStatus.cc_pair_id - )} - /> - ))} - </> - )} - </React.Fragment> - ); - } - return null; - })} - </TableBody> - - <div className="invisible w-full pb-40" /> - </Table> - </div> + /> + ))} + </> + )} + </React.Fragment> + ); + } + return null; + })} + </TableBody> + </Table> ); } diff --git a/web/src/app/admin/indexing/status/page.tsx b/web/src/app/admin/indexing/status/page.tsx index f5d64d3ac3a..7e6d8ec94c4 100644 --- a/web/src/app/admin/indexing/status/page.tsx +++ b/web/src/app/admin/indexing/status/page.tsx @@ -1,16 +1,14 @@ "use client"; -import useSWR from "swr"; - import { LoadingAnimation } from "@/components/Loading"; import { NotebookIcon } from "@/components/icons/icons"; -import { errorHandlingFetcher } from "@/lib/fetcher"; -import { ConnectorIndexingStatus } from "@/lib/types"; import { CCPairIndexingStatusTable } from "./CCPairIndexingStatusTable"; import { AdminPageTitle } from "@/components/admin/Title"; import Link from "next/link"; -import { Button, Text } from "@tremor/react"; +import Text from "@/components/ui/text"; import { useConnectorCredentialIndexingStatus } from "@/lib/hooks"; +import { usePopupFromQuery } from "@/components/popup/PopupFromQuery"; +import { Button } from "@/components/ui/button"; function Main() { const { @@ -75,19 +73,30 @@ function Main() { } export default function Status() { + const { popup } = usePopupFromQuery({ + "connector-created": { + message: "Connector created successfully", + type: "success", + }, + "connector-deleted": { + message: "Connector deleted successfully", + type: "success", + }, + }); + return ( <div className="mx-auto container"> + {popup} <AdminPageTitle icon={<NotebookIcon size={32} />} title="Existing Connectors" farRightElement={ <Link href="/admin/add-connector"> - <Button color="green" size="xs"> - Add Connector - </Button> + <Button variant="success-reverse">Add Connector</Button> </Link> } /> + <Main /> </div> ); diff --git a/web/src/app/admin/prompt-library/modals/AddPromptModal.tsx b/web/src/app/admin/prompt-library/modals/AddPromptModal.tsx index 0d385f48479..1d6ca466367 100644 --- a/web/src/app/admin/prompt-library/modals/AddPromptModal.tsx +++ b/web/src/app/admin/prompt-library/modals/AddPromptModal.tsx @@ -1,12 +1,12 @@ import React from "react"; -import { Formik, Form, Field, ErrorMessage } from "formik"; +import { Formik, Form } from "formik"; import * as Yup from "yup"; -import { ModalWrapper } from "@/components/modals/ModalWrapper"; -import { Button, Textarea, TextInput } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { BookstackIcon } from "@/components/icons/icons"; import { AddPromptModalProps } from "../interfaces"; import { TextFormField } from "@/components/admin/connectors/Field"; +import { Modal } from "@/components/Modal"; const AddPromptSchema = Yup.object().shape({ title: Yup.string().required("Title is required"), @@ -15,7 +15,7 @@ const AddPromptSchema = Yup.object().shape({ const AddPromptModal = ({ onClose, onSubmit }: AddPromptModalProps) => { return ( - <ModalWrapper onClose={onClose} modalClassName="max-w-xl"> + <Modal onOutsideClick={onClose} width="w-full max-w-3xl"> <Formik initialValues={{ title: "", @@ -51,13 +51,18 @@ const AddPromptModal = ({ onClose, onSubmit }: 
AddPromptModalProps) => { placeholder="Enter a prompt (e.g. 'help me rewrite the following politely and concisely for professional communication')" /> - <Button type="submit" className="w-full" disabled={isSubmitting}> + <Button + type="submit" + className="w-full" + disabled={isSubmitting} + variant="submit" + > Add prompt </Button> </Form> )} </Formik> - </ModalWrapper> + </Modal> ); }; diff --git a/web/src/app/admin/prompt-library/modals/EditPromptModal.tsx b/web/src/app/admin/prompt-library/modals/EditPromptModal.tsx index a5b44da512d..c12006b07fc 100644 --- a/web/src/app/admin/prompt-library/modals/EditPromptModal.tsx +++ b/web/src/app/admin/prompt-library/modals/EditPromptModal.tsx @@ -1,10 +1,12 @@ import React from "react"; import { Formik, Form, Field, ErrorMessage } from "formik"; import * as Yup from "yup"; -import { ModalWrapper } from "@/components/modals/ModalWrapper"; -import { Button, Textarea, TextInput } from "@tremor/react"; +import { Modal } from "@/components/Modal"; +import { Textarea } from "@/components/ui/textarea"; +import { Button } from "@/components/ui/button"; import { useInputPrompt } from "../hooks"; import { EditPromptModalProps } from "../interfaces"; +import { Input } from "@/components/ui/input"; const EditPromptSchema = Yup.object().shape({ prompt: Yup.string().required("Title is required"), @@ -25,20 +27,20 @@ const EditPromptModal = ({ if (error) return ( - <ModalWrapper onClose={onClose} modalClassName="max-w-xl"> + <Modal onOutsideClick={onClose} width="max-w-xl"> <p>Failed to load prompt data</p> - </ModalWrapper> + </Modal> ); if (!promptData) return ( - <ModalWrapper onClose={onClose} modalClassName="max-w-xl"> + <Modal onOutsideClick={onClose} width="w-full max-w-xl"> <p>Loading...</p> - </ModalWrapper> + </Modal> ); return ( - <ModalWrapper onClose={onClose} modalClassName="max-w-xl"> + <Modal onOutsideClick={onClose} width="w-full max-w-xl"> <Formik initialValues={{ prompt: promptData.prompt, @@ -52,7 +54,7 @@ const EditPromptModal = ({ }} > {({ isSubmitting, values }) => ( - <Form> + <Form className="items-stretch"> <h2 className="text-2xl text-emphasis font-bold mb-3 flex items-center"> <svg className="w-6 h-6 mr-2" @@ -73,7 +75,7 @@ const EditPromptModal = ({ Title </label> <Field - as={TextInput} + as={Input} id="prompt" name="prompt" placeholder="Title (e.g. 
'Draft email')" @@ -117,7 +119,6 @@ const EditPromptModal = ({ <div className="mt-6"> <Button type="submit" - className="w-full" disabled={ isSubmitting || (values.prompt === promptData.prompt && @@ -131,7 +132,7 @@ const EditPromptModal = ({ </Form> )} </Formik> - </ModalWrapper> + </Modal> ); }; diff --git a/web/src/app/admin/prompt-library/promptLibrary.tsx b/web/src/app/admin/prompt-library/promptLibrary.tsx index 8a046893dfb..c4f535e9b0d 100644 --- a/web/src/app/admin/prompt-library/promptLibrary.tsx +++ b/web/src/app/admin/prompt-library/promptLibrary.tsx @@ -8,15 +8,15 @@ import { Table, TableHead, TableRow, - TableHeaderCell, TableBody, TableCell, -} from "@tremor/react"; +} from "@/components/ui/table"; import { FilterDropdown } from "@/components/search/filtering/FilterDropdown"; import { FiTag } from "react-icons/fi"; import { PageSelector } from "@/components/PageSelector"; import { InputPrompt } from "./interfaces"; -import { Modal } from "@/components/Modal"; +import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal"; +import { TableHeader } from "@/components/ui/table"; const CategoryBubble = ({ name, @@ -120,6 +120,7 @@ export const PromptLibraryTable = ({ setPopup({ message: "Failed to delete input prompt", type: "error" }); } refresh(); + setConfirmDeletionId(null); }; const handleStatusSelect = (status: string) => { @@ -138,35 +139,16 @@ export const PromptLibraryTable = ({ return ( <div className="justify-center py-2"> {confirmDeletionId != null && ( - <Modal - onOutsideClick={() => setConfirmDeletionId(null)} - className="max-w-sm" - > - <> - <p className="text-lg mb-2"> - Are you sure you want to delete this prompt? You will not be able - to recover this prompt - </p> - <div className="mt-6 flex justify-between"> - <button - className="rounded py-1.5 px-2 bg-background-800 text-text-200" - onClick={async () => { - await handleDelete(confirmDeletionId); - setConfirmDeletionId(null); - }} - > - Yes - </button> - <button - onClick={() => setConfirmDeletionId(null)} - className="rounded py-1.5 px-2 bg-background-150 text-text-800" - > - {" "} - No - </button> - </div> - </> - </Modal> + <DeleteEntityModal + onClose={() => setConfirmDeletionId(null)} + onSubmit={() => handleDelete(confirmDeletionId)} + entityType="prompt" + entityName={ + paginatedPromptLibrary.find( + (prompt) => prompt.id === confirmDeletionId + )?.prompt ?? "" + } + /> )} <div className="flex items-center w-full border-2 border-border rounded-lg px-4 py-2 focus-within:border-accent"> @@ -192,7 +174,7 @@ export const PromptLibraryTable = ({ icon={<FiTag size={16} />} defaultDisplay="All Statuses" /> - <div className="flex flex-wrap pb-4 mt-3"> + <div className="flex flex-col items-stretch w-full flex-wrap pb-4 mt-3"> {selectedStatus.map((status) => ( <CategoryBubble key={status} @@ -204,15 +186,13 @@ export const PromptLibraryTable = ({ </div> <div className="mx-auto overflow-x-auto"> <Table> - <TableHead> + <TableHeader> <TableRow> {columns.map((column) => ( - <TableHeaderCell key={column.key}> - {column.name} - </TableHeaderCell> + <TableHead key={column.key}>{column.name}</TableHead> ))} </TableRow> - </TableHead> + </TableHeader> <TableBody> {paginatedPromptLibrary.length > 0 ? 
( paginatedPromptLibrary diff --git a/web/src/app/admin/prompt-library/promptSection.tsx b/web/src/app/admin/prompt-library/promptSection.tsx index f719ad500bd..015408ef0f0 100644 --- a/web/src/app/admin/prompt-library/promptSection.tsx +++ b/web/src/app/admin/prompt-library/promptSection.tsx @@ -3,7 +3,9 @@ import { usePopup } from "@/components/admin/connectors/Popup"; import { ThreeDotsLoader } from "@/components/Loading"; import { ErrorCallout } from "@/components/ErrorCallout"; -import { Button, Divider, Text } from "@tremor/react"; +import { Button } from "@/components/ui/button"; +import { Separator } from "@/components/ui/separator"; +import Text from "@/components/ui/text"; import { useState } from "react"; import AddPromptModal from "./modals/AddPromptModal"; import EditPromptModal from "./modals/EditPromptModal"; @@ -91,7 +93,9 @@ export const PromptSection = ({ return ( <div - className={`w-full ${centering ? "flex-col flex justify-center" : ""} mb-8`} + className={`w-full ${ + centering ? "flex-col flex justify-center" : "" + } mb-8`} > {popup} @@ -124,13 +128,13 @@ export const PromptSection = ({ <Button onClick={() => setNewPrompt(true)} className={centering ? "mx-auto" : ""} - color="green" - size="xs" + variant="navigate" + size="sm" > New Prompt </Button> - <Divider /> + <Separator /> <div> <PromptLibraryTable diff --git a/web/src/app/admin/settings/SettingsForm.tsx b/web/src/app/admin/settings/SettingsForm.tsx index 03a0171363e..6f8f7dee031 100644 --- a/web/src/app/admin/settings/SettingsForm.tsx +++ b/web/src/app/admin/settings/SettingsForm.tsx @@ -2,15 +2,14 @@ import { Label, SubLabel } from "@/components/admin/connectors/Field"; import { usePopup } from "@/components/admin/connectors/Popup"; -import { Title } from "@tremor/react"; +import Title from "@/components/ui/title"; +import { Button } from "@/components/ui/button"; import { Settings } from "./interfaces"; import { useRouter } from "next/navigation"; import { DefaultDropdown, Option } from "@/components/Dropdown"; -import { useContext } from "react"; +import React, { useContext, useState, useEffect } from "react"; import { SettingsContext } from "@/components/settings/SettingsProvider"; -import React, { useState, useEffect } from "react"; import { usePaidEnterpriseFeaturesEnabled } from "@/components/settings/usePaidEnterpriseFeaturesEnabled"; -import { Button } from "@tremor/react"; function Checkbox({ label, @@ -273,13 +272,17 @@ export function SettingsForm() { /> <Button onClick={handleSetChatRetention} - color="green" - size="xs" + variant="submit" + size="sm" className="mr-3" > Set Retention Limit </Button> - <Button onClick={handleClearChatRetention} color="blue" size="xs"> + <Button + onClick={handleClearChatRetention} + variant="default" + size="sm" + > Retain All </Button> </> diff --git a/web/src/app/admin/settings/interfaces.ts b/web/src/app/admin/settings/interfaces.ts index 8327d69d448..38959fc8cd2 100644 --- a/web/src/app/admin/settings/interfaces.ts +++ b/web/src/app/admin/settings/interfaces.ts @@ -1,3 +1,9 @@ +export enum GatingType { + FULL = "full", + PARTIAL = "partial", + NONE = "none", +} + export interface Settings { chat_page_enabled: boolean; search_page_enabled: boolean; @@ -6,14 +12,24 @@ export interface Settings { notifications: Notification[]; needs_reindexing: boolean; gpu_enabled: boolean; + product_gating: GatingType; +} + +export enum NotificationType { + PERSONA_SHARED = "persona_shared", + REINDEX_NEEDED = "reindex_needed", + TRIAL_ENDS_TWO_DAYS = "two_day_trial_ending", } 
export interface Notification { id: number; notif_type: string; + time_created: string; dismissed: boolean; - last_shown: string; - first_shown: string; + additional_data?: { + persona_id?: number; + [key: string]: any; + }; } export interface NavigationItem { diff --git a/web/src/app/admin/settings/page.tsx b/web/src/app/admin/settings/page.tsx index 9cb2f630e29..d8104fb568a 100644 --- a/web/src/app/admin/settings/page.tsx +++ b/web/src/app/admin/settings/page.tsx @@ -1,7 +1,7 @@ import { AdminPageTitle } from "@/components/admin/Title"; import { SettingsForm } from "./SettingsForm"; -import { Text } from "@tremor/react"; +import Text from "@/components/ui/text"; import { SettingsIcon } from "@/components/icons/icons"; export default async function Page() { diff --git a/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx b/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx index 085114a836b..16779b552ab 100644 --- a/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx +++ b/web/src/app/admin/token-rate-limits/CreateRateLimitModal.tsx @@ -1,7 +1,7 @@ "use client"; import * as Yup from "yup"; -import { Button } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { useEffect, useState } from "react"; import { Modal } from "@/components/Modal"; import { Form, Formik } from "formik"; @@ -72,7 +72,7 @@ export const CreateRateLimitModal = ({ <Modal title={"Create a Token Rate Limit"} onOutsideClick={() => setIsOpen(false)} - width="w-2/6" + width="max-w-2xl w-full" > <Formik initialValues={{ @@ -116,7 +116,7 @@ export const CreateRateLimitModal = ({ }} > {({ isSubmitting, values, setFieldValue }) => ( - <Form> + <Form className="overflow-visible px-2"> {!forSpecificScope && ( <SelectorFormField name="target_scope" @@ -158,8 +158,8 @@ export const CreateRateLimitModal = ({ /> <Button type="submit" - size="xs" - color="green" + variant="submit" + size="sm" disabled={isSubmitting} > Create! 
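Aside: the settings diff above introduces a GatingType enum, a product_gating field on Settings, and a richer Notification shape. The sketch below is illustrative only — the helper names describeGating and isPersonaNotification (and their message strings) are invented here, not part of this PR — and shows how a client could branch on those new fields:

enum GatingType {
  FULL = "full",
  PARTIAL = "partial",
  NONE = "none",
}

interface Notification {
  id: number;
  notif_type: string;
  time_created: string;
  dismissed: boolean;
  additional_data?: { persona_id?: number; [key: string]: any };
}

// Map the product gating level to an optional banner message (placeholder copy).
function describeGating(gating: GatingType): string | null {
  switch (gating) {
    case GatingType.FULL:
      return "Access is currently gated for this workspace.";
    case GatingType.PARTIAL:
      return "Some features are currently limited for this workspace.";
    default:
      return null;
  }
}

// Narrow a notification to the persona-shared case via additional_data.
function isPersonaNotification(n: Notification): boolean {
  return n.notif_type === "persona_shared" && n.additional_data?.persona_id != null;
}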
diff --git a/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx b/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx index ec5fdc60028..5607eb5fce6 100644 --- a/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx +++ b/web/src/app/admin/token-rate-limits/TokenRateLimitTables.tsx @@ -4,12 +4,10 @@ import { Table, TableHead, TableRow, - TableHeaderCell, TableBody, TableCell, - Title, - Text, -} from "@tremor/react"; +} from "@/components/ui/table"; +import Title from "@/components/ui/title"; import { DeleteButton } from "@/components/DeleteButton"; import { deleteTokenRateLimit, updateTokenRateLimit } from "./lib"; import { ThreeDotsLoader } from "@/components/Loading"; @@ -17,6 +15,8 @@ import { TokenRateLimitDisplay } from "./types"; import { errorHandlingFetcher } from "@/lib/fetcher"; import useSWR, { mutate } from "swr"; import { CustomCheckbox } from "@/components/CustomCheckbox"; +import { TableHeader } from "@/components/ui/table"; +import Text from "@/components/ui/text"; type TokenRateLimitTableArgs = { tokenRateLimits: TokenRateLimitDisplay[]; @@ -82,19 +82,19 @@ export const TokenRateLimitTable = ({ <Text className="my-2">{description}</Text> )} <Table - className={`overflow-visible ${!hideHeading && "my-8"} [&_td]:text-center [&_th]:text-center`} + className={`overflow-visible ${ + !hideHeading && "my-8" + } [&_td]:text-center [&_th]:text-center`} > - <TableHead> + <TableHeader> <TableRow> - <TableHeaderCell>Enabled</TableHeaderCell> - {shouldRenderGroupName() && ( - <TableHeaderCell>Group Name</TableHeaderCell> - )} - <TableHeaderCell>Time Window (Hours)</TableHeaderCell> - <TableHeaderCell>Token Budget (Thousands)</TableHeaderCell> - {isAdmin && <TableHeaderCell>Delete</TableHeaderCell>} + <TableHead>Enabled</TableHead> + {shouldRenderGroupName() && <TableHead>Group Name</TableHead>} + <TableHead>Time Window (Hours)</TableHead> + <TableHead>Token Budget (Thousands)</TableHead> + {isAdmin && <TableHead>Delete</TableHead>} </TableRow> - </TableHead> + </TableHeader> <TableBody> {tokenRateLimits.map((tokenRateLimit) => { return ( diff --git a/web/src/app/admin/token-rate-limits/page.tsx b/web/src/app/admin/token-rate-limits/page.tsx index fb4b711a2e0..4afc43376e3 100644 --- a/web/src/app/admin/token-rate-limits/page.tsx +++ b/web/src/app/admin/token-rate-limits/page.tsx @@ -1,17 +1,11 @@ "use client"; import { AdminPageTitle } from "@/components/admin/Title"; -import { - Button, - Tab, - TabGroup, - TabList, - TabPanel, - TabPanels, - Text, -} from "@tremor/react"; +import { Button } from "@/components/ui/button"; +import { Tabs, TabsList, TabsTrigger, TabsContent } from "@/components/ui/tabs"; +import Text from "@/components/ui/text"; import { useState } from "react"; -import { FiGlobe, FiShield, FiUser, FiUsers } from "react-icons/fi"; +import { FiGlobe, FiUser, FiUsers } from "react-icons/fi"; import { insertGlobalTokenRateLimit, insertGroupTokenRateLimit, @@ -145,53 +139,63 @@ function Main() { </ul> <Button - color="green" - size="xs" - className="mt-3" + variant="navigate" + size="sm" + className="my-4" onClick={() => setModalIsOpen(true)} > Create a Token Rate Limit </Button> {isPaidEnterpriseFeaturesEnabled && ( - <TabGroup className="mt-6" index={tabIndex} onIndexChange={setTabIndex}> - <TabList variant="line"> - <Tab icon={FiGlobe}>Global</Tab> - <Tab icon={FiUser}>User</Tab> - <Tab icon={FiUsers}>User Groups</Tab> - </TabList> - <TabPanels className="mt-6"> - <TabPanel> - <GenericTokenRateLimitTable - fetchUrl={GLOBAL_TOKEN_FETCH_URL} - 
title={"Global Token Rate Limits"} - description={GLOBAL_DESCRIPTION} - /> - </TabPanel> - <TabPanel> - <GenericTokenRateLimitTable - fetchUrl={USER_TOKEN_FETCH_URL} - title={"User Token Rate Limits"} - description={USER_DESCRIPTION} - /> - </TabPanel> - <TabPanel> - <GenericTokenRateLimitTable - fetchUrl={USER_GROUP_FETCH_URL} - title={"User Group Token Rate Limits"} - description={USER_GROUP_DESCRIPTION} - responseMapper={(data: Record<string, TokenRateLimit[]>) => - Object.entries(data).flatMap(([group_name, elements]) => - elements.map((element) => ({ - ...element, - group_name, - })) - ) - } - /> - </TabPanel> - </TabPanels> - </TabGroup> + <Tabs + value={tabIndex.toString()} + onValueChange={(val) => setTabIndex(parseInt(val))} + > + <TabsList> + <TabsTrigger value="0" className="flex items-center gap-2"> + <FiGlobe /> + Global + </TabsTrigger> + <TabsTrigger value="1" className="flex items-center gap-2"> + <FiUser /> + User + </TabsTrigger> + <TabsTrigger value="2" className="flex items-center gap-2"> + <FiUsers /> + User Groups + </TabsTrigger> + </TabsList> + <TabsContent value="0"> + <GenericTokenRateLimitTable + fetchUrl={GLOBAL_TOKEN_FETCH_URL} + title={"Global Token Rate Limits"} + description={GLOBAL_DESCRIPTION} + /> + </TabsContent> + <TabsContent value="1"> + <GenericTokenRateLimitTable + fetchUrl={USER_TOKEN_FETCH_URL} + title={"User Token Rate Limits"} + description={USER_DESCRIPTION} + /> + </TabsContent> + <TabsContent value="2"> + <GenericTokenRateLimitTable + fetchUrl={USER_GROUP_FETCH_URL} + title={"User Group Token Rate Limits"} + description={USER_GROUP_DESCRIPTION} + responseMapper={(data: Record<string, TokenRateLimit[]>) => + Object.entries(data).flatMap(([group_name, elements]) => + elements.map((element) => ({ + ...element, + group_name, + })) + ) + } + /> + </TabsContent> + </Tabs> )} {!isPaidEnterpriseFeaturesEnabled && ( diff --git a/web/src/app/admin/tools/ToolEditor.tsx b/web/src/app/admin/tools/ToolEditor.tsx index b4df98f8623..354701d4c8a 100644 --- a/web/src/app/admin/tools/ToolEditor.tsx +++ b/web/src/app/admin/tools/ToolEditor.tsx @@ -13,7 +13,7 @@ import { import * as Yup from "yup"; import { MethodSpec, ToolSnapshot } from "@/lib/tools/interfaces"; import { TextFormField } from "@/components/admin/connectors/Field"; -import { Button, Divider, Text } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { createCustomTool, updateCustomTool, @@ -23,6 +23,7 @@ import { usePopup } from "@/components/admin/connectors/Popup"; import debounce from "lodash/debounce"; import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; import Link from "next/link"; +import { Separator } from "@/components/ui/separator"; function parseJsonWithTrailingCommas(jsonString: string) { // Regular expression to remove trailing commas before } or ] @@ -64,28 +65,31 @@ function ToolForm({ const [definitionError, setDefinitionError] = definitionErrorState; const [methodSpecs, setMethodSpecs] = methodSpecsState; const [showAdvancedOptions, setShowAdvancedOptions] = useState(false); - const debouncedValidateDefinition = useCallback( - debounce(async (definition: string) => { - try { - const parsedDefinition = parseJsonWithTrailingCommas(definition); - const response = await validateToolDefinition({ - definition: parsedDefinition, - }); - if (response.error) { + (definition: string) => { + const validateDefinition = async () => { + try { + const parsedDefinition = parseJsonWithTrailingCommas(definition); + const response = await 
validateToolDefinition({ + definition: parsedDefinition, + }); + if (response.error) { + setMethodSpecs(null); + setDefinitionError(response.error); + } else { + setMethodSpecs(response.data); + setDefinitionError(null); + } + } catch (error) { + console.log(error); setMethodSpecs(null); - setDefinitionError(response.error); - } else { - setMethodSpecs(response.data); - setDefinitionError(null); + setDefinitionError("Invalid JSON format"); } - } catch (error) { - console.log(error); - setMethodSpecs(null); - setDefinitionError("Invalid JSON format"); - } - }, 300), - [] + }; + + debounce(validateDefinition, 300)(); + }, + [setMethodSpecs, setDefinitionError] ); useEffect(() => { @@ -241,7 +245,7 @@ function ToolForm({ <Button type="button" onClick={() => arrayHelpers.remove(index)} - color="red" + variant="destructive" size="sm" className="transition-colors duration-200 hover:bg-red-600" > @@ -256,8 +260,8 @@ function ToolForm({ <Button type="button" onClick={() => arrayHelpers.push({ key: "", value: "" })} - color="blue" - size="md" + variant="secondary" + size="sm" className="transition-colors duration-200" > Add New Header @@ -268,13 +272,13 @@ function ToolForm({ </div> )} - <Divider /> + <Separator /> <div className="flex"> <Button className="mx-auto" - color="green" - size="md" + variant="submit" + size="sm" type="submit" disabled={isSubmitting || !!definitionError} > @@ -318,10 +322,11 @@ export function ToolEditor({ tool }: { tool?: ToolSnapshot }) { <Formik initialValues={{ definition: prettifiedDefinition, - customHeaders: tool?.custom_headers?.map((header) => ({ - key: header.key, - value: header.value, - })) ?? [{ key: "test", value: "value" }], + customHeaders: + tool?.custom_headers?.map((header) => ({ + key: header.key, + value: header.value, + })) ?? 
[], }} validationSchema={ToolSchema} onSubmit={async (values: ToolFormValues) => { diff --git a/web/src/app/admin/tools/ToolsTable.tsx b/web/src/app/admin/tools/ToolsTable.tsx index 88b91eddafa..30bb8b34f60 100644 --- a/web/src/app/admin/tools/ToolsTable.tsx +++ b/web/src/app/admin/tools/ToolsTable.tsx @@ -1,20 +1,19 @@ "use client"; import { - Text, Table, TableHead, TableRow, - TableHeaderCell, TableBody, TableCell, -} from "@tremor/react"; +} from "@/components/ui/table"; import { ToolSnapshot } from "@/lib/tools/interfaces"; import { useRouter } from "next/navigation"; import { usePopup } from "@/components/admin/connectors/Popup"; import { FiCheckCircle, FiEdit2, FiXCircle } from "react-icons/fi"; import { TrashIcon } from "@/components/icons/icons"; import { deleteCustomTool } from "@/lib/tools/edit"; +import { TableHeader } from "@/components/ui/table"; export function ToolsTable({ tools }: { tools: ToolSnapshot[] }) { const router = useRouter(); @@ -28,14 +27,14 @@ export function ToolsTable({ tools }: { tools: ToolSnapshot[] }) { {popup} <Table> - <TableHead> + <TableHeader> <TableRow> - <TableHeaderCell>Name</TableHeaderCell> - <TableHeaderCell>Description</TableHeaderCell> - <TableHeaderCell>Built In?</TableHeaderCell> - <TableHeaderCell>Delete</TableHeaderCell> + <TableHead>Name</TableHead> + <TableHead>Description</TableHead> + <TableHead>Built In?</TableHead> + <TableHead>Delete</TableHead> </TableRow> - </TableHead> + </TableHeader> <TableBody> {sortedTools.map((tool) => ( <TableRow key={tool.id.toString()}> diff --git a/web/src/app/admin/tools/edit/[toolId]/DeleteToolButton.tsx b/web/src/app/admin/tools/edit/[toolId]/DeleteToolButton.tsx index c02e141b54a..1c1c9528304 100644 --- a/web/src/app/admin/tools/edit/[toolId]/DeleteToolButton.tsx +++ b/web/src/app/admin/tools/edit/[toolId]/DeleteToolButton.tsx @@ -1,6 +1,6 @@ "use client"; -import { Button } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { FiTrash } from "react-icons/fi"; import { deleteCustomTool } from "@/lib/tools/edit"; import { useRouter } from "next/navigation"; @@ -10,8 +10,8 @@ export function DeleteToolButton({ toolId }: { toolId: number }) { return ( <Button - size="xs" - color="red" + variant="destructive" + size="sm" onClick={async () => { const response = await deleteCustomTool(toolId); if (response.data) { diff --git a/web/src/app/admin/tools/edit/[toolId]/page.tsx b/web/src/app/admin/tools/edit/[toolId]/page.tsx index 8ae1e908a2d..c88ca3862e6 100644 --- a/web/src/app/admin/tools/edit/[toolId]/page.tsx +++ b/web/src/app/admin/tools/edit/[toolId]/page.tsx @@ -1,14 +1,18 @@ import { ErrorCallout } from "@/components/ErrorCallout"; -import { Card, Text, Title } from "@tremor/react"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; +import CardSection from "@/components/admin/CardSection"; import { ToolEditor } from "@/app/admin/tools/ToolEditor"; import { fetchToolByIdSS } from "@/lib/tools/fetchTools"; import { DeleteToolButton } from "./DeleteToolButton"; -import { FiTool } from "react-icons/fi"; import { AdminPageTitle } from "@/components/admin/Title"; import { BackButton } from "@/components/BackButton"; import { ToolIcon } from "@/components/icons/icons"; -export default async function Page({ params }: { params: { toolId: string } }) { +export default async function Page(props: { + params: Promise<{ toolId: string }>; +}) { + const params = await props.params; const tool = await fetchToolByIdSS(params.toolId); let body; @@ -26,9 
+30,9 @@ export default async function Page({ params }: { params: { toolId: string } }) { <div className="w-full my-8"> <div> <div> - <Card> + <CardSection> <ToolEditor tool={tool} /> - </Card> + </CardSection> <Title className="mt-12">Delete Tool Click the button below to permanently delete this tool. diff --git a/web/src/app/admin/tools/new/page.tsx b/web/src/app/admin/tools/new/page.tsx index efff155be58..9146564e698 100644 --- a/web/src/app/admin/tools/new/page.tsx +++ b/web/src/app/admin/tools/new/page.tsx @@ -4,8 +4,7 @@ import { ToolEditor } from "@/app/admin/tools/ToolEditor"; import { BackButton } from "@/components/BackButton"; import { AdminPageTitle } from "@/components/admin/Title"; import { ToolIcon } from "@/components/icons/icons"; -import { Card } from "@tremor/react"; -import { FiTool } from "react-icons/fi"; +import CardSection from "@/components/admin/CardSection"; export default function NewToolPage() { return ( @@ -17,9 +16,9 @@ export default function NewToolPage() { icon={} /> - + - +
); } diff --git a/web/src/app/admin/tools/page.tsx b/web/src/app/admin/tools/page.tsx index 543f89ac367..6ccb2480163 100644 --- a/web/src/app/admin/tools/page.tsx +++ b/web/src/app/admin/tools/page.tsx @@ -1,8 +1,10 @@ import { ToolsTable } from "./ToolsTable"; import { ToolSnapshot } from "@/lib/tools/interfaces"; -import { FiPlusSquare, FiTool } from "react-icons/fi"; +import { FiPlusSquare } from "react-icons/fi"; import Link from "next/link"; -import { Divider, Text, Title } from "@tremor/react"; +import { Separator } from "@/components/ui/separator"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; import { fetchSS } from "@/lib/utilsSS"; import { ErrorCallout } from "@/components/ErrorCallout"; import { AdminPageTitle } from "@/components/admin/Title"; @@ -34,7 +36,7 @@ export default async function Page() {
- + Create a Tool - + Existing Tools diff --git a/web/src/app/admin/users/page.tsx b/web/src/app/admin/users/page.tsx index bf86add743d..bc8efd232b3 100644 --- a/web/src/app/admin/users/page.tsx +++ b/web/src/app/admin/users/page.tsx @@ -6,13 +6,13 @@ import { useState } from "react"; import { FiPlusSquare } from "react-icons/fi"; import { Modal } from "@/components/Modal"; -import { Button, Text } from "@tremor/react"; +import { Button } from "@/components/ui/button"; +import Text from "@/components/ui/text"; import { LoadingAnimation } from "@/components/Loading"; import { AdminPageTitle } from "@/components/admin/Title"; import { usePopup, PopupSpec } from "@/components/admin/connectors/Popup"; import { UsersIcon } from "@/components/icons/icons"; import { errorHandlingFetcher } from "@/lib/fetcher"; -import { type User, UserStatus } from "@/lib/types"; import useSWR, { mutate } from "swr"; import { ErrorCallout } from "@/components/ErrorCallout"; import { HidableSection } from "@/app/admin/assistants/HidableSection"; diff --git a/web/src/app/api/[...path]/route.ts b/web/src/app/api/[...path]/route.ts index 6ca13aba146..0ebd8210a81 100644 --- a/web/src/app/api/[...path]/route.ts +++ b/web/src/app/api/[...path]/route.ts @@ -6,50 +6,57 @@ each request type >:( */ export async function GET( request: NextRequest, - { params }: { params: { path: string[] } } + props: { params: Promise<{ path: string[] }> } ) { + const params = await props.params; return handleRequest(request, params.path); } export async function POST( request: NextRequest, - { params }: { params: { path: string[] } } + props: { params: Promise<{ path: string[] }> } ) { + const params = await props.params; return handleRequest(request, params.path); } export async function PUT( request: NextRequest, - { params }: { params: { path: string[] } } + props: { params: Promise<{ path: string[] }> } ) { + const params = await props.params; return handleRequest(request, params.path); } export async function PATCH( request: NextRequest, - { params }: { params: { path: string[] } } + props: { params: Promise<{ path: string[] }> } ) { + const params = await props.params; return handleRequest(request, params.path); } export async function DELETE( request: NextRequest, - { params }: { params: { path: string[] } } + props: { params: Promise<{ path: string[] }> } ) { + const params = await props.params; return handleRequest(request, params.path); } export async function HEAD( request: NextRequest, - { params }: { params: { path: string[] } } + props: { params: Promise<{ path: string[] }> } ) { + const params = await props.params; return handleRequest(request, params.path); } export async function OPTIONS( request: NextRequest, - { params }: { params: { path: string[] } } + props: { params: Promise<{ path: string[] }> } ) { + const params = await props.params; return handleRequest(request, params.path); } diff --git a/web/src/app/assistants/SidebarWrapper.tsx b/web/src/app/assistants/SidebarWrapper.tsx index 2feae589240..a5a1cdb4148 100644 --- a/web/src/app/assistants/SidebarWrapper.tsx +++ b/web/src/app/assistants/SidebarWrapper.tsx @@ -6,7 +6,14 @@ import { Folder } from "@/app/chat/folders/interfaces"; import { User } from "@/lib/types"; import Cookies from "js-cookie"; import { SIDEBAR_TOGGLED_COOKIE_NAME } from "@/components/resizable/constants"; -import { ReactNode, useContext, useEffect, useRef, useState } from "react"; +import { + ReactNode, + useCallback, + useContext, + useEffect, + useRef, + useState, +} from "react"; import { 
useSidebarVisibility } from "@/components/chat_search/hooks"; import FunctionalHeader from "@/components/chat_search/Header"; import { useRouter } from "next/navigation"; @@ -19,14 +26,9 @@ interface SidebarWrapperProps { folders?: Folder[]; initiallyToggled: boolean; openedFolders?: { [key: number]: boolean }; - content: (props: T) => ReactNode; - headerProps: { - page: pageType; - user: User | null; - }; - contentProps: T; page: pageType; size?: "sm" | "lg"; + children: ReactNode; } export default function SidebarWrapper({ @@ -35,10 +37,8 @@ export default function SidebarWrapper({ folders, openedFolders, page, - headerProps, - contentProps, - content, size = "sm", + children, }: SidebarWrapperProps) { const [toggledSidebar, setToggledSidebar] = useState(initiallyToggled); const [showDocSidebar, setShowDocSidebar] = useState(false); // State to track if sidebar is open @@ -54,7 +54,7 @@ export default function SidebarWrapper({ }, 200); }; - const toggleSidebar = () => { + const toggleSidebar = useCallback(() => { Cookies.set( SIDEBAR_TOGGLED_COOKIE_NAME, String(!toggledSidebar).toLocaleLowerCase() @@ -63,7 +63,7 @@ export default function SidebarWrapper({ path: "/", }; setToggledSidebar((toggledSidebar) => !toggledSidebar); - }; + }, [toggledSidebar]); const sidebarElementRef = useRef(null); @@ -137,7 +137,6 @@ export default function SidebarWrapper({ sidebarToggled={toggledSidebar} toggleSidebar={toggleSidebar} page="assistants" - user={headerProps.user} />
({ />
- {content(contentProps)} + {children}
- +
); } diff --git a/web/src/app/assistants/edit/[id]/page.tsx b/web/src/app/assistants/edit/[id]/page.tsx index caf1d0cbf14..e8dedb80a8e 100644 --- a/web/src/app/assistants/edit/[id]/page.tsx +++ b/web/src/app/assistants/edit/[id]/page.tsx @@ -1,13 +1,16 @@ import { ErrorCallout } from "@/components/ErrorCallout"; -import { Card, Text, Title } from "@tremor/react"; +import Text from "@/components/ui/text"; +import CardSection from "@/components/admin/CardSection"; import { HeaderWrapper } from "@/components/header/HeaderWrapper"; import { AssistantEditor } from "@/app/admin/assistants/AssistantEditor"; import { SuccessfulPersonaUpdateRedirectType } from "@/app/admin/assistants/enums"; import { fetchAssistantEditorInfoSS } from "@/lib/assistants/fetchPersonaEditorInfoSS"; import { DeletePersonaButton } from "@/app/admin/assistants/[id]/DeletePersonaButton"; import { LargeBackButton } from "../../LargeBackButton"; +import Title from "@/components/ui/title"; -export default async function Page({ params }: { params: { id: string } }) { +export default async function Page(props: { params: Promise<{ id: string }> }) { + const params = await props.params; const [values, error] = await fetchAssistantEditorInfoSS(params.id); let body; @@ -22,14 +25,14 @@ export default async function Page({ params }: { params: { id: string } }) {
- + - + Delete Assistant Click the button below to permanently delete this assistant. diff --git a/web/src/app/assistants/gallery/AssistantsGallery.tsx b/web/src/app/assistants/gallery/AssistantsGallery.tsx index 8926238b454..cf5b81a9a28 100644 --- a/web/src/app/assistants/gallery/AssistantsGallery.tsx +++ b/web/src/app/assistants/gallery/AssistantsGallery.tsx @@ -3,8 +3,7 @@ import { Persona } from "@/app/admin/assistants/interfaces"; import { AssistantIcon } from "@/components/assistants/AssistantIcon"; import { User } from "@/lib/types"; -import { Button } from "@tremor/react"; -import Link from "next/link"; +import { Button } from "@/components/ui/button"; import { useState } from "react"; import { FiList, FiMinus, FiPlus } from "react-icons/fi"; import { AssistantsPageTitle } from "../AssistantsPageTitle"; @@ -16,6 +15,8 @@ import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; import { useRouter } from "next/navigation"; import { AssistantTools } from "../ToolsDisplay"; import { classifyAssistants } from "@/lib/assistants/utils"; +import { useAssistants } from "@/components/context/AssistantsContext"; +import { useUser } from "@/components/user/UserProvider"; export function AssistantGalleryCard({ assistant, user, @@ -27,6 +28,7 @@ export function AssistantGalleryCard({ setPopup: (popup: PopupSpec) => void; selectedAssistant: boolean; }) { + const { refreshUser } = useUser(); const router = useRouter(); return (
Deselect @@ -109,7 +112,7 @@ export function AssistantGalleryCard({ message: `"${assistant.name}" has been added to your list.`, type: "success", }); - router.refresh(); + await refreshUser(); } else { setPopup({ message: `"${assistant.name}" could not be added to your list.`, @@ -117,8 +120,8 @@ export function AssistantGalleryCard({ }); } }} - size="xs" - color="green" + size="sm" + variant="submit" > Add @@ -137,14 +140,10 @@ export function AssistantGalleryCard({
); } -export function AssistantsGallery({ - assistants, - user, -}: { - assistants: Persona[]; +export function AssistantsGallery() { + const { assistants } = useAssistants(); + const { user } = useUser(); - user: User | null; -}) { const router = useRouter(); const [searchQuery, setSearchQuery] = useState(""); @@ -180,7 +179,8 @@ export function AssistantsGallery({
)}
+ {isUpdating && } ); } diff --git a/web/src/app/assistants/mine/AssistantsList.tsx b/web/src/app/assistants/mine/AssistantsList.tsx index a16c22d3ac8..0290973067e 100644 --- a/web/src/app/assistants/mine/AssistantsList.tsx +++ b/web/src/app/assistants/mine/AssistantsList.tsx @@ -1,15 +1,10 @@ "use client"; -import React, { - Dispatch, - ReactNode, - SetStateAction, - useEffect, - useState, -} from "react"; +import React, { Dispatch, SetStateAction, useEffect, useState } from "react"; import { MinimalUserSnapshot, User } from "@/lib/types"; import { Persona } from "@/app/admin/assistants/interfaces"; -import { Button, Divider, Text } from "@tremor/react"; +import { Button } from "@/components/ui/button"; +import { Separator } from "@/components/ui/separator"; import { FiEdit2, FiList, @@ -51,8 +46,8 @@ import { SortableContext, sortableKeyboardCoordinates, verticalListSortingStrategy, + useSortable, } from "@dnd-kit/sortable"; -import { useSortable } from "@dnd-kit/sortable"; import { DragHandle } from "@/components/table/DragHandle"; import { @@ -61,12 +56,9 @@ import { } from "@/app/admin/assistants/lib"; import { DeleteEntityModal } from "@/components/modals/DeleteEntityModal"; import { MakePublicAssistantModal } from "@/app/chat/modal/MakePublicAssistantModal"; -import { - classifyAssistants, - getUserCreatedAssistants, - orderAssistantsForUser, -} from "@/lib/assistants/utils"; import { CustomTooltip } from "@/components/tooltip/CustomTooltip"; +import { useAssistants } from "@/components/context/AssistantsContext"; +import { useUser } from "@/components/user/UserProvider"; function DraggableAssistantListItem(props: any) { const { @@ -118,6 +110,7 @@ function AssistantListItem({ setPopup: (popupSpec: PopupSpec | null) => void; isDragging?: boolean; }) { + const { refreshUser } = useUser(); const router = useRouter(); const [showSharingModal, setShowSharingModal] = useState(false); @@ -138,7 +131,9 @@ function AssistantListItem({ show={showSharingModal} />
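The gallery and list hunks in this part of the diff replace router.refresh() with the provider-backed refreshUser()/refreshAssistants() calls. A condensed, non-verbatim sketch of that pattern, assuming the hooks and import paths behave as shown in the surrounding diff:

import { arrayMove } from "@dnd-kit/sortable";
import { updateUserAssistantList } from "@/app/admin/assistants/lib";
import { useAssistants } from "@/components/context/AssistantsContext";
import { useUser } from "@/components/user/UserProvider";
import { Persona } from "@/app/admin/assistants/interfaces";

// Reorder an assistant locally, persist the new order, then refresh client state
// from the providers instead of forcing a full router.refresh().
function useReorderAssistants(current: Persona[]) {
  const { refreshAssistants } = useAssistants();
  const { refreshUser } = useUser();

  return async (fromIndex: number, toIndex: number) => {
    const reordered = arrayMove(current, fromIndex, toIndex);
    await updateUserAssistantList(reordered.map((a) => a.id));
    await refreshUser();
    await refreshAssistants();
    return reordered;
  };
}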
@@ -212,7 +207,7 @@ function AssistantListItem({ message: `"${assistant.name}" has been removed from your list.`, type: "success", }); - router.refresh(); + await refreshUser(); } else { setPopup({ message: `"${assistant.name}" could not be removed from your list.`, @@ -235,7 +230,7 @@ function AssistantListItem({ message: `"${assistant.name}" has been added to your list.`, type: "success", }); - router.refresh(); + await refreshUser(); } else { setPopup({ message: `"${assistant.name}" could not be added to your list.`, @@ -290,32 +285,20 @@ function AssistantListItem({ ); } -export function AssistantsList({ - user, - assistants, -}: { - user: User | null; - assistants: Persona[]; -}) { - // Define the distinct groups of assistants - const { visibleAssistants, hiddenAssistants } = classifyAssistants( - user, - assistants - ); +export function AssistantsList() { + const { + assistants, + ownedButHiddenAssistants, + finalAssistants, + refreshAssistants, + } = useAssistants(); - const [currentlyVisibleAssistants, setCurrentlyVisibleAssistants] = useState< - Persona[] - >([]); + const [currentlyVisibleAssistants, setCurrentlyVisibleAssistants] = + useState(finalAssistants); useEffect(() => { - const orderedAssistants = orderAssistantsForUser(visibleAssistants, user); - setCurrentlyVisibleAssistants(orderedAssistants); - }, [assistants, user]); - - const ownedButHiddenAssistants = getUserCreatedAssistants( - user, - hiddenAssistants - ); + setCurrentlyVisibleAssistants(finalAssistants); + }, [finalAssistants]); const allAssistantIds = assistants.map((assistant) => assistant.id.toString() @@ -326,6 +309,8 @@ export function AssistantsList({ null ); + const { refreshUser, user } = useUser(); + const { popup, setPopup } = usePopup(); const router = useRouter(); const { data: users } = useSWR( @@ -344,18 +329,22 @@ export function AssistantsList({ const { active, over } = event; if (over && active.id !== over.id) { - setCurrentlyVisibleAssistants((assistants) => { - const oldIndex = assistants.findIndex( - (a) => a.id.toString() === active.id - ); - const newIndex = assistants.findIndex( - (a) => a.id.toString() === over.id - ); - const newAssistants = arrayMove(assistants, oldIndex, newIndex); - - updateUserAssistantList(newAssistants.map((a) => a.id)); - return newAssistants; - }); + const oldIndex = currentlyVisibleAssistants.findIndex( + (item) => item.id.toString() === active.id + ); + const newIndex = currentlyVisibleAssistants.findIndex( + (item) => item.id.toString() === over.id + ); + const updatedAssistants = arrayMove( + currentlyVisibleAssistants, + oldIndex, + newIndex + ); + + setCurrentlyVisibleAssistants(updatedAssistants); + await updateUserAssistantList(updatedAssistants.map((a) => a.id)); + await refreshUser(); + await refreshAssistants(); } } @@ -374,7 +363,7 @@ export function AssistantsList({ message: `"${deletingPersona.name}" has been deleted.`, type: "success", }); - router.refresh(); + await refreshUser(); } else { setPopup({ message: `"${deletingPersona.name}" could not be deleted.`, @@ -395,7 +384,7 @@ export function AssistantsList({ makePublicPersona.id, newPublicStatus ); - router.refresh(); + await refreshAssistants(); }} /> )} @@ -405,8 +394,9 @@ export function AssistantsList({
+          Have an account with a different email?{" "}
+          Sign in
+ + ); +}; + +export default Page; diff --git a/web/src/app/auth/error/page.tsx b/web/src/app/auth/error/page.tsx index 4f288cd205f..3bee4984161 100644 --- a/web/src/app/auth/error/page.tsx +++ b/web/src/app/auth/error/page.tsx @@ -1,21 +1,49 @@ "use client"; -import { Button } from "@tremor/react"; +import AuthFlowContainer from "@/components/auth/AuthFlowContainer"; +import { Button } from "@/components/ui/button"; import Link from "next/link"; import { FiLogIn } from "react-icons/fi"; const Page = () => { return ( -
-        Unable to login, please try again and/or contact an administrator.
+        Authentication Error
+        We encountered an issue while attempting to log you in.
+        Possible Issues:
+          • Incorrect or expired login credentials
+          • Temporary authentication system disruption
+          • Account access restrictions or permissions
+        We recommend trying again. If you continue to experience problems,
+        please reach out to your system administrator for assistance.
+ ); }; diff --git a/web/src/app/auth/impersonate/page.tsx b/web/src/app/auth/impersonate/page.tsx new file mode 100644 index 00000000000..1a2c77d2cdb --- /dev/null +++ b/web/src/app/auth/impersonate/page.tsx @@ -0,0 +1,132 @@ +"use client"; +import AuthFlowContainer from "@/components/auth/AuthFlowContainer"; +import { HealthCheckBanner } from "@/components/health/healthcheck"; +import { useUser } from "@/components/user/UserProvider"; +import { redirect, useRouter } from "next/navigation"; +import { Formik, Form, Field } from "formik"; +import * as Yup from "yup"; +import { usePopup } from "@/components/admin/connectors/Popup"; + +const ImpersonateSchema = Yup.object().shape({ + email: Yup.string().email("Invalid email").required("Required"), + apiKey: Yup.string().required("Required"), +}); + +export default function ImpersonatePage() { + const router = useRouter(); + const { user, isLoadingUser, isCloudSuperuser } = useUser(); + const { popup, setPopup } = usePopup(); + + if (isLoadingUser) { + return null; + } + + if (!user) { + redirect("/auth/login"); + } + + if (!isCloudSuperuser) { + redirect("/search"); + } + + const handleImpersonate = async (values: { + email: string; + apiKey: string; + }) => { + try { + const response = await fetch("/api/tenants/impersonate", { + method: "POST", + headers: { + "Content-Type": "application/json", + Authorization: `Bearer ${values.apiKey}`, + }, + body: JSON.stringify({ email: values.email }), + credentials: "same-origin", + }); + + if (!response.ok) { + const errorData = await response.json(); + setPopup({ + message: errorData.detail || "Failed to impersonate user", + type: "error", + }); + } else { + router.push("/search"); + } + } catch (error) { + setPopup({ + message: + error instanceof Error ? error.message : "Failed to impersonate user", + type: "error", + }); + } + }; + + return ( + + {popup} +
+        Impersonate User
+        {({ errors, touched }) => (
+          {errors.email && touched.email && (
+            {errors.email}
+          )}
+          {errors.apiKey && touched.apiKey && (
+            {errors.apiKey}
+          )}
+        )}
+        Note: This feature is only available for @danswer.ai administrators
+ ); +} diff --git a/web/src/app/auth/login/EmailPasswordForm.tsx b/web/src/app/auth/login/EmailPasswordForm.tsx index 6862baa600c..334c74d14f7 100644 --- a/web/src/app/auth/login/EmailPasswordForm.tsx +++ b/web/src/app/auth/login/EmailPasswordForm.tsx @@ -3,7 +3,7 @@ import { TextFormField } from "@/components/admin/connectors/Field"; import { usePopup } from "@/components/admin/connectors/Popup"; import { basicLogin, basicSignup } from "@/lib/user"; -import { Button } from "@tremor/react"; +import { Button } from "@/components/ui/button"; import { Form, Formik } from "formik"; import { useRouter } from "next/navigation"; import * as Yup from "yup"; diff --git a/web/src/app/auth/login/LoginText.tsx b/web/src/app/auth/login/LoginText.tsx index b465ad33530..7b5eb97fb8e 100644 --- a/web/src/app/auth/login/LoginText.tsx +++ b/web/src/app/auth/login/LoginText.tsx @@ -1,17 +1,15 @@ "use client"; -import React from "react"; -import { useContext } from "react"; +import React, { useContext } from "react"; import { SettingsContext } from "@/components/settings/SettingsProvider"; export const LoginText = () => { const settings = useContext(SettingsContext); - - if (!settings) { - throw new Error("SettingsContext is not available"); - } - return ( - <>Log In to {settings?.enterpriseSettings?.application_name || "Danswer"} + <> + Log In to{" "} + {(settings && settings?.enterpriseSettings?.application_name) || + "Danswer"} + ); }; diff --git a/web/src/app/auth/login/SignInButton.tsx b/web/src/app/auth/login/SignInButton.tsx index 9d04321e80a..128f5790c6e 100644 --- a/web/src/app/auth/login/SignInButton.tsx +++ b/web/src/app/auth/login/SignInButton.tsx @@ -9,7 +9,7 @@ export function SignInButton({ authType: AuthType; }) { let button; - if (authType === "google_oauth") { + if (authType === "google_oauth" || authType === "cloud") { button = (
@@ -42,7 +42,7 @@ export function SignInButton({ return ( {button} diff --git a/web/src/app/auth/login/page.tsx b/web/src/app/auth/login/page.tsx index 623c2e06a7e..ee3f17555f9 100644 --- a/web/src/app/auth/login/page.tsx +++ b/web/src/app/auth/login/page.tsx @@ -9,19 +9,20 @@ import { import { redirect } from "next/navigation"; import { SignInButton } from "./SignInButton"; import { EmailPasswordForm } from "./EmailPasswordForm"; -import { Card, Title, Text } from "@tremor/react"; +import Title from "@/components/ui/title"; +import Text from "@/components/ui/text"; import Link from "next/link"; -import { Logo } from "@/components/Logo"; import { LoginText } from "./LoginText"; import { getSecondsUntilExpiration } from "@/lib/time"; import { headers } from 'next/headers'; import { HeaderLoginLoading } from "./HeaderLogin"; +import AuthFlowContainer from "@/components/auth/AuthFlowContainer"; +import CardSection from "@/components/admin/CardSection"; -const Page = async ({ - searchParams, -}: { - searchParams?: { [key: string]: string | string[] | undefined }; +const Page = async (props: { + searchParams?: Promise<{ [key: string]: string | string[] | undefined }>; }) => { + const searchParams = await props.searchParams; const autoRedirectDisabled = searchParams?.disableAutoRedirect === "true"; // catch cases where the backend is completely unreachable here @@ -38,6 +39,10 @@ const Page = async ({ console.log(`Some fetch failed for the login page - ${e}`); } + const nextUrl = Array.isArray(searchParams?.next) + ? searchParams?.next[0] + : searchParams?.next || null; + // simply take the user to the home page if Auth is disabled if (authTypeMetadata?.authType === "disabled") { return redirect("/"); @@ -53,7 +58,6 @@ const Page = async ({ if (authTypeMetadata?.requiresVerification && !currentUser.is_verified) { return redirect("/auth/waiting-on-verification"); } - return redirect("/"); } @@ -61,7 +65,7 @@ const Page = async ({ let authUrl: string | null = null; if (authTypeMetadata) { try { - authUrl = await getAuthUrlSS(authTypeMetadata.authType); + authUrl = await getAuthUrlSS(authTypeMetadata.authType, nextUrl!); } catch (e) { console.log(`Some fetch failed for the login page - ${e}`); } @@ -71,53 +75,71 @@ const Page = async ({ return redirect(authUrl); } - const userHeader = headers().get('x-remote-user'); - const groupsHeader = headers().get('x-remote-group'); + const userHeader = (await headers()).get('x-remote-user'); + const groupsHeader = (await headers()).get('x-remote-group'); return ( -
+
-
-
- - {authUrl && authTypeMetadata && ( - <> -

- -

- - - )} - {/* TODO: Make header login it's own auth type */} - {authTypeMetadata?.authType === "basic" && ( - (userHeader && groupsHeader) ? - : ( - -
- - <LoginText /> - -
- -
- - Don't have an account?{" "} - - Create an account - - -
-
- ))} -
+
+ {authUrl && authTypeMetadata && ( + <> +

+ +

+ + + + )} + + {authTypeMetadata?.authType === "cloud" && ( +
+
+
+ or +
+
+ + +
+ + Don't have an account?{" "} + + Create an account + + +
+
+ )} + + {authTypeMetadata?.authType === "basic" && ( + (userHeader && groupsHeader) ? + : ( + +
+ + <LoginText /> + +
+ +
+ + Don't have an account?{" "} + + Create an account + + +
+
+ ))}
-
+  );
+};
diff --git a/web/src/app/auth/logout/route.ts b/web/src/app/auth/logout/route.ts
index 7de902c7acf..9a7d22ae0dc 100644
--- a/web/src/app/auth/logout/route.ts
+++ b/web/src/app/auth/logout/route.ts
@@ -1,3 +1,4 @@
+import { NEXT_PUBLIC_CLOUD_ENABLED } from "@/lib/constants";
 import { getAuthTypeMetadataSS, logoutSS } from "@/lib/userSS";
 import { NextRequest } from "next/server";
 
@@ -6,8 +7,38 @@ export const POST = async (request: NextRequest) => {
   // Needed since env variables don't work well on the client-side
   const authTypeMetadata = await getAuthTypeMetadataSS();
   const response = await logoutSS(authTypeMetadata.authType, request.headers);
-  if (!response || response.ok) {
+
+  if (response && !response.ok) {
+    return new Response(response.body, { status: response?.status });
+  }
+
+  // Delete cookies only if cloud is enabled (jwt auth)
+  if (NEXT_PUBLIC_CLOUD_ENABLED) {
+    const cookiesToDelete = ["fastapiusersauth"];
+    const cookieOptions = {
+      path: "/",
+      secure: process.env.NODE_ENV === "production",
+      httpOnly: true,
+      sameSite: "lax" as const,
+    };
+
+    // Logout successful, delete cookies
+    const headers = new Headers();
+
+    cookiesToDelete.forEach((cookieName) => {
+      headers.append(
+        "Set-Cookie",
+        `${cookieName}=; Max-Age=0; ${Object.entries(cookieOptions)
+          .map(([key, value]) => `${key}=${value}`)
+          .join("; ")}`
+      );
+    });
+
+    return new Response(null, {
+      status: 204,
+      headers: headers,
+    });
+  } else {
     return new Response(null, { status: 204 });
   }
-  return new Response(response.body, { status: response?.status });
 };
diff --git a/web/src/app/auth/oauth/callback/route.ts b/web/src/app/auth/oauth/callback/route.ts
index 0b4157731a1..ca5a82743d3 100644
--- a/web/src/app/auth/oauth/callback/route.ts
+++ b/web/src/app/auth/oauth/callback/route.ts
@@ -8,16 +8,27 @@ export const GET = async (request: NextRequest) => {
   const url = new URL(buildUrl("/auth/oauth/callback"));
   url.search = request.nextUrl.search;
 
-  const response = await fetch(url.toString());
+  // Set 'redirect' to 'manual' to prevent automatic redirection
+  const response = await fetch(url.toString(), { redirect: "manual" });
   const setCookieHeader = response.headers.get("set-cookie");
 
+  if (response.status === 401) {
+    return NextResponse.redirect(
+      new URL("/auth/create-account", getDomain(request))
+    );
+  }
+
   if (!setCookieHeader) {
     return NextResponse.redirect(new URL("/auth/error", getDomain(request)));
   }
 
+  // Get the redirect URL from the backend's 'Location' header, or default to '/'
+  const redirectUrl = response.headers.get("location") || "/";
+
   const redirectResponse = NextResponse.redirect(
-    new URL("/", getDomain(request))
+    new URL(redirectUrl, getDomain(request))
   );
+
   redirectResponse.headers.set("set-cookie", setCookieHeader);
   return redirectResponse;
 };
diff --git a/web/src/app/auth/oidc/callback/route.ts b/web/src/app/auth/oidc/callback/route.ts
index 353119409b9..1bdf2b61db1 100644
--- a/web/src/app/auth/oidc/callback/route.ts
+++ b/web/src/app/auth/oidc/callback/route.ts
@@ -7,17 +7,27 @@ export const GET = async (request: NextRequest) => {
   // which adds back a redirect to the main app.
   const url = new URL(buildUrl("/auth/oidc/callback"));
   url.search = request.nextUrl.search;
-
-  const response = await fetch(url.toString());
+  // Set 'redirect' to 'manual' to prevent automatic redirection
+  const response = await fetch(url.toString(), { redirect: "manual" });
   const setCookieHeader = response.headers.get("set-cookie");
 
+  if (response.status === 401) {
+    return NextResponse.redirect(
+      new URL("/auth/create-account", getDomain(request))
+    );
+  }
+
   if (!setCookieHeader) {
     return NextResponse.redirect(new URL("/auth/error", getDomain(request)));
   }
 
+  // Get the redirect URL from the backend's 'Location' header, or default to '/'
+  const redirectUrl = response.headers.get("location") || "/";
+
   const redirectResponse = NextResponse.redirect(
-    new URL("/", getDomain(request))
+    new URL(redirectUrl, getDomain(request))
   );
+
   redirectResponse.headers.set("set-cookie", setCookieHeader);
   return redirectResponse;
 };
diff --git a/web/src/app/auth/signup/page.tsx b/web/src/app/auth/signup/page.tsx
index 9a2631c4350..29c2f97ea16 100644
--- a/web/src/app/auth/signup/page.tsx
+++ b/web/src/app/auth/signup/page.tsx
@@ -4,12 +4,14 @@ import {
   getCurrentUserSS,
   getAuthTypeMetadataSS,
   AuthTypeMetadata,
+  getAuthUrlSS,
 } from "@/lib/userSS";
 import { redirect } from "next/navigation";
 import { EmailPasswordForm } from "../login/EmailPasswordForm";
-import { Card, Title, Text } from "@tremor/react";
+import Text from "@/components/ui/text";
 import Link from "next/link";
-import { Logo } from "@/components/Logo";
+import { SignInButton } from "../login/SignInButton";
+import AuthFlowContainer from "@/components/auth/AuthFlowContainer";
 
 const Page = async () => {
   // catch cases where the backend is completely unreachable here
@@ -38,44 +40,56 @@ const Page = async () => {
     }
     return redirect("/auth/waiting-on-verification");
   }
+  const cloud = authTypeMetadata?.authType === "cloud";
 
   // only enable this page if basic login is enabled
-  if (authTypeMetadata?.authType !== "basic") {
+  if (authTypeMetadata?.authType !== "basic" && !cloud) {
     return redirect("/");
   }
 
+  let authUrl: string | null = null;
+  if (cloud && authTypeMetadata) {
+    authUrl = await getAuthUrlSS(authTypeMetadata.authType, null);
+  }
+
   return (
-
-
- -
-
-
- + + - -
- - Sign Up for Danswer - -
- + <> +
+
+

+ {cloud ? "Complete your sign up" : "Sign Up for Danswer"} +

-
- - Already have an account?{" "} - - Log In - - + {cloud && authUrl && ( +
+ +
+
+ or +
+
- + )} + + + +
+ + Already have an account?{" "} + + Log In + + +
-
-
+ + ); }; diff --git a/web/src/app/auth/verify-email/Verify.tsx b/web/src/app/auth/verify-email/Verify.tsx index aea4d1bfefb..7456231cc2b 100644 --- a/web/src/app/auth/verify-email/Verify.tsx +++ b/web/src/app/auth/verify-email/Verify.tsx @@ -2,8 +2,8 @@ import { HealthCheckBanner } from "@/components/health/healthcheck"; import { useRouter, useSearchParams } from "next/navigation"; -import { useEffect, useState } from "react"; -import { Text } from "@tremor/react"; +import { useCallback, useEffect, useState } from "react"; +import Text from "@/components/ui/text"; import { RequestNewVerificationEmail } from "../waiting-on-verification/RequestNewVerificationEmail"; import { User } from "@/lib/types"; import { Logo } from "@/components/Logo"; @@ -14,7 +14,7 @@ export function Verify({ user }: { user: User | null }) { const [error, setError] = useState(""); - async function verify() { + const verify = useCallback(async () => { const token = searchParams.get("token"); if (!token) { setError( @@ -39,11 +39,11 @@ export function Verify({ user }: { user: User | null }) { `Failed to verify your email - ${errorDetail}. Please try requesting a new verification email.` ); } - } + }, [searchParams, router]); useEffect(() => { verify(); - }, []); + }, [verify]); return (
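The Verify.tsx hunk above wraps the async verify handler in useCallback and lists it in the useEffect dependency array, so the effect re-runs only when searchParams or router change rather than on every render. A minimal sketch of that pattern follows; the hook name useTokenVerification and the /api/auth/verify endpoint are illustrative placeholders, not code from this PR.

import { useCallback, useEffect, useState } from "react";

// Placeholder for the real verification request made in Verify.tsx.
async function verifyToken(token: string): Promise<boolean> {
  const response = await fetch(
    `/api/auth/verify?token=${encodeURIComponent(token)}`
  );
  return response.ok;
}

export function useTokenVerification(token: string | null) {
  const [error, setError] = useState("");

  // Memoized so the effect below can depend on it; the callback is only
  // re-created when `token` changes, not on every render.
  const verify = useCallback(async () => {
    if (!token) {
      setError("Missing verification token.");
      return;
    }
    if (!(await verifyToken(token))) {
      setError("Failed to verify your email.");
    }
  }, [token]);

  useEffect(() => {
    verify();
  }, [verify]);

  return error;
}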
diff --git a/web/src/app/auth/waiting-on-verification/page.tsx b/web/src/app/auth/waiting-on-verification/page.tsx
index b1e1f831470..3cd8035ae17 100644
--- a/web/src/app/auth/waiting-on-verification/page.tsx
+++ b/web/src/app/auth/waiting-on-verification/page.tsx
@@ -6,7 +6,7 @@ import {
 import { redirect } from "next/navigation";
 import { HealthCheckBanner } from "@/components/health/healthcheck";
 import { User } from "@/lib/types";
-import { Text } from "@tremor/react";
+import Text from "@/components/ui/text";
 import { RequestNewVerificationEmail } from "./RequestNewVerificationEmail";
 import { Logo } from "@/components/Logo";
 
diff --git a/web/src/app/chat/ChatBanner.tsx b/web/src/app/chat/ChatBanner.tsx
index 72a2cac5652..59fc8bd32d5 100644
--- a/web/src/app/chat/ChatBanner.tsx
+++ b/web/src/app/chat/ChatBanner.tsx
@@ -16,9 +16,13 @@ export function ChatBanner() {
   useLayoutEffect(() => {
     const checkOverflow = () => {
       if (contentRef.current && fullContentRef.current) {
-        setIsOverflowing(
-          fullContentRef.current.scrollHeight > contentRef.current.clientHeight
-        );
+        const contentRect = contentRef.current.getBoundingClientRect();
+        const fullContentRect = fullContentRef.current.getBoundingClientRect();
+
+        const isWidthOverflowing = fullContentRect.width > contentRect.width;
+        const isHeightOverflowing = fullContentRect.height > contentRect.height;
+
+        setIsOverflowing(isWidthOverflowing || isHeightOverflowing);
       }
     };
 
@@ -53,23 +57,27 @@ export function ChatBanner() {
         >
-
- +
+
+ +
-
- +
+
+ +
{isOverflowing && ( diff --git a/web/src/app/chat/ChatIntro.tsx b/web/src/app/chat/ChatIntro.tsx index 3703655d7f9..a1c15bf6269 100644 --- a/web/src/app/chat/ChatIntro.tsx +++ b/web/src/app/chat/ChatIntro.tsx @@ -1,96 +1,44 @@ -import { getSourceMetadataForSources } from "@/lib/sources"; -import { ValidSources } from "@/lib/types"; import { Persona } from "../admin/assistants/interfaces"; -import { Divider } from "@tremor/react"; -import { FiBookmark, FiInfo } from "react-icons/fi"; -import { HoverPopup } from "@/components/HoverPopup"; +import { AssistantIcon } from "@/components/assistants/AssistantIcon"; +import { useState } from "react"; +import { DisplayAssistantCard } from "@/components/assistants/AssistantCards"; -export function ChatIntro({ - availableSources, - selectedPersona, -}: { - availableSources: ValidSources[]; - selectedPersona: Persona; -}) { - const availableSourceMetadata = getSourceMetadataForSources(availableSources); +export function ChatIntro({ selectedPersona }: { selectedPersona: Persona }) { + const [hoveredAssistant, setHoveredAssistant] = useState(false); return ( <> -
-
-
-
-
- {selectedPersona?.name || "How can I help you today?"} +
+
+
+
+
setHoveredAssistant(true)} + onMouseLeave={() => setHoveredAssistant(false)} + className="p-4 scale-[.8] cursor-pointer border-dashed rounded-full flex border border-border border-2 border-dashed" + style={{ + borderStyle: "dashed", + borderWidth: "1.5px", + borderSpacing: "4px", + }} + > + +
+
+ {hoveredAssistant && ( + + )}
- {selectedPersona && ( -
{selectedPersona.description}
- )}
- {selectedPersona && selectedPersona.num_chunks !== 0 && ( - <> - -
- {selectedPersona.document_sets.length > 0 && ( -
-

- Knowledge Sets:{" "} -

-
- {selectedPersona.document_sets.map((documentSet) => ( -
- -
- -
- {documentSet.name} - - } - popupContent={ -
- -
- {documentSet.description} -
-
- } - direction="top" - /> -
- ))} -
-
- )} - - {availableSources.length > 0 && ( -
-

- Connected Sources:{" "} -

-
- {availableSourceMetadata.map((sourceMetadata) => ( - -
- {sourceMetadata.icon({})} -
-
- {sourceMetadata.displayName} -
-
- ))} -
-
- )} -
- - )} +
+ {selectedPersona?.name || "How can I help you today?"} +
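The rewritten ChatIntro above drives the assistant preview entirely from a local hoveredAssistant flag: the card is only mounted while the icon is hovered. A minimal sketch of that hover-preview pattern, using a stand-in AssistantPreviewCard component (the name and markup are assumptions for illustration, not the PR's actual DisplayAssistantCard or AssistantIcon):

import { useState } from "react";

// Stand-in for DisplayAssistantCard; the real component renders richer content.
function AssistantPreviewCard({ name }: { name: string }) {
  return (
    <div className="absolute top-full mt-2 rounded border p-2">{name}</div>
  );
}

export function HoverAssistantPreview({ name }: { name: string }) {
  const [hovered, setHovered] = useState(false);

  return (
    <div
      className="relative inline-block"
      onMouseEnter={() => setHovered(true)}
      onMouseLeave={() => setHovered(false)}
    >
      <span className="rounded-full border border-dashed p-4">{name[0]}</span>
      {/* The card is only mounted while the trigger is hovered */}
      {hovered && <AssistantPreviewCard name={name} />}
    </div>
  );
}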
diff --git a/web/src/app/chat/ChatPage.tsx b/web/src/app/chat/ChatPage.tsx index 0d13b8607d2..53df687f2ff 100644 --- a/web/src/app/chat/ChatPage.tsx +++ b/web/src/app/chat/ChatPage.tsx @@ -10,7 +10,7 @@ import { ChatSessionSharedStatus, DocumentsResponse, FileDescriptor, - ImageGenerationDisplay, + FileChatDisplay, Message, MessageResponseIDInfo, RetrievalType, @@ -100,11 +100,10 @@ import ExceptionTraceModal from "@/components/modals/ExceptionTraceModal"; import { SEARCH_TOOL_NAME } from "./tools/constants"; import { useUser } from "@/components/user/UserProvider"; import { ApiKeyModal } from "@/components/llm/ApiKeyModal"; -import { - classifyAssistants, - orderAssistantsForUser, -} from "@/lib/assistants/utils"; import BlurBackground from "./shared_chat_search/BlurBackground"; +import { NoAssistantModal } from "@/components/modals/NoAssistantModal"; +import { useAssistants } from "@/components/context/AssistantsContext"; +import { Separator } from "@/components/ui/separator"; const TEMP_USER_MESSAGE_ID = -1; const TEMP_ASSISTANT_MESSAGE_ID = -2; @@ -122,11 +121,10 @@ export function ChatPage({ const router = useRouter(); const searchParams = useSearchParams(); - let { + const { chatSessions, availableSources, availableDocumentSets, - availableAssistants, llmProviders, folders, openedFolders, @@ -136,35 +134,54 @@ export function ChatPage({ refreshChatSessions, } = useChatContext(); - const [showApiKeyModal, setShowApiKeyModal] = useState(true); + const { assistants: availableAssistants, finalAssistants } = useAssistants(); + + const [showApiKeyModal, setShowApiKeyModal] = useState( + !shouldShowWelcomeModal + ); - const { user, refreshUser, isLoadingUser } = useUser(); + const { user, isAdmin, isLoadingUser } = useUser(); const existingChatIdRaw = searchParams.get("chatId"); + const [sendOnLoad, setSendOnLoad] = useState( + searchParams.get(SEARCH_PARAM_NAMES.SEND_ON_LOAD) + ); + const currentPersonaId = searchParams.get(SEARCH_PARAM_NAMES.PERSONA_ID); + const modelVersionFromSearchParams = searchParams.get( + SEARCH_PARAM_NAMES.STRUCTURED_MODEL + ); + + // Effect to handle sendOnLoad + useEffect(() => { + if (sendOnLoad) { + const newSearchParams = new URLSearchParams(searchParams.toString()); + newSearchParams.delete(SEARCH_PARAM_NAMES.SEND_ON_LOAD); - const existingChatSessionId = existingChatIdRaw - ? parseInt(existingChatIdRaw) - : null; + // Update the URL without the send-on-load parameter + router.replace(`?${newSearchParams.toString()}`, { scroll: false }); + + // Update our local state to reflect the change + setSendOnLoad(null); + + // If there's a message, submit it + if (message) { + onSubmit({ messageOverride: message }); + } + } + }, [sendOnLoad, searchParams, router]); + + const existingChatSessionId = existingChatIdRaw ? existingChatIdRaw : null; const selectedChatSession = chatSessions.find( (chatSession) => chatSession.id === existingChatSessionId ); - const chatSessionIdRef = useRef(existingChatSessionId); + const chatSessionIdRef = useRef(existingChatSessionId); // Only updates on session load (ie. rename / switching chat session) // Useful for determining which session has been loaded (i.e. still on `new, empty session` or `previous session`) - const loadedIdSessionRef = useRef(existingChatSessionId); - - // Assistants - const { visibleAssistants, hiddenAssistants: _ } = classifyAssistants( - user, - availableAssistants - ); - const finalAssistants = user - ? 
orderAssistantsForUser(visibleAssistants, user) - : visibleAssistants; + const loadedIdSessionRef = useRef(existingChatSessionId); const existingChatSessionAssistantId = selectedChatSession?.persona_id; const [selectedAssistant, setSelectedAssistant] = useState< @@ -183,11 +200,11 @@ export function ChatPage({ ) : undefined ); - // Gather default temperature settings const search_param_temperature = searchParams.get( SEARCH_PARAM_NAMES.TEMPERATURE ); + const defaultTemperature = search_param_temperature ? parseFloat(search_param_temperature) : selectedAssistant?.tools.some( @@ -208,7 +225,7 @@ export function ChatPage({ }; const llmOverrideManager = useLlmOverride( - user?.preferences.default_model, + modelVersionFromSearchParams || (user?.preferences.default_model ?? null), selectedChatSession, defaultTemperature ); @@ -222,6 +239,8 @@ export function ChatPage({ finalAssistants[0] || availableAssistants[0]; + const noAssistants = liveAssistant == null || liveAssistant == undefined; + useEffect(() => { if (!loadedIdSessionRef.current && !currentPersonaId) { return; @@ -239,7 +258,8 @@ export function ChatPage({ destructureValue(user?.preferences.default_model) ); } - }, [liveAssistant]); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [liveAssistant, llmProviders, user?.preferences.default_model]); const stopGenerating = () => { const currentSession = currentSessionId(); @@ -257,13 +277,13 @@ export function ChatPage({ if ( lastMessage && lastMessage.type === "assistant" && - lastMessage.toolCalls[0] && - lastMessage.toolCalls[0].tool_result === undefined + lastMessage.toolCall && + lastMessage.toolCall.tool_result === undefined ) { const newCompleteMessageMap = new Map( currentMessageMap(completeMessageDetail) ); - const updatedMessage = { ...lastMessage, toolCalls: [] }; + const updatedMessage = { ...lastMessage, toolCall: null }; newCompleteMessageMap.set(lastMessage.messageId, updatedMessage); updateCompleteMessageDetail(currentSession, newCompleteMessageMap); } @@ -432,6 +452,7 @@ export function ChatPage({ } initialSessionFetch(); + // eslint-disable-next-line react-hooks/exhaustive-deps }, [existingChatSessionId]); const [message, setMessage] = useState( @@ -439,11 +460,11 @@ export function ChatPage({ ); const [completeMessageDetail, setCompleteMessageDetail] = useState< - Map> + Map> >(new Map()); const updateCompleteMessageDetail = ( - sessionId: number | null, + sessionId: string | null, messageMap: Map ) => { setCompleteMessageDetail((prevState) => { @@ -454,13 +475,13 @@ export function ChatPage({ }; const currentMessageMap = ( - messageDetail: Map> + messageDetail: Map> ) => { return ( messageDetail.get(chatSessionIdRef.current) || new Map() ); }; - const currentSessionId = (): number => { + const currentSessionId = (): string => { return chatSessionIdRef.current!; }; @@ -475,7 +496,7 @@ export function ChatPage({ // if calling this function repeatedly with short delay, stay may not update in time // and result in weird behavipr completeMessageMapOverride?: Map | null; - chatSessionId?: number; + chatSessionId?: string; replacementsMap?: Map | null; makeLatestChildMessage?: boolean; }) => { @@ -492,7 +513,7 @@ export function ChatPage({ message: "", type: "system", files: [], - toolCalls: [], + toolCall: null, parentMessageId: null, childrenMessageIds: [firstMessageId], latestChildMessageId: firstMessageId, @@ -550,23 +571,23 @@ export function ChatPage({ const [submittedMessage, setSubmittedMessage] = useState(""); - const [chatState, setChatState] = 
useState>( + const [chatState, setChatState] = useState>( new Map([[chatSessionIdRef.current, "input"]]) ); const [regenerationState, setRegenerationState] = useState< - Map + Map >(new Map([[null, null]])); const [abortControllers, setAbortControllers] = useState< - Map + Map >(new Map()); // Updates "null" session values to new session id for // regeneration, chat, and abort controller state, messagehistory - const updateStatesWithNewSessionId = (newSessionId: number) => { + const updateStatesWithNewSessionId = (newSessionId: string) => { const updateState = ( - setState: Dispatch>>, + setState: Dispatch>>, defaultValue?: any ) => { setState((prevState) => { @@ -601,7 +622,7 @@ export function ChatPage({ chatSessionIdRef.current = newSessionId; }; - const updateChatState = (newState: ChatState, sessionId?: number | null) => { + const updateChatState = (newState: ChatState, sessionId?: string | null) => { setChatState((prevState) => { const newChatState = new Map(prevState); newChatState.set( @@ -626,7 +647,7 @@ export function ChatPage({ const updateRegenerationState = ( newState: RegenerationState | null, - sessionId?: number | null + sessionId?: string | null ) => { setRegenerationState((prevState) => { const newRegenerationState = new Map(prevState); @@ -638,18 +659,18 @@ export function ChatPage({ }); }; - const resetRegenerationState = (sessionId?: number | null) => { + const resetRegenerationState = (sessionId?: string | null) => { updateRegenerationState(null, sessionId); }; const currentRegenerationState = (): RegenerationState | null => { return regenerationState.get(currentSessionId()) || null; }; - const [canContinue, setCanContinue] = useState>( + const [canContinue, setCanContinue] = useState>( new Map([[null, false]]) ); - const updateCanContinue = (newState: boolean, sessionId?: number | null) => { + const updateCanContinue = (newState: boolean, sessionId?: string | null) => { setCanContinue((prevState) => { const newCanContinueState = new Map(prevState); newCanContinueState.set( @@ -688,11 +709,12 @@ export function ChatPage({ useEffect(() => { if (messageHistory.length === 0 && chatSessionIdRef.current === null) { + // Select from available assistants so shared assistants appear. 
setSelectedAssistant( - finalAssistants.find((persona) => persona.id === defaultAssistantId) + availableAssistants.find((persona) => persona.id === defaultAssistantId) ); } - }, [defaultAssistantId]); + }, [defaultAssistantId, availableAssistants, messageHistory.length]); const [ selectedDocuments, @@ -720,12 +742,6 @@ export function ChatPage({ }, [liveAssistant]); const filterManager = useFilters(); - const [finalAvailableSources, finalAvailableDocumentSets] = - computeAvailableFilters({ - selectedPersona: selectedAssistant, - availableSources, - availableDocumentSets, - }); const [currentFeedback, setCurrentFeedback] = useState< [FeedbackType, number] | null @@ -755,12 +771,24 @@ export function ChatPage({ setAboveHorizon(scrollDist.current > 500); }; - scrollableDivRef?.current?.addEventListener("scroll", updateScrollTracking); + useEffect(() => { + const scrollableDiv = scrollableDivRef.current; + if (scrollableDiv) { + scrollableDiv.addEventListener("scroll", updateScrollTracking); + return () => { + scrollableDiv.removeEventListener("scroll", updateScrollTracking); + }; + } + }, []); const handleInputResize = () => { setTimeout(() => { - if (inputRef.current && lastMessageRef.current) { - let newHeight: number = + if ( + inputRef.current && + lastMessageRef.current && + !waitForScrollRef.current + ) { + const newHeight: number = inputRef.current?.getBoundingClientRect().height!; const heightDifference = newHeight - previousHeight.current; if ( @@ -788,8 +816,11 @@ export function ChatPage({ }; const clientScrollToBottom = (fast?: boolean) => { + waitForScrollRef.current = true; + setTimeout(() => { if (!endDivRef.current || !scrollableDivRef.current) { + console.error("endDivRef or scrollableDivRef not found"); return; } @@ -817,8 +848,9 @@ export function ChatPage({ behavior: fast ? "auto" : "smooth", }); setHasPerformedInitialScroll(true); - }, 0); + }, 100); } else { + console.log("All messages are already rendered, scrolling immediately"); // If all messages are already rendered, scroll immediately endDivRef.current.scrollIntoView({ behavior: fast ? 
"auto" : "smooth", @@ -826,6 +858,11 @@ export function ChatPage({ setHasPerformedInitialScroll(true); } }, 50); + + // Reset waitForScrollRef after 1.5 seconds + setTimeout(() => { + waitForScrollRef.current = false; + }, 1500); }; const distance = 500; // distance that should "engage" the scroll @@ -971,8 +1008,8 @@ export function ChatPage({ setAlternativeGeneratingAssistant(alternativeAssistantOverride); clientScrollToBottom(); - let currChatSessionId: number; - let isNewSession = chatSessionIdRef.current === null; + let currChatSessionId: string; + const isNewSession = chatSessionIdRef.current === null; const searchParamBasedChatSessionName = searchParams.get(SEARCH_PARAM_NAMES.TITLE) || null; @@ -982,7 +1019,7 @@ export function ChatPage({ searchParamBasedChatSessionName ); } else { - currChatSessionId = chatSessionIdRef.current as number; + currChatSessionId = chatSessionIdRef.current as string; } frozenSessionId = currChatSessionId; @@ -1055,7 +1092,7 @@ export function ChatPage({ let answer = ""; - let stopReason: StreamStopReason | null = null; + const stopReason: StreamStopReason | null = null; let query: string | null = null; let retrievalType: RetrievalType = selectedDocuments.length > 0 @@ -1067,7 +1104,7 @@ export function ChatPage({ let stackTrace: string | null = null; let finalMessage: BackendMessage | null = null; - let toolCalls: ToolCallMetadata[] = []; + let toolCall: ToolCallMetadata | null = null; let initialFetchDetails: null | { user_message_id: number; @@ -1133,7 +1170,9 @@ export function ChatPage({ await delay(50); while (!stack.isComplete || !stack.isEmpty()) { - await delay(0.5); + if (stack.isEmpty()) { + await delay(0.5); + } if (!stack.isEmpty() && !controller.signal.aborted) { const packet = stack.nextPacket(); @@ -1170,7 +1209,7 @@ export function ChatPage({ message: currMessage, type: "user", files: currentMessageFiles, - toolCalls: [], + toolCall: null, parentMessageId: parentMessage?.messageId || SYSTEM_MESSAGE_ID, }, ]; @@ -1223,17 +1262,14 @@ export function ChatPage({ setSelectedMessageForDocDisplay(user_message_id); } } else if (Object.hasOwn(packet, "tool_name")) { - toolCalls = [ - { - tool_name: (packet as ToolCallMetadata).tool_name, - tool_args: (packet as ToolCallMetadata).tool_args, - tool_result: (packet as ToolCallMetadata).tool_result, - }, - ]; - if ( - !toolCalls[0].tool_result || - toolCalls[0].tool_result == undefined - ) { + // Will only ever be one tool call per message + toolCall = { + tool_name: (packet as ToolCallMetadata).tool_name, + tool_args: (packet as ToolCallMetadata).tool_args, + tool_result: (packet as ToolCallMetadata).tool_result, + }; + + if (!toolCall.tool_result || toolCall.tool_result == undefined) { updateChatState("toolBuilding", frozenSessionId); } else { updateChatState("streaming", frozenSessionId); @@ -1241,11 +1277,11 @@ export function ChatPage({ // This will be consolidated in upcoming tool calls udpate, // but for now, we need to set query as early as possible - if (toolCalls[0].tool_name == SEARCH_TOOL_NAME) { - query = toolCalls[0].tool_args["query"]; + if (toolCall.tool_name == SEARCH_TOOL_NAME) { + query = toolCall.tool_args["query"]; } } else if (Object.hasOwn(packet, "file_ids")) { - aiMessageImages = (packet as ImageGenerationDisplay).file_ids.map( + aiMessageImages = (packet as FileChatDisplay).file_ids.map( (fileId) => { return { id: fileId, @@ -1300,7 +1336,7 @@ export function ChatPage({ message: currMessage, type: "user", files: currentMessageFiles, - toolCalls: [], + toolCall: null, 
parentMessageId: error ? null : lastSuccessfulMessageId, childrenMessageIds: [ ...(regenerationRequest?.parentMessage?.childrenMessageIds || @@ -1319,7 +1355,7 @@ export function ChatPage({ finalMessage?.context_docs?.top_documents || documents, citations: finalMessage?.citations || {}, files: finalMessage?.files || aiMessageImages || [], - toolCalls: finalMessage?.tool_calls || toolCalls, + toolCall: finalMessage?.tool_call || toolCall, parentMessageId: regenerationRequest ? regenerationRequest?.parentMessage?.messageId! : initialFetchDetails.user_message_id, @@ -1342,7 +1378,7 @@ export function ChatPage({ message: currMessage, type: "user", files: currentMessageFiles, - toolCalls: [], + toolCall: null, parentMessageId: parentMessage?.messageId || SYSTEM_MESSAGE_ID, }, { @@ -1352,7 +1388,7 @@ export function ChatPage({ message: errorMsg, type: "error", files: aiMessageImages || [], - toolCalls: [], + toolCall: null, parentMessageId: initialFetchDetails?.user_message_id || TEMP_USER_MESSAGE_ID, }, @@ -1451,6 +1487,7 @@ export function ChatPage({ const imageFiles = acceptedFiles.filter((file) => file.type.startsWith("image/") ); + if (imageFiles.length > 0 && !llmAcceptsImages) { setPopup({ type: "error", @@ -1533,6 +1570,7 @@ export function ChatPage({ toggle(false); }; + const waitForScrollRef = useRef(false); const sidebarElementRef = useRef(null); useSidebarVisibility({ @@ -1551,6 +1589,7 @@ export function ChatPage({ endDivRef, distance, debounceNumber, + waitForScrollRef, }); // Virtualization + Scrolling related effects and functions @@ -1562,7 +1601,7 @@ export function ChatPage({ } const [visibleRange, setVisibleRange] = useState< - Map + Map >(() => { const initialRange: VisibleRange = { start: 0, @@ -1654,17 +1693,22 @@ export function ChatPage({ useEffect(() => { initializeVisibleRange(); - }, [router, messageHistory, chatSessionIdRef.current]); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [router, messageHistory]); useLayoutEffect(() => { + const scrollableDiv = scrollableDivRef.current; + const handleScroll = () => { updateVisibleRangeBasedOnScroll(); }; - scrollableDivRef.current?.addEventListener("scroll", handleScroll); + + scrollableDiv?.addEventListener("scroll", handleScroll); return () => { - scrollableDivRef.current?.removeEventListener("scroll", handleScroll); + scrollableDiv?.removeEventListener("scroll", handleScroll); }; + // eslint-disable-next-line react-hooks/exhaustive-deps }, [messageHistory]); const currentVisibleRange = visibleRange.get(currentSessionId()) || { @@ -1674,6 +1718,9 @@ export function ChatPage({ }; useEffect(() => { + if (noAssistants) { + return; + } const includes = checkAnyAssistantHasSearch( messageHistory, availableAssistants, @@ -1683,6 +1730,9 @@ export function ChatPage({ }, [messageHistory, availableAssistants, liveAssistant]); const [retrievalEnabled, setRetrievalEnabled] = useState(() => { + if (noAssistants) { + return false; + } return checkAnyAssistantHasSearch( messageHistory, availableAssistants, @@ -1714,6 +1764,7 @@ export function ChatPage({ return () => { window.removeEventListener("keydown", handleKeyDown); }; + // eslint-disable-next-line react-hooks/exhaustive-deps }, [router]); const [sharedChatSession, setSharedChatSession] = useState(); @@ -1752,8 +1803,13 @@ export function ChatPage({ <> - {showApiKeyModal && !shouldShowWelcomeModal && ( - setShowApiKeyModal(false)} /> + {showApiKeyModal && !shouldShowWelcomeModal ? 
( + setShowApiKeyModal(false)} + setPopup={setPopup} + /> + ) : ( + noAssistants && )} {/* ChatPopup is a custom popup that displays a admin-specified message on initial user visit. @@ -1779,9 +1835,9 @@ export function ChatPage({ {settingsToggled && ( setSettingsToggled(false)} /> @@ -1818,6 +1874,9 @@ export function ChatPage({ {sharedChatSession && ( setSharedChatSession(null)} @@ -1832,6 +1891,9 @@ export function ChatPage({ )} {sharingModalVisible && chatSessionIdRef.current !== null && ( setSharingModalVisible(false)} @@ -1901,7 +1963,6 @@ export function ChatPage({ : undefined } toggleSidebar={toggleSidebar} - user={user} currentChatSession={selectedChatSession} /> )} @@ -1934,7 +1995,7 @@ export function ChatPage({ {...getRootProps()} >
                  {/* ChatBanner is a custom banner that displays an admin-specified message at
@@ -1944,11 +2005,51 @@
                    !isFetchingChatMessages &&
                    currentSessionChatState == "input" &&
                    !loadingError && (
-
+
+ + +
+ {currentPersona?.starter_messages && + currentPersona.starter_messages.length > + 0 && ( + <> + + + {currentPersona.starter_messages + .slice(0, 4) + .map((starterMessage, i) => ( +
+ + onSubmit({ + messageOverride: + starterMessage.message, + }) + } + /> +
+ ))} + + )} +
+
)} +
{ if (isShowingRetrieved) { setSelectedMessageForDocDisplay(null); @@ -2245,7 +2339,6 @@ export function ChatPage({ {message.message} @@ -2293,7 +2386,6 @@ export function ChatPage({ alternativeAssistant } messageId={null} - personaName={liveAssistant.name} content={
{loadingError} @@ -2322,48 +2413,10 @@ export function ChatPage({ />
)} - {currentPersona && - currentPersona.starter_messages && - currentPersona.starter_messages.length > 0 && - selectedAssistant && - messageHistory.length === 0 && - !isFetchingChatMessages && ( -
- {currentPersona.starter_messages.map( - (starterMessage, i) => ( -
- - onSubmit({ - messageOverride: - starterMessage.message, - }) - } - /> -
- ) - )} -
- )} {/* Some padding at the bottom so the search bar has space at the bottom to not cover the last message*/}
+
@@ -2393,7 +2446,6 @@ export function ChatPage({ showDocs={() => setDocumentSelection(true)} selectedDocuments={selectedDocuments} // assistant stuff - assistantOptions={finalAssistants} selectedAssistant={liveAssistant} setSelectedAssistant={onAssistantChange} setAlternativeAssistant={setAlternativeAssistant} @@ -2455,7 +2507,7 @@ export function ChatPage({ )}
- +
{isConsentScreen && ( )} + )}
); } @@ -120,16 +140,23 @@ export function InputDocumentPreview({
- -
- {fileName} -
-
+ + + +
+ {fileName} +
+
+ + {fileName} + +
+
); diff --git a/web/src/app/chat/files/images/InputBarPreviewImage.tsx b/web/src/app/chat/files/images/InputBarPreviewImage.tsx index 51260af1d2e..a46d3c09814 100644 --- a/web/src/app/chat/files/images/InputBarPreviewImage.tsx +++ b/web/src/app/chat/files/images/InputBarPreviewImage.tsx @@ -30,6 +30,7 @@ export function InputBarPreviewImage({ fileId }: { fileId: string }) { `} > preview setFullImageShowing(true)} className="h-8 w-8 object-cover rounded-lg bg-background cursor-pointer" src={buildImgUrl(fileId)} diff --git a/web/src/app/chat/folders/FolderList.tsx b/web/src/app/chat/folders/FolderList.tsx index 01e69b3a1a4..047cd1e9b32 100644 --- a/web/src/app/chat/folders/FolderList.tsx +++ b/web/src/app/chat/folders/FolderList.tsx @@ -30,7 +30,7 @@ const FolderItem = ({ initiallySelected, }: { folder: Folder; - currentChatId?: number; + currentChatId?: string; isInitiallyExpanded: boolean; initiallySelected: boolean; }) => { @@ -145,10 +145,7 @@ const FolderItem = ({ const handleDrop = async (event: React.DragEvent) => { event.preventDefault(); setIsDragOver(false); - const chatSessionId = parseInt( - event.dataTransfer.getData(CHAT_SESSION_ID_KEY), - 10 - ); + const chatSessionId = event.dataTransfer.getData(CHAT_SESSION_ID_KEY); try { await addChatToFolder(folder.folder_id, chatSessionId); router.refresh(); // Refresh to show the updated folder contents @@ -302,7 +299,7 @@ export const FolderList = ({ newFolderId, }: { folders: Folder[]; - currentChatId?: number; + currentChatId?: string; openedFolders?: { [key: number]: boolean }; newFolderId: number | null; }) => { diff --git a/web/src/app/chat/folders/FolderManagement.tsx b/web/src/app/chat/folders/FolderManagement.tsx index b1d245147ce..417bb903f65 100644 --- a/web/src/app/chat/folders/FolderManagement.tsx +++ b/web/src/app/chat/folders/FolderManagement.tsx @@ -1,5 +1,3 @@ -import { useState, useEffect, FC } from "react"; - // Function to create a new folder export async function createFolder(folderName: string): Promise { const response = await fetch("/api/folder", { @@ -19,7 +17,7 @@ export async function createFolder(folderName: string): Promise { // Function to add a chat session to a folder export async function addChatToFolder( folderId: number, - chatSessionId: number + chatSessionId: string ): Promise { const response = await fetch(`/api/folder/${folderId}/add-chat-session`, { method: "POST", @@ -36,7 +34,7 @@ export async function addChatToFolder( // Function to remove a chat session from a folder export async function removeChatFromFolder( folderId: number, - chatSessionId: number + chatSessionId: string ): Promise { const response = await fetch(`/api/folder/${folderId}/remove-chat-session`, { method: "POST", diff --git a/web/src/app/chat/input/ChatInputAssistant.tsx b/web/src/app/chat/input/ChatInputAssistant.tsx index d2d062eb2f1..26c0894f5bb 100644 --- a/web/src/app/chat/input/ChatInputAssistant.tsx +++ b/web/src/app/chat/input/ChatInputAssistant.tsx @@ -2,7 +2,12 @@ import { Persona } from "@/app/admin/assistants/interfaces"; import { AssistantIcon } from "@/components/assistants/AssistantIcon"; -import { Tooltip } from "@/components/tooltip/Tooltip"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; import { ForwardedRef, forwardRef, useState } from "react"; import { FiX } from "react-icons/fi"; @@ -23,27 +28,30 @@ export const ChatInputAssistant = forwardRef< onMouseLeave={() => setIsHovered(false)} className="flex-none h-10 duration-300 h-10 items-center 
rounded-lg bg-background-150" > - {alternativeAssistant.description}

- } - > -
- -

- {alternativeAssistant.name} -

-
- -
-
-
+ + + +
+ +

+ {alternativeAssistant.name} +

+
+ +
+
+
+ +

{alternativeAssistant.description}

+
+
+
); }); diff --git a/web/src/app/chat/input/ChatInputBar.tsx b/web/src/app/chat/input/ChatInputBar.tsx index 64535d82b20..9dd3d5274c4 100644 --- a/web/src/app/chat/input/ChatInputBar.tsx +++ b/web/src/app/chat/input/ChatInputBar.tsx @@ -29,11 +29,17 @@ import { LlmTab } from "../modal/configuration/LlmTab"; import { AssistantsTab } from "../modal/configuration/AssistantsTab"; import { DanswerDocument } from "@/lib/search/interfaces"; import { AssistantIcon } from "@/components/assistants/AssistantIcon"; -import { Tooltip } from "@/components/tooltip/Tooltip"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; import { Hoverable } from "@/components/Hoverable"; import { SettingsContext } from "@/components/settings/SettingsProvider"; import { ChatState } from "../types"; import UnconfiguredProviderText from "@/components/chat_search/UnconfiguredProviderText"; +import { useAssistants } from "@/components/context/AssistantsContext"; const MAX_INPUT_HEIGHT = 200; @@ -52,7 +58,6 @@ export function ChatInputBar({ // assistants selectedAssistant, - assistantOptions, setSelectedAssistant, setAlternativeAssistant, @@ -70,7 +75,6 @@ export function ChatInputBar({ stopGenerating: () => void; showDocs: () => void; selectedDocuments: DanswerDocument[]; - assistantOptions: Persona[]; setAlternativeAssistant: (alternativeAssistant: Persona | null) => void; setSelectedAssistant: (assistant: Persona) => void; inputPrompts: InputPrompt[]; @@ -85,7 +89,7 @@ export function ChatInputBar({ setFiles: (files: FileDescriptor[]) => void; handleFileUpload: (files: File[]) => void; textAreaRef: React.RefObject; - chatSessionId?: number; + chatSessionId?: string; }) { useEffect(() => { const textarea = textAreaRef.current; @@ -96,7 +100,7 @@ export function ChatInputBar({ MAX_INPUT_HEIGHT )}px`; } - }, [message]); + }, [message, textAreaRef]); const handlePaste = (event: React.ClipboardEvent) => { const items = event.clipboardData?.items; @@ -116,6 +120,7 @@ export function ChatInputBar({ }; const settings = useContext(SettingsContext); + const { finalAssistants: assistantOptions } = useAssistants(); const { llmProviders } = useChatContext(); const [_, llmName] = getFinalLLM(llmProviders, selectedAssistant, null); @@ -399,17 +404,20 @@ export function ChatInputBar({ {alternativeAssistant.name}

- - {alternativeAssistant.description} -

- } - > - -
+ + + + + + +

+ {alternativeAssistant.description} +

+
+
+
( { @@ -614,7 +621,11 @@ export function ChatInputBar({ chatState == "toolBuilding" || chatState == "loading" ? ( )} diff --git a/web/src/app/chat/input/ChatInputOption.tsx b/web/src/app/chat/input/ChatInputOption.tsx index d2d7bc5fde9..5d5e9d45f47 100644 --- a/web/src/app/chat/input/ChatInputOption.tsx +++ b/web/src/app/chat/input/ChatInputOption.tsx @@ -1,9 +1,5 @@ import React, { useState, useRef, useEffect } from "react"; -import { - ChevronDownIcon, - ChevronRightIcon, - IconProps, -} from "@/components/icons/icons"; +import { ChevronDownIcon, IconProps } from "@/components/icons/icons"; interface ChatInputOptionProps { name?: string; diff --git a/web/src/app/chat/input/SelectedFilterDisplay.tsx b/web/src/app/chat/input/SelectedFilterDisplay.tsx index 56b92d9998f..92d398ce984 100644 --- a/web/src/app/chat/input/SelectedFilterDisplay.tsx +++ b/web/src/app/chat/input/SelectedFilterDisplay.tsx @@ -2,7 +2,7 @@ import { SourceIcon } from "@/components/SourceIcon"; import React from "react"; import { FiBookmark, FiTag, FiX } from "react-icons/fi"; import { FilterManager } from "@/lib/hooks"; -import { DateRangePickerValue } from "@tremor/react"; +import { DateRangePickerValue } from "@/app/ee/admin/performance/DateRangeSelector"; const displayTimeRange = (timeRange: DateRangePickerValue) => { if (timeRange.selectValue) { diff --git a/web/src/app/chat/interfaces.ts b/web/src/app/chat/interfaces.ts index dfc24aaa692..dd736837402 100644 --- a/web/src/app/chat/interfaces.ts +++ b/web/src/app/chat/interfaces.ts @@ -32,6 +32,7 @@ export enum ChatFileType { IMAGE = "image", DOCUMENT = "document", PLAIN_TEXT = "plain_text", + CSV = "csv", } export interface FileDescriptor { @@ -60,7 +61,7 @@ export interface ToolCallFinalResult { } export interface ChatSession { - id: number; + id: string; name: string; persona_id: number; time_created: string; @@ -70,7 +71,7 @@ export interface ChatSession { } export interface SearchSession { - search_session_id: number; + search_session_id: string; documents: SearchDanswerDocument[]; messages: BackendMessage[]; description: string; @@ -85,7 +86,7 @@ export interface Message { documents?: DanswerDocument[] | null; citations?: CitationMap; files: FileDescriptor[]; - toolCalls: ToolCallMetadata[]; + toolCall: ToolCallMetadata | null; // for rebuilding the message tree parentMessageId: number | null; childrenMessageIds?: number[]; @@ -97,7 +98,7 @@ export interface Message { } export interface BackendChatSession { - chat_session_id: number; + chat_session_id: string; description: string; persona_id: number; persona_name: string; @@ -110,7 +111,7 @@ export interface BackendChatSession { export interface BackendMessage { message_id: number; comments: any; - chat_session_id: number; + chat_session_id: string; parent_message: number | null; latest_child_message: number | null; message: string; @@ -120,7 +121,7 @@ export interface BackendMessage { time_sent: string; citations: CitationMap; files: FileDescriptor[]; - tool_calls: ToolCallFinalResult[]; + tool_call: ToolCallFinalResult | null; alternate_assistant_id?: number | null; overridden_model?: string; } @@ -135,7 +136,7 @@ export interface DocumentsResponse { rephrased_query: string | null; } -export interface ImageGenerationDisplay { +export interface FileChatDisplay { file_ids: string[]; } diff --git a/web/src/app/chat/lib.tsx b/web/src/app/chat/lib.tsx index 01090aa5637..41a83eee1b8 100644 --- a/web/src/app/chat/lib.tsx +++ b/web/src/app/chat/lib.tsx @@ -4,22 +4,15 @@ import { Filters, StreamStopInfo, } from 
"@/lib/search/interfaces"; -import { handleSSEStream, handleStream } from "@/lib/search/streamingUtils"; +import { handleSSEStream } from "@/lib/search/streamingUtils"; import { ChatState, FeedbackType } from "./types"; -import { - Dispatch, - MutableRefObject, - RefObject, - SetStateAction, - useEffect, - useRef, -} from "react"; +import { MutableRefObject, RefObject, useEffect, useRef } from "react"; import { BackendMessage, ChatSession, DocumentsResponse, FileDescriptor, - ImageGenerationDisplay, + FileChatDisplay, Message, MessageResponseIDInfo, RetrievalType, @@ -62,7 +55,7 @@ export function getChatRetentionInfo( } export async function updateModelOverrideForChatSession( - chatSessionId: number, + chatSessionId: string, newAlternateModel: string ) { const response = await fetch("/api/chat/update-chat-session-model", { @@ -81,7 +74,7 @@ export async function updateModelOverrideForChatSession( export async function createChatSession( personaId: number, description: string | null -): Promise { +): Promise { const createChatSessionResponse = await fetch( "/api/chat/create-chat-session", { @@ -110,7 +103,7 @@ export type PacketType = | BackendMessage | AnswerPiecePacket | DocumentsResponse - | ImageGenerationDisplay + | FileChatDisplay | StreamingError | MessageResponseIDInfo | StreamStopInfo; @@ -138,7 +131,7 @@ export async function* sendMessage({ message: string; fileDescriptors: FileDescriptor[]; parentMessageId: number | null; - chatSessionId: number; + chatSessionId: string; promptId: number | null | undefined; filters: Filters | null; selectedDocumentIds: number[] | null; @@ -210,7 +203,7 @@ export async function* sendMessage({ yield* handleSSEStream(response); } -export async function nameChatSession(chatSessionId: number, message: string) { +export async function nameChatSession(chatSessionId: string, message: string) { const response = await fetch("/api/chat/rename-chat-session", { method: "PUT", headers: { @@ -259,7 +252,7 @@ export async function handleChatFeedback( return response; } export async function renameChatSession( - chatSessionId: number, + chatSessionId: string, newName: string ) { const response = await fetch(`/api/chat/rename-chat-session`, { @@ -276,7 +269,7 @@ export async function renameChatSession( return response; } -export async function deleteChatSession(chatSessionId: number) { +export async function deleteChatSession(chatSessionId: string) { const response = await fetch( `/api/chat/delete-chat-session/${chatSessionId}`, { @@ -435,7 +428,7 @@ export function processRawChatHistory( citations: messageInfo?.citations || {}, } : {}), - toolCalls: messageInfo.tool_calls, + toolCall: messageInfo.tool_call, parentMessageId: messageInfo.parent_message, childrenMessageIds: [], latestChildMessageId: messageInfo.latest_child_message, @@ -591,7 +584,7 @@ const PARAMS_TO_SKIP = [ export function buildChatUrl( existingSearchParams: ReadonlyURLSearchParams, - chatSessionId: number | null, + chatSessionId: string | null, personaId: number | null, search?: boolean ) { @@ -648,9 +641,11 @@ export async function useScrollonStream({ endDivRef, distance, debounceNumber, + waitForScrollRef, }: { chatState: ChatState; scrollableDivRef: RefObject; + waitForScrollRef: RefObject; scrollDist: MutableRefObject; endDivRef: RefObject; distance: number; @@ -664,7 +659,7 @@ export async function useScrollonStream({ useEffect(() => { if (chatState != "input" && scrollableDivRef && scrollableDivRef.current) { - let newHeight: number = scrollableDivRef.current?.scrollTop!; + const 
newHeight: number = scrollableDivRef.current?.scrollTop!; const heightDifference = newHeight - previousScroll.current; previousScroll.current = newHeight; @@ -729,5 +724,5 @@ export async function useScrollonStream({ }); } } - }, [chatState]); + }, [chatState, distance, scrollDist, scrollableDivRef]); } diff --git a/web/src/app/chat/message/CodeBlock.tsx b/web/src/app/chat/message/CodeBlock.tsx index 55a6ea7be32..5ab6b73b56e 100644 --- a/web/src/app/chat/message/CodeBlock.tsx +++ b/web/src/app/chat/message/CodeBlock.tsx @@ -1,20 +1,22 @@ import React, { useState, ReactNode, useCallback, useMemo, memo } from "react"; import { FiCheck, FiCopy } from "react-icons/fi"; -const CODE_BLOCK_PADDING_TYPE = { padding: "1rem" }; +const CODE_BLOCK_PADDING = { padding: "1rem" }; interface CodeBlockProps { - className?: string | undefined; + className?: string; children?: ReactNode; - content: string; - [key: string]: any; + codeText: string; } +const MemoizedCodeLine = memo(({ content }: { content: ReactNode }) => ( + <>{content} +)); + export const CodeBlock = memo(function CodeBlock({ className = "", children, - content, - ...props + codeText, }: CodeBlockProps) { const [copied, setCopied] = useState(false); @@ -26,132 +28,99 @@ export const CodeBlock = memo(function CodeBlock({ .join(" "); }, [className]); - const codeText = useMemo(() => { - let codeText: string | null = null; - if ( - props.node?.position?.start?.offset && - props.node?.position?.end?.offset - ) { - codeText = content.slice( - props.node.position.start.offset, - props.node.position.end.offset - ); - codeText = codeText.trim(); - - // Find the last occurrence of closing backticks - const lastBackticksIndex = codeText.lastIndexOf("```"); - if (lastBackticksIndex !== -1) { - codeText = codeText.slice(0, lastBackticksIndex + 3); - } - - // Remove the language declaration and trailing backticks - const codeLines = codeText.split("\n"); - if ( - codeLines.length > 1 && - (codeLines[0].startsWith("```") || - codeLines[0].trim().startsWith("```")) - ) { - codeLines.shift(); // Remove the first line with the language declaration - if ( - codeLines[codeLines.length - 1] === "```" || - codeLines[codeLines.length - 1]?.trim() === "```" - ) { - codeLines.pop(); // Remove the last line with the trailing backticks - } - - const minIndent = codeLines - .filter((line) => line.trim().length > 0) - .reduce((min, line) => { - const match = line.match(/^\s*/); - return Math.min(min, match ? match[0].length : 0); - }, Infinity); - - const formattedCodeLines = codeLines.map((line) => - line.slice(minIndent) - ); - codeText = formattedCodeLines.join("\n"); - } - } - - // handle unknown languages. 
They won't have a `node.position.start.offset` - if (!codeText) { - const findTextNode = (node: any): string | null => { - if (node.type === "text") { - return node.value; - } - let finalResult = ""; - if (node.children) { - for (const child of node.children) { - const result = findTextNode(child); - if (result) { - finalResult += result; - } - } - } - return finalResult; - }; - - codeText = findTextNode(props.node); - } - - return codeText; - }, [content, props.node]); - - const handleCopy = useCallback( - (event: React.MouseEvent) => { - event.preventDefault(); - if (!codeText) { - return; - } - - navigator.clipboard.writeText(codeText).then(() => { - setCopied(true); - setTimeout(() => setCopied(false), 2000); - }); - }, - [codeText] + const handleCopy = useCallback(() => { + if (!codeText) return; + navigator.clipboard.writeText(codeText).then(() => { + setCopied(true); + setTimeout(() => setCopied(false), 2000); + }); + }, [codeText]); + + const CopyButton = () => ( +
+ {copied ? ( +
+ + Copied! +
+ ) : ( +
+ + Copy code +
+ )} +
); - if (!language) { - if (typeof children === "string") { - return {children}; + if (typeof children === "string") { + return ( + + {children} + + ); + } + + const CodeContent = () => { + if (!language) { + return ( +
+          
+            {Array.isArray(children)
+              ? children.map((child, index) => (
+                  
+                ))
+              : children}
+          
+        
+ ); } return ( -
-        
-          {children}
+      
+        
+          {Array.isArray(children)
+            ? children.map((child, index) => (
+                
+              ))
+            : children}
         
       
); - } + }; return (
-
- {language} - {codeText && ( -
- {copied ? ( -
- - Copied! -
- ) : ( -
- - Copy code -
- )} -
- )} -
-
-        {children}
-      
+ {language && ( +
+ {language} + {codeText && } +
+ )} + +
); }); + +CodeBlock.displayName = "CodeBlock"; +MemoizedCodeLine.displayName = "MemoizedCodeLine"; diff --git a/web/src/app/chat/message/ContinueMessage.tsx b/web/src/app/chat/message/ContinueMessage.tsx index 097b3e57e33..e60a13a5323 100644 --- a/web/src/app/chat/message/ContinueMessage.tsx +++ b/web/src/app/chat/message/ContinueMessage.tsx @@ -1,6 +1,6 @@ import { EmphasizedClickable } from "@/components/BasicClickable"; import { useEffect, useState } from "react"; -import { FiBook, FiPlayCircle } from "react-icons/fi"; +import { FiPlayCircle } from "react-icons/fi"; export function ContinueGenerating({ handleContinueGenerating, diff --git a/web/src/app/chat/message/MemoizedTextComponents.tsx b/web/src/app/chat/message/MemoizedTextComponents.tsx index 4ab8bc810b2..9ab0e28e3ca 100644 --- a/web/src/app/chat/message/MemoizedTextComponents.tsx +++ b/web/src/app/chat/message/MemoizedTextComponents.tsx @@ -25,9 +25,9 @@ export const MemoizedLink = memo((props: any) => { } }); -export const MemoizedParagraph = memo(({ node, ...props }: any) => ( -

-)); +export const MemoizedParagraph = memo(({ ...props }: any) => { + return

; +}); MemoizedLink.displayName = "MemoizedLink"; MemoizedParagraph.displayName = "MemoizedParagraph"; diff --git a/web/src/app/chat/message/Messages.tsx b/web/src/app/chat/message/Messages.tsx index edb18138c79..c420a3f53e9 100644 --- a/web/src/app/chat/message/Messages.tsx +++ b/web/src/app/chat/message/Messages.tsx @@ -45,7 +45,12 @@ import { TooltipGroup, } from "@/components/tooltip/CustomTooltip"; import { ValidSources } from "@/lib/types"; -import { Tooltip } from "@/components/tooltip/Tooltip"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; import { useMouseTracking } from "./hooks"; import { InternetSearchIcon } from "@/components/InternetSearchIcon"; import { SettingsContext } from "@/components/settings/SettingsProvider"; @@ -54,6 +59,9 @@ import RegenerateOption from "../RegenerateOption"; import { LlmOverride } from "@/lib/hooks"; import { ContinueGenerating } from "./ContinueMessage"; import { MemoizedLink, MemoizedParagraph } from "./MemoizedTextComponents"; +import { extractCodeText } from "./codeUtils"; +import ToolResult from "../../../components/tools/ToolResult"; +import CsvContent from "../../../components/tools/CSVContent"; const TOOLS_WITH_CUSTOM_HANDLING = [ SEARCH_TOOL_NAME, @@ -68,8 +76,13 @@ function FileDisplay({ files: FileDescriptor[]; alignBubble?: boolean; }) { + const [close, setClose] = useState(true); const imageFiles = files.filter((file) => file.type === ChatFileType.IMAGE); - const nonImgFiles = files.filter((file) => file.type !== ChatFileType.IMAGE); + const nonImgFiles = files.filter( + (file) => file.type !== ChatFileType.IMAGE && file.type !== ChatFileType.CSV + ); + + const csvImgFiles = files.filter((file) => file.type == ChatFileType.CSV); return ( <> @@ -93,6 +106,7 @@ function FileDisplay({

)} + {imageFiles && imageFiles.length > 0 && (
)} + + {csvImgFiles && csvImgFiles.length > 0 && ( +
+
+ {csvImgFiles.map((file) => { + return ( +
+ {close ? ( + <> + setClose(false)} + contentComponent={CsvContent} + /> + + ) : ( + setClose(true)} + fileName={file.name || file.id} + maxWidth="max-w-64" + alignBubble={alignBubble} + /> + )} +
+ ); + })} +
+
+ )} ); } @@ -123,13 +166,11 @@ export const AIMessage = ({ files, selectedDocuments, query, - personaName, citedDocuments, toolCall, isComplete, hasDocs, handleFeedback, - isCurrentlyShowingRetrieved, handleShowRetrieved, handleSearchQueryEdit, handleForceSearch, @@ -152,13 +193,11 @@ export const AIMessage = ({ content: string | JSX.Element; files?: FileDescriptor[]; query?: string; - personaName?: string; citedDocuments?: [string, DanswerDocument][] | null; - toolCall?: ToolCallMetadata; + toolCall?: ToolCallMetadata | null; isComplete?: boolean; hasDocs?: boolean; handleFeedback?: (feedbackType: FeedbackType) => void; - isCurrentlyShowingRetrieved?: boolean; handleShowRetrieved?: (messageNumber: number | null) => void; handleSearchQueryEdit?: (query: string) => void; handleForceSearch?: () => void; @@ -194,6 +233,8 @@ export const AIMessage = ({ const finalContent = processContent(content as string); const [isRegenerateHovered, setIsRegenerateHovered] = useState(false); + const [isRegenerateDropdownVisible, setIsRegenerateDropdownVisible] = + useState(false); const { isHovering, trackedElementRef, hoverElementRef } = useMouseTracking(); const settings = useContext(SettingsContext); @@ -201,7 +242,7 @@ export const AIMessage = ({ const selectedDocumentIds = selectedDocuments?.map((document) => document.document_id) || []; - let citedDocumentIds: string[] = []; + const citedDocumentIds: string[] = []; citedDocuments?.forEach((doc) => { citedDocumentIds.push(doc[1].document_id); @@ -253,6 +294,40 @@ export const AIMessage = ({ new Set((docs || []).map((doc) => doc.source_type)) ).slice(0, 3); + const markdownComponents = useMemo( + () => ({ + a: MemoizedLink, + p: MemoizedParagraph, + code: ({ node, className, children, ...props }: any) => { + const codeText = extractCodeText( + node, + finalContent as string, + children + ); + + return ( + + {children} + + ); + }, + }), + [finalContent] + ); + + const renderedMarkdown = useMemo(() => { + return ( + + {finalContent as string} + + ); + }, [finalContent, markdownComponents]); + const includeMessageSwitcher = currentMessageInd !== undefined && onMessageSelection && @@ -266,7 +341,9 @@ export const AIMessage = ({ className={"py-5 ml-4 px-5 relative flex "} >
@@ -352,27 +429,7 @@ export const AIMessage = ({ {typeof content === "string" ? (
- ( - - ), - }} - remarkPlugins={[remarkGfm]} - rehypePlugins={[ - [rehypePrism, { ignoreMissing: true }], - ]} - > - {finalContent as string} - + {renderedMarkdown}
) : ( content @@ -397,6 +454,7 @@ export const AIMessage = ({ href={doc.link || undefined} target="_blank" className="text-sm flex w-full pt-1 gap-x-1.5 overflow-hidden justify-between font-semibold text-text-700" + rel="noreferrer" >

@@ -506,12 +564,22 @@ export const AIMessage = ({ /> {regenerate && ( - + + + )}

@@ -521,9 +589,21 @@ export const AIMessage = ({ className={` absolute -bottom-5 z-10 - invisible ${(isHovering || isRegenerateHovered || settings?.isMobile) && "!visible"} - opacity-0 ${(isHovering || isRegenerateHovered || settings?.isMobile) && "!opacity-100"} - translate-y-2 ${(isHovering || settings?.isMobile) && "!translate-y-0"} + invisible ${ + (isHovering || + isRegenerateHovered || + settings?.isMobile) && + "!visible" + } + opacity-0 ${ + (isHovering || + isRegenerateHovered || + settings?.isMobile) && + "!opacity-100" + } + translate-y-2 ${ + (isHovering || settings?.isMobile) && "!translate-y-0" + } transition-transform duration-300 ease-in-out flex md:flex-row gap-x-0.5 bg-background-125/40 -mx-1.5 p-1.5 rounded-lg `} @@ -571,12 +651,22 @@ export const AIMessage = ({ /> {regenerate && ( - + + + )}
@@ -655,7 +745,7 @@ export const HumanMessage = ({ if (!isEditing) { setEditedContent(content); } - }, [content]); + }, [content, isEditing]); useEffect(() => { if (textareaRef.current) { @@ -669,9 +759,7 @@ export const HumanMessage = ({ }, [isEditing]); const handleEditSubmit = () => { - if (editedContent.trim() !== content.trim()) { - onEdit?.(editedContent); - } + onEdit?.(editedContent); setIsEditing(false); }; @@ -687,7 +775,9 @@ export const HumanMessage = ({ onMouseLeave={() => setIsHovered(false)} >
@@ -810,17 +900,22 @@ export const HumanMessage = ({ isHovered && !isEditing && (!files || files.length === 0) ? ( - - - + + + + + + Edit + + ) : (
)} diff --git a/web/src/app/chat/message/SearchSummary.tsx b/web/src/app/chat/message/SearchSummary.tsx index 66f12aa2dcb..f86212fd290 100644 --- a/web/src/app/chat/message/SearchSummary.tsx +++ b/web/src/app/chat/message/SearchSummary.tsx @@ -4,7 +4,12 @@ import { } from "@/components/BasicClickable"; import { HoverPopup } from "@/components/HoverPopup"; import { Hoverable } from "@/components/Hoverable"; -import { Tooltip } from "@/components/tooltip/Tooltip"; +import { + Tooltip, + TooltipContent, + TooltipProvider, + TooltipTrigger, +} from "@/components/ui/tooltip"; import { useEffect, useRef, useState } from "react"; import { FiCheck, FiEdit2, FiSearch, FiX } from "react-icons/fi"; @@ -83,7 +88,7 @@ export function SearchSummary({ if (!isEditing) { setFinalQuery(query); } - }, [query]); + }, [query, isEditing]); const searchingForDisplay = (
@@ -169,16 +174,21 @@ export function SearchSummary({ )}
{handleSearchQueryEdit && ( - - - + + + + + + Edit Search + + )} )} diff --git a/web/src/app/chat/message/SkippedSearch.tsx b/web/src/app/chat/message/SkippedSearch.tsx index b339ac784ab..05dc8f2d8e4 100644 --- a/web/src/app/chat/message/SkippedSearch.tsx +++ b/web/src/app/chat/message/SkippedSearch.tsx @@ -1,5 +1,5 @@ import { EmphasizedClickable } from "@/components/BasicClickable"; -import { FiArchive, FiBook, FiSearch } from "react-icons/fi"; +import { FiBook } from "react-icons/fi"; function ForceSearchButton({ messageId, diff --git a/web/src/app/chat/message/codeUtils.ts b/web/src/app/chat/message/codeUtils.ts new file mode 100644 index 00000000000..a9cd13944f0 --- /dev/null +++ b/web/src/app/chat/message/codeUtils.ts @@ -0,0 +1,61 @@ +import React from "react"; + +export function extractCodeText( + node: any, + content: string, + children: React.ReactNode +): string { + let codeText: string | null = null; + + if ( + node?.position?.start?.offset != null && + node?.position?.end?.offset != null + ) { + codeText = content + .slice(node.position.start.offset, node.position.end.offset) + .trim(); + + // Match code block with optional language declaration + const codeBlockMatch = codeText.match(/^```[^\n]*\n([\s\S]*?)\n?```$/); + if (codeBlockMatch) { + codeText = codeBlockMatch[1]; + } + + // Normalize indentation + const codeLines = codeText.split("\n"); + const minIndent = codeLines + .filter((line) => line.trim().length > 0) + .reduce((min, line) => { + const match = line.match(/^\s*/); + return Math.min(min, match ? match[0].length : min); + }, Infinity); + + const formattedCodeLines = codeLines.map((line) => line.slice(minIndent)); + codeText = formattedCodeLines.join("\n").trim(); + } else { + // Fallback if position offsets are not available + const extractTextFromReactNode = (node: React.ReactNode): string => { + if (typeof node === "string") return node; + if (typeof node === "number") return String(node); + if (!node) return ""; + + if (React.isValidElement(node)) { + const children = node.props.children; + if (Array.isArray(children)) { + return children.map(extractTextFromReactNode).join(""); + } + return extractTextFromReactNode(children); + } + + if (Array.isArray(node)) { + return node.map(extractTextFromReactNode).join(""); + } + + return ""; + }; + + codeText = extractTextFromReactNode(children); + } + + return codeText || ""; +} diff --git a/web/src/app/chat/modal/FeedbackModal.tsx b/web/src/app/chat/modal/FeedbackModal.tsx index 6b3df8793cf..39c3253b76a 100644 --- a/web/src/app/chat/modal/FeedbackModal.tsx +++ b/web/src/app/chat/modal/FeedbackModal.tsx @@ -2,13 +2,8 @@ import { useState } from "react"; import { FeedbackType } from "../types"; -import { FiThumbsDown, FiThumbsUp } from "react-icons/fi"; -import { ModalWrapper } from "@/components/modals/ModalWrapper"; -import { - DislikeFeedbackIcon, - FilledLikeIcon, - LikeFeedbackIcon, -} from "@/components/icons/icons"; +import { Modal } from "@/components/Modal"; +import { FilledLikeIcon } from "@/components/icons/icons"; const predefinedPositiveFeedbackOptions = process.env.NEXT_PUBLIC_POSITIVE_PREDEFINED_FEEDBACK_OPTIONS?.split(",") || @@ -54,7 +49,7 @@ export const FeedbackModal = ({ : predefinedNegativeFeedbackOptions; return ( - + <>

@@ -117,6 +112,6 @@ export const FeedbackModal = ({
- + ); }; diff --git a/web/src/app/chat/modal/MakePublicAssistantModal.tsx b/web/src/app/chat/modal/MakePublicAssistantModal.tsx index a234050a52b..a410453f6b0 100644 --- a/web/src/app/chat/modal/MakePublicAssistantModal.tsx +++ b/web/src/app/chat/modal/MakePublicAssistantModal.tsx @@ -1,5 +1,7 @@ -import { ModalWrapper } from "@/components/modals/ModalWrapper"; -import { Button, Divider, Text } from "@tremor/react"; +import { Modal } from "@/components/Modal"; +import { Button } from "@/components/ui/button"; +import { Separator } from "@/components/ui/separator"; +import Text from "@/components/ui/text"; export function MakePublicAssistantModal({ isPublic, @@ -11,7 +13,7 @@ export function MakePublicAssistantModal({ onClose: () => void; }) { return ( - +

{isPublic ? "Public Assistant" : "Make Assistant Public"} @@ -28,7 +30,7 @@ export function MakePublicAssistantModal({ : " Only you can access this assistant."} - + {isPublic ? (
@@ -42,7 +44,7 @@ export function MakePublicAssistantModal({ onClose(); }} size="sm" - color="red" + variant="destructive" > Make Assistant Private @@ -60,13 +62,13 @@ export function MakePublicAssistantModal({ onClose(); }} size="sm" - color="green" + variant="submit" > Make Assistant Public
)}

-
+ ); } diff --git a/web/src/app/chat/modal/SetDefaultModelModal.tsx b/web/src/app/chat/modal/SetDefaultModelModal.tsx index e5536f19c61..27696c46916 100644 --- a/web/src/app/chat/modal/SetDefaultModelModal.tsx +++ b/web/src/app/chat/modal/SetDefaultModelModal.tsx @@ -1,28 +1,29 @@ -import { Dispatch, SetStateAction, useState, useEffect, useRef } from "react"; -import { ModalWrapper } from "@/components/modals/ModalWrapper"; -import { Badge, Text } from "@tremor/react"; +import { Dispatch, SetStateAction, useEffect, useRef } from "react"; +import { Modal } from "@/components/Modal"; +import Text from "@/components/ui/text"; import { getDisplayNameForModel, LlmOverride } from "@/lib/hooks"; import { LLMProviderDescriptor } from "@/app/admin/configuration/llm/interfaces"; import { destructureValue, structureValue } from "@/lib/llm/utils"; import { setUserDefaultModel } from "@/lib/users/UserSettings"; import { useRouter } from "next/navigation"; -import { usePopup } from "@/components/admin/connectors/Popup"; +import { PopupSpec } from "@/components/admin/connectors/Popup"; +import { useUser } from "@/components/user/UserProvider"; export function SetDefaultModelModal({ + setPopup, llmProviders, onClose, setLlmOverride, defaultModel, - refreshUser, }: { + setPopup: (popupSpec: PopupSpec | null) => void; llmProviders: LLMProviderDescriptor[]; setLlmOverride: Dispatch>; onClose: () => void; defaultModel: string | null; - refreshUser: () => void; }) { - const { popup, setPopup } = usePopup(); + const { refreshUser } = useUser(); const containerRef = useRef(null); const messageRef = useRef(null); @@ -122,12 +123,8 @@ export function SetDefaultModelModal({ ); return ( - + <> - {popup}

           Set Default Model
@@ -169,7 +166,9 @@ export function SetDefaultModelModal({

} @@ -203,6 +202,6 @@ export function SetDefaultModelModal({ - + ); } diff --git a/web/src/app/chat/modal/ShareChatSessionModal.tsx b/web/src/app/chat/modal/ShareChatSessionModal.tsx index 6c287a6ceb5..ffff8bb9714 100644 --- a/web/src/app/chat/modal/ShareChatSessionModal.tsx +++ b/web/src/app/chat/modal/ShareChatSessionModal.tsx @@ -1,17 +1,25 @@ import { useState } from "react"; -import { ModalWrapper } from "@/components/modals/ModalWrapper"; -import { Button, Callout, Divider, Text } from "@tremor/react"; -import { Spinner } from "@/components/Spinner"; +import { Modal } from "@/components/Modal"; +import { Button } from "@/components/ui/button"; +import { Callout } from "@/components/ui/callout"; + +import Text from "@/components/ui/text"; + import { ChatSessionSharedStatus } from "../interfaces"; -import { FiCopy, FiX } from "react-icons/fi"; +import { FiCopy } from "react-icons/fi"; import { CopyButton } from "@/components/CopyButton"; +import { SEARCH_PARAM_NAMES } from "../searchParams"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { structureValue } from "@/lib/llm/utils"; +import { LlmOverride } from "@/lib/hooks"; +import { Separator } from "@/components/ui/separator"; -function buildShareLink(chatSessionId: number) { +function buildShareLink(chatSessionId: string) { const baseUrl = `${window.location.protocol}//${window.location.host}`; return `${baseUrl}/chat/shared/${chatSessionId}`; } -async function generateShareLink(chatSessionId: number) { +async function generateShareLink(chatSessionId: string) { const response = await fetch(`/api/chat/chat-session/${chatSessionId}`, { method: "PATCH", headers: { @@ -26,7 +34,37 @@ async function generateShareLink(chatSessionId: number) { return null; } -async function deleteShareLink(chatSessionId: number) { +async function generateCloneLink( + message?: string, + assistantId?: number, + modelOverride?: LlmOverride +) { + const baseUrl = `${window.location.protocol}//${window.location.host}`; + const model = modelOverride + ? structureValue( + modelOverride.name, + modelOverride.provider, + modelOverride.modelName + ) + : null; + return `${baseUrl}/chat${ + message + ? `?${SEARCH_PARAM_NAMES.USER_PROMPT}=${encodeURIComponent(message)}` + : "" + }${ + assistantId + ? `${message ? "&" : "?"}${SEARCH_PARAM_NAMES.PERSONA_ID}=${assistantId}` + : "" + }${ + model + ? `${message || assistantId ? "&" : "?"}${ + SEARCH_PARAM_NAMES.STRUCTURED_MODEL + }=${encodeURIComponent(model)}` + : "" + }${message ? `&${SEARCH_PARAM_NAMES.SEND_ON_LOAD}=true` : ""}`; +} + +async function deleteShareLink(chatSessionId: string) { const response = await fetch(`/api/chat/chat-session/${chatSessionId}`, { method: "PATCH", headers: { @@ -43,116 +81,162 @@ export function ShareChatSessionModal({ existingSharedStatus, onShare, onClose, + message, + assistantId, + modelOverride, }: { - chatSessionId: number; + chatSessionId: string; existingSharedStatus: ChatSessionSharedStatus; onShare?: (shared: boolean) => void; onClose: () => void; + message?: string; + assistantId?: number; + modelOverride?: LlmOverride; }) { - const [linkGenerating, setLinkGenerating] = useState(false); const [shareLink, setShareLink] = useState( existingSharedStatus === ChatSessionSharedStatus.Public ? buildShareLink(chatSessionId) : "" ); + const { popup, setPopup } = usePopup(); return ( - - <> -
-

- Share link to Chat -

-
- - {linkGenerating && } - -
- - + } catch (e) { + console.error(e); + alert("Failed to generate or copy link."); + } + }} + size="sm" + variant="secondary" + > + Generate and Copy Clone Link + + + + + ); } diff --git a/web/src/app/chat/modal/configuration/AssistantsTab.tsx b/web/src/app/chat/modal/configuration/AssistantsTab.tsx index dcf31138ccf..c62fab443ba 100644 --- a/web/src/app/chat/modal/configuration/AssistantsTab.tsx +++ b/web/src/app/chat/modal/configuration/AssistantsTab.tsx @@ -13,28 +13,32 @@ import { sortableKeyboardCoordinates, verticalListSortingStrategy, } from "@dnd-kit/sortable"; -import { CSS } from "@dnd-kit/utilities"; import { Persona } from "@/app/admin/assistants/interfaces"; import { LLMProviderDescriptor } from "@/app/admin/configuration/llm/interfaces"; import { getFinalLLM } from "@/lib/llm/utils"; -import React, { useState } from "react"; +import React, { useEffect, useState } from "react"; import { updateUserAssistantList } from "@/lib/assistants/updateAssistantPreferences"; import { DraggableAssistantCard } from "@/components/assistants/AssistantCards"; -import { orderAssistantsForUser } from "@/lib/assistants/utils"; +import { useAssistants } from "@/components/context/AssistantsContext"; +import { useUser } from "@/components/user/UserProvider"; export function AssistantsTab({ selectedAssistant, - availableAssistants, llmProviders, onSelect, }: { selectedAssistant: Persona; - availableAssistants: Persona[]; llmProviders: LLMProviderDescriptor[]; onSelect: (assistant: Persona) => void; }) { + const { refreshUser } = useUser(); const [_, llmName] = getFinalLLM(llmProviders, null, null); - const [assistants, setAssistants] = useState(availableAssistants); + const { finalAssistants, refreshAssistants } = useAssistants(); + const [assistants, setAssistants] = useState(finalAssistants); + + useEffect(() => { + setAssistants(finalAssistants); + }, [finalAssistants]); const sensors = useSensors( useSensor(PointerSensor), @@ -43,23 +47,22 @@ export function AssistantsTab({ }) ); - function handleDragEnd(event: DragEndEvent) { + async function handleDragEnd(event: DragEndEvent) { const { active, over } = event; if (over && active.id !== over.id) { - setAssistants((items) => { - const oldIndex = items.findIndex( - (item) => item.id.toString() === active.id - ); - const newIndex = items.findIndex( - (item) => item.id.toString() === over.id - ); - const updatedAssistants = arrayMove(items, oldIndex, newIndex); - - updateUserAssistantList(updatedAssistants.map((a) => a.id)); + const oldIndex = assistants.findIndex( + (item) => item.id.toString() === active.id + ); + const newIndex = assistants.findIndex( + (item) => item.id.toString() === over.id + ); + const updatedAssistants = arrayMove(assistants, oldIndex, newIndex); - return updatedAssistants; - }); + setAssistants(updatedAssistants); + await updateUserAssistantList(updatedAssistants.map((a) => a.id)); + await refreshUser(); + await refreshAssistants(); } } diff --git a/web/src/app/chat/modal/configuration/LlmTab.tsx b/web/src/app/chat/modal/configuration/LlmTab.tsx index 86ead4309f3..4e51a21933e 100644 --- a/web/src/app/chat/modal/configuration/LlmTab.tsx +++ b/web/src/app/chat/modal/configuration/LlmTab.tsx @@ -1,14 +1,10 @@ import { useChatContext } from "@/components/context/ChatContext"; -import { getDisplayNameForModel, LlmOverrideManager } from "@/lib/hooks"; +import { LlmOverrideManager } from "@/lib/hooks"; import React, { forwardRef, useCallback, useState } from "react"; import { debounce } from "lodash"; -import { Text } from 
"@tremor/react"; +import Text from "@/components/ui/text"; import { Persona } from "@/app/admin/assistants/interfaces"; -import { - checkLLMSupportsImageInput, - destructureValue, - structureValue, -} from "@/lib/llm/utils"; +import { destructureValue } from "@/lib/llm/utils"; import { updateModelOverrideForChatSession } from "../../lib"; import { GearIcon } from "@/components/icons/icons"; import { LlmList } from "@/components/llm/LLMList"; @@ -18,7 +14,7 @@ interface LlmTabProps { llmOverrideManager: LlmOverrideManager; currentLlm: string; openModelSettings: () => void; - chatSessionId?: number; + chatSessionId?: string; close: () => void; currentAssistant: Persona; } @@ -44,12 +40,14 @@ export const LlmTab = forwardRef( const [localTemperature, setLocalTemperature] = useState( temperature || 0 ); - const debouncedSetTemperature = useCallback( - debounce((value) => { - setTemperature(value); - }, 300), - [] + (value: number) => { + const debouncedFunction = debounce((value: number) => { + setTemperature(value); + }, 300); + return debouncedFunction(value); + }, + [setTemperature] ); const handleTemperatureChange = (value: number) => { diff --git a/web/src/app/chat/modifiers/SearchTypeSelector.tsx b/web/src/app/chat/modifiers/SearchTypeSelector.tsx index c9f1f8a5ce3..94c1ec7047a 100644 --- a/web/src/app/chat/modifiers/SearchTypeSelector.tsx +++ b/web/src/app/chat/modifiers/SearchTypeSelector.tsx @@ -1,7 +1,7 @@ import { BasicClickable } from "@/components/BasicClickable"; import { ControlledPopup, DefaultDropdownElement } from "@/components/Dropdown"; import { useState } from "react"; -import { FiCpu, FiFilter, FiSearch } from "react-icons/fi"; +import { FiCpu, FiSearch } from "react-icons/fi"; export const QA = "Question Answering"; export const SEARCH = "Search Only"; diff --git a/web/src/app/chat/modifiers/SelectedDocuments.tsx b/web/src/app/chat/modifiers/SelectedDocuments.tsx index be3b81f37f1..fbae7029ce4 100644 --- a/web/src/app/chat/modifiers/SelectedDocuments.tsx +++ b/web/src/app/chat/modifiers/SelectedDocuments.tsx @@ -1,7 +1,6 @@ import { BasicClickable } from "@/components/BasicClickable"; import { DanswerDocument } from "@/lib/search/interfaces"; -import { useState } from "react"; -import { FiBook, FiFilter } from "react-icons/fi"; +import { FiBook } from "react-icons/fi"; export function SelectedDocuments({ selectedDocuments, diff --git a/web/src/app/chat/page.tsx b/web/src/app/chat/page.tsx index 72a1cf1aa57..7894ce651fc 100644 --- a/web/src/app/chat/page.tsx +++ b/web/src/app/chat/page.tsx @@ -5,16 +5,14 @@ import { WelcomeModal } from "@/components/initialSetup/welcome/WelcomeModalWrap import { ChatProvider } from "@/components/context/ChatContext"; import { fetchChatData } from "@/lib/chat/fetchChatData"; import WrappedChat from "./WrappedChat"; -import { ProviderContextProvider } from "@/components/chat_search/ProviderContext"; -import { orderAssistantsForUser } from "@/lib/assistants/utils"; +import { cookies } from "next/headers"; -export default async function Page({ - searchParams, -}: { - searchParams: { [key: string]: string }; +export default async function Page(props: { + searchParams: Promise<{ [key: string]: string }>; }) { + const searchParams = await props.searchParams; noStore(); - + const requestCookies = await cookies(); const data = await fetchChatData(searchParams); if ("redirect" in data) { @@ -26,7 +24,6 @@ export default async function Page({ chatSessions, availableSources, documentSets, - assistants, tags, llmProviders, folders, @@ -40,14 +37,14 @@ 
export default async function Page({ return ( <> - {shouldShowWelcomeModal && } - + {shouldShowWelcomeModal && ( + + )}

                 )}
@@ -180,7 +180,9 @@ export function ChatSessionDisplay({
                     This chat will expire{" "}
                     {daysUntilExpiration < 1
                       ? "today"
-                      : `in ${daysUntilExpiration} day${daysUntilExpiration !== 1 ? "s" : ""}`}
+                      : `in ${daysUntilExpiration} day${
+                          daysUntilExpiration !== 1 ? "s" : ""
+                        }`}

} > diff --git a/web/src/app/chat/sessionSidebar/HistorySidebar.tsx b/web/src/app/chat/sessionSidebar/HistorySidebar.tsx index beb45e1775e..9dad5c05079 100644 --- a/web/src/app/chat/sessionSidebar/HistorySidebar.tsx +++ b/web/src/app/chat/sessionSidebar/HistorySidebar.tsx @@ -1,7 +1,7 @@ "use client"; import { FiEdit, FiFolderPlus } from "react-icons/fi"; -import { ForwardedRef, forwardRef, useContext, useState } from "react"; +import React, { ForwardedRef, forwardRef, useContext, useState } from "react"; import Link from "next/link"; import { useRouter } from "next/navigation"; import { ChatSession } from "../interfaces"; @@ -11,7 +11,6 @@ import { createFolder } from "../folders/FolderManagement"; import { usePopup } from "@/components/admin/connectors/Popup"; import { SettingsContext } from "@/components/settings/SettingsProvider"; -import React from "react"; import { AssistantsIconSkeleton, ClosedBookIcon, @@ -95,7 +94,7 @@ export const HistorySidebar = forwardRef( bg-background-sidebar w-full border-r - border-border + border-sidebar-border flex flex-col relative h-screen diff --git a/web/src/app/chat/sessionSidebar/PagesTab.tsx b/web/src/app/chat/sessionSidebar/PagesTab.tsx index e6612a96c8d..be8de43ee23 100644 --- a/web/src/app/chat/sessionSidebar/PagesTab.tsx +++ b/web/src/app/chat/sessionSidebar/PagesTab.tsx @@ -7,7 +7,7 @@ import { Folder } from "../folders/interfaces"; import { CHAT_SESSION_ID_KEY, FOLDER_ID_KEY } from "@/lib/drag/constants"; import { usePopup } from "@/components/admin/connectors/Popup"; import { useRouter } from "next/navigation"; -import { useEffect, useState } from "react"; +import { useState } from "react"; import { pageType } from "./types"; export function PagesTab({ @@ -23,7 +23,7 @@ export function PagesTab({ }: { page: pageType; existingChats?: ChatSession[]; - currentChatId?: number; + currentChatId?: string; folders?: Folder[]; openedFolders?: { [key: number]: boolean }; closeSidebar?: () => void; @@ -44,10 +44,7 @@ export function PagesTab({ ) => { event.preventDefault(); setIsDragOver(false); // Reset drag over state on drop - const chatSessionId = parseInt( - event.dataTransfer.getData(CHAT_SESSION_ID_KEY), - 10 - ); + const chatSessionId = event.dataTransfer.getData(CHAT_SESSION_ID_KEY); const folderId = event.dataTransfer.getData(FOLDER_ID_KEY); if (folderId) { diff --git a/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx b/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx index 489163aa3c8..f0acaa0ace3 100644 --- a/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx +++ b/web/src/app/chat/shared/[chatId]/SharedChatDisplay.tsx @@ -9,12 +9,14 @@ import { processRawChatHistory, } from "../../lib"; import { AIMessage, HumanMessage } from "../../message/Messages"; -import { Button, Callout, Divider } from "@tremor/react"; +import { Callout } from "@/components/ui/callout"; +import { Separator } from "@/components/ui/separator"; import { useRouter } from "next/navigation"; -import { Persona } from "@/app/admin/assistants/interfaces"; import { useContext, useEffect, useState } from "react"; import { SettingsContext } from "@/components/settings/SettingsProvider"; import { DanswerInitializingLoader } from "@/components/DanswerInitializingLoader"; +import { Persona } from "@/app/admin/assistants/interfaces"; +import { Button } from "@/components/ui/button"; function BackToDanswerButton() { const router = useRouter(); @@ -27,17 +29,16 @@ function BackToDanswerButton() { Back to {enterpriseSettings?.application_name || "Danswer Chat"} - pr 
   );
 }
 export function SharedChatDisplay({
   chatSession,
-  availableAssistants,
+  persona,
 }: {
   chatSession: BackendChatSession | null;
-  availableAssistants: Persona[];
+  persona: Persona;
 }) {
   const [isReady, setIsReady] = useState(false);
   useEffect(() => {
@@ -48,7 +49,7 @@ export function SharedChatDisplay({
     return (
- + Did not find a shared chat with the specified ID.
@@ -56,9 +57,6 @@ export function SharedChatDisplay({
); } - const currentPersona = availableAssistants.find( - (persona) => persona.id === chatSession.persona_id - ); const messages = buildLatestMessageChain( processRawChatHistory(chatSession.messages) @@ -78,7 +76,7 @@ export function SharedChatDisplay({ {humanReadableFormat(chatSession.time_created)}

- + {isReady ? (
@@ -96,12 +94,11 @@ export function SharedChatDisplay({ return ( diff --git a/web/src/app/chat/shared/[chatId]/page.tsx b/web/src/app/chat/shared/[chatId]/page.tsx index 9e8ce58432e..11e624ff687 100644 --- a/web/src/app/chat/shared/[chatId]/page.tsx +++ b/web/src/app/chat/shared/[chatId]/page.tsx @@ -9,8 +9,12 @@ import { redirect } from "next/navigation"; import { BackendChatSession } from "../../interfaces"; import { SharedChatDisplay } from "./SharedChatDisplay"; import { Persona } from "@/app/admin/assistants/interfaces"; -import { fetchAssistantsSS } from "@/lib/assistants/fetchAssistantsSS"; +import { + FetchAssistantsResponse, + fetchAssistantsSS, +} from "@/lib/assistants/fetchAssistantsSS"; import FunctionalHeader from "@/components/chat_search/Header"; +import { defaultPersona } from "@/app/admin/assistants/lib"; async function getSharedChat(chatId: string) { const response = await fetchSS( @@ -22,7 +26,10 @@ async function getSharedChat(chatId: string) { return null; } -export default async function Page({ params }: { params: { chatId: string } }) { +export default async function Page(props: { + params: Promise<{ chatId: string }>; +}) { + const params = await props.params; const tasks = [ getAuthTypeMetadataSS(), getCurrentUserSS(), @@ -43,7 +50,8 @@ export default async function Page({ params }: { params: { chatId: string } }) { const authTypeMetadata = results[0] as AuthTypeMetadata | null; const user = results[1] as User | null; const chatSession = results[2] as BackendChatSession | null; - const [availableAssistants, _] = results[3] as [Persona[], string | null]; + const assistantsResponse = results[3] as FetchAssistantsResponse | null; + const [availableAssistants, error] = assistantsResponse ?? [[], null]; const authDisabled = authTypeMetadata?.authType === "disabled"; if (!authDisabled && !user) { @@ -53,18 +61,21 @@ export default async function Page({ params }: { params: { chatId: string } }) { if (user && !user.is_verified && authTypeMetadata?.requiresVerification) { return redirect("/auth/waiting-on-verification"); } + // prettier-ignore + const persona: Persona = + chatSession?.persona_id && availableAssistants?.length + ? (availableAssistants.find((p) => p.id === chatSession.persona_id) ?? + defaultPersona) + : (availableAssistants?.[0] ?? defaultPersona); return (
- +
- +
); diff --git a/web/src/app/chat/shared_chat_search/FixedLogo.tsx b/web/src/app/chat/shared_chat_search/FixedLogo.tsx index 2961db9df5b..921220adbd7 100644 --- a/web/src/app/chat/shared_chat_search/FixedLogo.tsx +++ b/web/src/app/chat/shared_chat_search/FixedLogo.tsx @@ -8,7 +8,12 @@ import Link from "next/link"; import { useContext } from "react"; import { FiSidebar } from "react-icons/fi"; -export default function FixedLogo() { +export default function FixedLogo({ + // Whether the sidebar is toggled or not + backgroundToggled, +}: { + backgroundToggled?: boolean; +}) { const combinedSettings = useContext(SettingsContext); const settings = combinedSettings?.settings; const enterpriseSettings = combinedSettings?.enterpriseSettings; @@ -28,13 +33,17 @@ export default function FixedLogo() {
{enterpriseSettings && enterpriseSettings.application_name ? (
- {enterpriseSettings.application_name} + + {enterpriseSettings.application_name} + {!NEXT_PUBLIC_DO_NOT_USE_TOGGLE_OFF_DANSWER_POWERED && (

Powered by Danswer

)}
) : ( - Danswer + + Danswer + )}
diff --git a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx index 4f8d31d39ee..bfa8ef934f3 100644 --- a/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx +++ b/web/src/app/chat/shared_chat_search/FunctionalWrapper.tsx @@ -36,7 +36,7 @@ const ToggleSwitch = () => { }; return ( -
+
-
+ <>

{isUpdate ? "Update API Key" : "Create a new API Key"} -
- -

- + {isUpdate ? "Update!" : "Create!"} @@ -126,7 +118,7 @@ export const DanswerApiKeyForm = ({ )} -
+ ); }; diff --git a/web/src/app/ee/admin/api-key/page.tsx b/web/src/app/ee/admin/api-key/page.tsx index 4c5447932c5..1420a1c518b 100644 --- a/web/src/app/ee/admin/api-key/page.tsx +++ b/web/src/app/ee/admin/api-key/page.tsx @@ -6,20 +6,21 @@ import { KeyIcon } from "@/components/icons/icons"; import { errorHandlingFetcher } from "@/lib/fetcher"; import { ErrorCallout } from "@/components/ErrorCallout"; import useSWR, { mutate } from "swr"; +import { Button } from "@/components/ui/button"; +import { Separator } from "@/components/ui/separator"; import { - Button, - Divider, TableBody, TableCell, TableHead, - TableHeaderCell, + TableHeader, TableRow, - Text, - Title, -} from "@tremor/react"; + Table, +} from "@/components/ui/table"; + +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; import { usePopup } from "@/components/admin/connectors/Popup"; import { useState } from "react"; -import { Table } from "@tremor/react"; import { DeleteButton } from "@/components/DeleteButton"; import { FiCopy, FiEdit2, FiRefreshCw, FiX } from "react-icons/fi"; import { Modal } from "@/components/Modal"; @@ -116,8 +117,8 @@ function Main() { const newApiKeyButton = (
- {shareLink ? ( -
- - This chat session is currently shared. Anyone at your - organization can view the message history using the following - link: - - -
- - + {popup} + + <> +
+

+ Share link to Chat +

+
+ +
+ {shareLink ? ( +
+ + This chat session is currently shared. Anyone at your + organization can view the message history using the following + link: + + + + + + + + Click the button below to make the chat private again. + + +
- - - - - Click the button below to make the chat private again. - - - +
+
+ )} +
+ + +
+ + Generate a link to clone this chat session with the current query. + This allows others to start a new chat with the same initial + message and settings. + +
+
+ -
- ) : ( -
- - Ensure that all content in the chat is safe to share with the - whole organization. The content of the retrieved documents will - not be visible, but the names of cited documents as well as the - AI and human messages will be visible. - - - -
- )} -
           System default{" "}
           {defaultProvider?.default_model_name &&
-            `(${getDisplayNameForModel(defaultProvider?.default_model_name)})`}
+            `(${getDisplayNameForModel(
+              defaultProvider?.default_model_name
+            )})`}
- + - Name - API Key - Role - Regenerate - Delete + Name + API Key + Role + Regenerate + Delete - + {apiKeys.map((apiKey) => ( diff --git a/web/src/app/ee/admin/cloud-settings/BillingInformationPage.tsx b/web/src/app/ee/admin/cloud-settings/BillingInformationPage.tsx new file mode 100644 index 00000000000..de2e4142947 --- /dev/null +++ b/web/src/app/ee/admin/cloud-settings/BillingInformationPage.tsx @@ -0,0 +1,222 @@ +"use client"; + +import { CreditCard, ArrowFatUp } from "@phosphor-icons/react"; +import { useState } from "react"; +import { useRouter } from "next/navigation"; +import { loadStripe } from "@stripe/stripe-js"; +import { usePopup } from "@/components/admin/connectors/Popup"; +import { SettingsIcon } from "@/components/icons/icons"; +import { + updateSubscriptionQuantity, + fetchCustomerPortal, + statusToDisplay, + useBillingInformation, +} from "./utils"; +import { useEffect } from "react"; + +export default function BillingInformationPage() { + const router = useRouter(); + const { popup, setPopup } = usePopup(); + const stripePromise = loadStripe( + process.env.NEXT_PUBLIC_STRIPE_PUBLISHABLE_KEY! + ); + + const { + data: billingInformation, + error, + isLoading, + refreshBillingInformation, + } = useBillingInformation(); + + const [seats, setSeats] = useState(1); + + useEffect(() => { + if (billingInformation?.seats) { + setSeats(billingInformation.seats); + } + }, [billingInformation?.seats]); + + if (error) { + console.error("Failed to fetch billing information:", error); + } + useEffect(() => { + const url = new URL(window.location.href); + if (url.searchParams.has("session_id")) { + setPopup({ + message: + "Congratulations! Your subscription has been updated successfully.", + type: "success", + }); + // Remove the session_id from the URL + url.searchParams.delete("session_id"); + window.history.replaceState({}, "", url.toString()); + // You might want to refresh the billing information here + // by calling an API endpoint to get the latest data + } + }, [setPopup]); + + if (isLoading) { + return
Loading...
; + } + + const handleManageSubscription = async () => { + try { + const response = await fetchCustomerPortal(); + + if (!response.ok) { + const errorData = await response.json(); + throw new Error( + `Failed to create customer portal session: ${errorData.message || response.statusText}` + ); + } + + const { url } = await response.json(); + + if (!url) { + throw new Error("No portal URL returned from the server"); + } + + router.push(url); + } catch (error) { + console.error("Error creating customer portal session:", error); + setPopup({ + message: "Error creating customer portal session", + type: "error", + }); + } + }; + if (!billingInformation) { + return
Loading...
; + } + + return ( +
+
+ {popup} + +

+ + Billing Information +

+ +
+
+
+
+

Seats

+

+ Number of licensed users +

+
+

+ {billingInformation.seats} +

+
+
+ +
+
+
+

+ Subscription Status +

+

+ Current state of your subscription +

+
+

+ {statusToDisplay(billingInformation.subscription_status)} +

+
+
+ +
+
+
+

+ Billing Start +

+

+ Start date of current billing cycle +

+
+

+ {new Date( + billingInformation.billing_start + ).toLocaleDateString()} +

+
+
+ +
+
+
+

Billing End

+

+ End date of current billing cycle +

+
+

+ {new Date(billingInformation.billing_end).toLocaleDateString()} +

+
+
+
+ + {!billingInformation.payment_method_enabled && ( +
+

Notice:

+

+ You'll need to add a payment method before your trial ends to + continue using the service. +

+
+ )} + + {billingInformation.subscription_status === "trialing" ? ( +
+

+ No cap on users during trial +

+
+ ) : ( +
+
+

+ Current Seats: +

+

+ {billingInformation.seats} +

+
+

+ Seats automatically update based on adding, removing, or inviting + users. +

+
+ )} +
+ +
+
+
+

+ Manage Subscription +

+

+ View your plan, update payment, or change subscription +

+
+ +
+ +
+
+ ); +} diff --git a/web/src/app/ee/admin/cloud-settings/page.tsx b/web/src/app/ee/admin/cloud-settings/page.tsx new file mode 100644 index 00000000000..6566e069ba7 --- /dev/null +++ b/web/src/app/ee/admin/cloud-settings/page.tsx @@ -0,0 +1,23 @@ +import { AdminPageTitle } from "@/components/admin/Title"; +import BillingInformationPage from "./BillingInformationPage"; +import { FaCloud } from "react-icons/fa"; + +export interface BillingInformation { + seats: number; + subscription_status: string; + billing_start: Date; + billing_end: Date; + payment_method_enabled: boolean; +} + +export default function page() { + return ( +
+ } + /> + +
+ ); +} diff --git a/web/src/app/ee/admin/cloud-settings/utils.ts b/web/src/app/ee/admin/cloud-settings/utils.ts new file mode 100644 index 00000000000..1f2aaa8e8eb --- /dev/null +++ b/web/src/app/ee/admin/cloud-settings/utils.ts @@ -0,0 +1,46 @@ +import { BillingInformation } from "./page"; +import useSWR, { mutate } from "swr"; + +export const updateSubscriptionQuantity = async (seats: number) => { + return await fetch("/api/tenants/update-subscription-quantity", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + body: JSON.stringify({ quantity: seats }), + }); +}; + +export const fetchCustomerPortal = async () => { + return await fetch("/api/tenants/create-customer-portal-session", { + method: "POST", + headers: { + "Content-Type": "application/json", + }, + }); +}; + +export const statusToDisplay = (status: string) => { + switch (status) { + case "trialing": + return "Trialing"; + case "active": + return "Active"; + case "canceled": + return "Canceled"; + default: + return "Unknown"; + } +}; + +export const useBillingInformation = () => { + const url = "/api/tenants/billing-information"; + const swrResponse = useSWR(url, (url: string) => + fetch(url).then((res) => res.json()) + ); + + return { + ...swrResponse, + refreshBillingInformation: () => mutate(url), + }; +}; diff --git a/web/src/app/ee/admin/groups/ConnectorEditor.tsx b/web/src/app/ee/admin/groups/ConnectorEditor.tsx index ab6bbb0bec9..2283b71fb63 100644 --- a/web/src/app/ee/admin/groups/ConnectorEditor.tsx +++ b/web/src/app/ee/admin/groups/ConnectorEditor.tsx @@ -19,7 +19,7 @@ export const ConnectorEditor = ({ .filter((ccPair) => !(ccPair.access_type === "public")) .map((ccPair) => { const ind = selectedCCPairIds.indexOf(ccPair.cc_pair_id); - let isSelected = ind !== -1; + const isSelected = ind !== -1; return (
void; @@ -35,21 +34,12 @@ export const UserGroupCreationForm = ({ return ( -
+ <>

{isUpdate ? "Update a User Group" : "Create a new User Group"} -
- -

- + {({ isSubmitting, values, setFieldValue }) => (
-
+
- +

Select which private connectors this group has access to: @@ -116,7 +106,7 @@ export const UserGroupCreationForm = ({ } /> - +

Select which Users should be a part of this Group. @@ -138,8 +128,8 @@ export const UserGroupCreationForm = ({
+ ); }; diff --git a/web/src/app/ee/admin/groups/UserGroupsTable.tsx b/web/src/app/ee/admin/groups/UserGroupsTable.tsx index 49a84104a6c..e1197031a6d 100644 --- a/web/src/app/ee/admin/groups/UserGroupsTable.tsx +++ b/web/src/app/ee/admin/groups/UserGroupsTable.tsx @@ -4,10 +4,9 @@ import { Table, TableHead, TableRow, - TableHeaderCell, TableBody, TableCell, -} from "@tremor/react"; +} from "@/components/ui/table"; import { PopupSpec } from "@/components/admin/connectors/Popup"; import { LoadingAnimation } from "@/components/Loading"; import { BasicTable } from "@/components/admin/connectors/BasicTable"; @@ -19,6 +18,7 @@ import { FiEdit2, FiUser } from "react-icons/fi"; import { User, UserGroup } from "@/lib/types"; import Link from "next/link"; import { DeleteButton } from "@/components/DeleteButton"; +import { TableHeader } from "@/components/ui/table"; const MAX_USERS_TO_DISPLAY = 6; @@ -57,15 +57,15 @@ export const UserGroupsTable = ({ return (

- + - Name - Connectors - Users - Status - Delete + Name + Connectors + Users + Status + Delete - + {userGroups .filter((userGroup) => !userGroup.is_up_for_deletion) diff --git a/web/src/app/ee/admin/groups/[groupId]/AddConnectorForm.tsx b/web/src/app/ee/admin/groups/[groupId]/AddConnectorForm.tsx index 8b5166ed851..dee6d5ee17b 100644 --- a/web/src/app/ee/admin/groups/[groupId]/AddConnectorForm.tsx +++ b/web/src/app/ee/admin/groups/[groupId]/AddConnectorForm.tsx @@ -1,7 +1,6 @@ import { Button } from "@/components/Button"; import { SearchMultiSelectDropdown } from "@/components/Dropdown"; import { Modal } from "@/components/Modal"; -import { UsersIcon } from "@/components/icons/icons"; import { useState } from "react"; import { FiPlus, FiX } from "react-icons/fi"; import { updateUserGroup } from "./lib"; diff --git a/web/src/app/ee/admin/groups/[groupId]/GroupDisplay.tsx b/web/src/app/ee/admin/groups/[groupId]/GroupDisplay.tsx index b49ffdce16e..29b325a42e2 100644 --- a/web/src/app/ee/admin/groups/[groupId]/GroupDisplay.tsx +++ b/web/src/app/ee/admin/groups/[groupId]/GroupDisplay.tsx @@ -14,19 +14,24 @@ import { USER_ROLE_LABELS, } from "@/lib/types"; import { AddConnectorForm } from "./AddConnectorForm"; +import { Separator } from "@/components/ui/separator"; +import { + Select, + SelectContent, + SelectItem, + SelectTrigger, + SelectValue, +} from "@/components/ui/select"; +import Text from "@/components/ui/text"; import { Table, - TableHead, - TableRow, - TableHeaderCell, TableBody, TableCell, - Divider, - Button, - Text, - Select, - SelectItem, -} from "@tremor/react"; + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { Button } from "@/components/ui/button"; import { DeleteButton } from "@/components/DeleteButton"; import { Bubble } from "@/components/Bubble"; import { BookmarkIcon, RobotIcon } from "@/components/icons/icons"; @@ -96,13 +101,19 @@ const UserRoleDropdown = ({ if (isEditable) { return (
+ Select group
); @@ -151,7 +162,7 @@ export const GroupDisplay = ({ )} - +

Users

@@ -161,15 +172,15 @@ export const GroupDisplay = ({ {userGroup.users.length > 0 ? ( <>
- + - Email - Role - + Email + Role +
Remove User
-
+
- + {userGroup.users.map((groupMember) => { return ( @@ -247,8 +258,8 @@ export const GroupDisplay = ({
- + - Connector - + Connector +
Remove Connector
-
+
- + {userGroup.cc_pairs.map((ccPair) => { return ( @@ -347,8 +358,8 @@ export const GroupDisplay = ({ )} {data.length > 0 && (
- {isAdmin && } void; + value: DateRange; + onValueChange: (value: DateRange) => void; }) { + const [isOpen, setIsOpen] = useState(false); + + const presets = [ + { + label: "Last 30 days", + value: { + from: getXDaysAgo(30), + to: getXDaysAgo(0), + }, + }, + { + label: "Today", + value: { + from: getXDaysAgo(1), + to: getXDaysAgo(0), + }, + }, + ]; + return ( -
- Date Range - - - Last 30 days - - - Today - - +
+ + + + + + { + if (range?.from && range?.to) { + onValueChange({ from: range.from, to: range.to }); + } + }} + numberOfMonths={2} + /> +
+ {presets.map((preset) => ( + + ))} +
+
+
); } diff --git a/web/src/app/ee/admin/performance/custom-analytics/CustomAnalyticsUpdateForm.tsx b/web/src/app/ee/admin/performance/custom-analytics/CustomAnalyticsUpdateForm.tsx index c542a69eba1..debd1504462 100644 --- a/web/src/app/ee/admin/performance/custom-analytics/CustomAnalyticsUpdateForm.tsx +++ b/web/src/app/ee/admin/performance/custom-analytics/CustomAnalyticsUpdateForm.tsx @@ -3,7 +3,9 @@ import { Label, SubLabel } from "@/components/admin/connectors/Field"; import { usePopup } from "@/components/admin/connectors/Popup"; import { SettingsContext } from "@/components/settings/SettingsProvider"; -import { Button, Callout, Text } from "@tremor/react"; +import { Button } from "@/components/ui/button"; +import { Callout } from "@/components/ui/callout"; +import Text from "@/components/ui/text"; import { useContext, useState } from "react"; export function CustomAnalyticsUpdateForm() { @@ -17,7 +19,7 @@ export function CustomAnalyticsUpdateForm() { const { popup, setPopup } = usePopup(); if (!settings) { - return ; + return ; } return ( diff --git a/web/src/app/ee/admin/performance/custom-analytics/page.tsx b/web/src/app/ee/admin/performance/custom-analytics/page.tsx index b0fc619f24f..d4e1258ea0f 100644 --- a/web/src/app/ee/admin/performance/custom-analytics/page.tsx +++ b/web/src/app/ee/admin/performance/custom-analytics/page.tsx @@ -1,7 +1,8 @@ import { AdminPageTitle } from "@/components/admin/Title"; import { CUSTOM_ANALYTICS_ENABLED } from "@/lib/constants"; -import { Callout, Text } from "@tremor/react"; +import { Callout } from "@/components/ui/callout"; import { FiBarChart2 } from "react-icons/fi"; +import Text from "@/components/ui/text"; import { CustomAnalyticsUpdateForm } from "./CustomAnalyticsUpdateForm"; function Main() { @@ -9,7 +10,7 @@ function Main() { return (
- + To set up custom analytics scripts, please work with the team who setup Danswer in your organization to set the{" "} CUSTOM_ANALYTICS_SECRET_KEY environment variable. diff --git a/web/src/app/ee/admin/performance/lib.ts b/web/src/app/ee/admin/performance/lib.ts index a603166139d..d9068b466ff 100644 --- a/web/src/app/ee/admin/performance/lib.ts +++ b/web/src/app/ee/admin/performance/lib.ts @@ -9,13 +9,14 @@ import { import { useState } from "react"; import { buildApiPath } from "@/lib/urlBuilder"; import { Feedback } from "@/lib/types"; -import { DateRangePickerValue } from "@tremor/react"; + import { convertDateToEndOfDay, convertDateToStartOfDay, getXDaysAgo, } from "./dateUtils"; import { THIRTY_DAYS } from "./DateRangeSelector"; +import { DateRangePickerValue } from "@/app/ee/admin/performance/DateRangeSelector"; export const useTimeRange = () => { return useState({ diff --git a/web/src/app/ee/admin/performance/query-history/FeedbackBadge.tsx b/web/src/app/ee/admin/performance/query-history/FeedbackBadge.tsx index ebe75e7a02b..b4b778b06e9 100644 --- a/web/src/app/ee/admin/performance/query-history/FeedbackBadge.tsx +++ b/web/src/app/ee/admin/performance/query-history/FeedbackBadge.tsx @@ -1,5 +1,5 @@ +import { Badge } from "@/components/ui/badge"; import { Feedback } from "@/lib/types"; -import { Badge } from "@tremor/react"; export function FeedbackBadge({ feedback, @@ -10,28 +10,28 @@ export function FeedbackBadge({ switch (feedback) { case "like": feedbackBadge = ( - + Like ); break; case "dislike": feedbackBadge = ( - + Dislike ); break; case "mixed": feedbackBadge = ( - + Mixed ); break; default: feedbackBadge = ( - + N/A ); diff --git a/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx b/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx index 0ee3522b4fa..b46d77c27d5 100644 --- a/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx +++ b/web/src/app/ee/admin/performance/query-history/QueryHistoryTable.tsx @@ -1,17 +1,22 @@ import { useQueryHistory } from "../lib"; - +import { Separator } from "@/components/ui/separator"; import { - Card, Table, TableHead, TableRow, - TableHeaderCell, TableBody, TableCell, - Text, -} from "@tremor/react"; -import { Divider } from "@tremor/react"; -import { Select, SelectItem } from "@tremor/react"; + TableHeader, +} from "@/components/ui/table"; +import Text from "@/components/ui/text"; + +import { + Select, + SelectItem, + SelectValue, + SelectTrigger, + SelectContent, +} from "@/components/ui/select"; import { ThreeDotsLoader } from "@/components/Loading"; import { ChatSessionMinimal } from "../usage/types"; import { timestampToReadableDate } from "@/lib/dateUtils"; @@ -23,6 +28,7 @@ import { PageSelector } from "@/components/PageSelector"; import Link from "next/link"; import { FeedbackBadge } from "./FeedbackBadge"; import { DownloadAsCSV } from "./DownloadAsCSV"; +import CardSection from "@/components/admin/CardSection"; const NUM_IN_PAGE = 20; @@ -79,17 +85,30 @@ function SelectFeedbackType({
@@ -108,7 +127,7 @@ export function QueryHistoryTable() { const [page, setPage] = useState(1); return ( - + {chatSessionData ? ( <>
@@ -120,24 +139,31 @@ export function QueryHistoryTable() { { + if (value) { + setTimeRange({ + ...value, + selectValue: timeRange.selectValue, + }); + } + }} />
- +
- + - First User Message - First AI Response - Feedback - User - Persona - Date + First User Message + First AI Response + Feedback + User + Persona + Date - + {chatSessionData .slice(NUM_IN_PAGE * (page - 1), NUM_IN_PAGE * page) @@ -172,6 +198,6 @@ export function QueryHistoryTable() { )} - + ); } diff --git a/web/src/app/ee/admin/performance/query-history/[id]/page.tsx b/web/src/app/ee/admin/performance/query-history/[id]/page.tsx index f96d070aefe..ee5a03f28f6 100644 --- a/web/src/app/ee/admin/performance/query-history/[id]/page.tsx +++ b/web/src/app/ee/admin/performance/query-history/[id]/page.tsx @@ -1,6 +1,9 @@ "use client"; +import { use } from "react"; -import { Bold, Text, Card, Title, Divider } from "@tremor/react"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; +import { Separator } from "@/components/ui/separator"; import { ChatSessionSnapshot, MessageSnapshot } from "../../usage/types"; import { FiBook } from "react-icons/fi"; import { timestampToReadableDate } from "@/lib/dateUtils"; @@ -10,17 +13,18 @@ import { errorHandlingFetcher } from "@/lib/fetcher"; import useSWR from "swr"; import { ErrorCallout } from "@/components/ErrorCallout"; import { ThreeDotsLoader } from "@/components/Loading"; +import CardSection from "@/components/admin/CardSection"; function MessageDisplay({ message }: { message: MessageSnapshot }) { return (
- +

{message.message_type === "user" ? "User" : "AI"} - +

{message.message} {message.documents.length > 0 && (
- Reference Documents +

Reference Documents

{message.documents.slice(0, 5).map((document) => { return ( @@ -30,7 +34,12 @@ function MessageDisplay({ message }: { message: MessageSnapshot }) { } /> {document.link ? ( - + {document.semantic_identifier} ) : ( @@ -43,19 +52,20 @@ function MessageDisplay({ message }: { message: MessageSnapshot }) { )} {message.feedback_type && (
- Feedback +

Feedback

{message.feedback_text && {message.feedback_text}}
)} - +
   );
 }
-export default function QueryPage({ params }: { params: { id: string } }) {
+export default function QueryPage(props: { params: Promise<{ id: string }> }) {
+  const params = use(props.params);
   const {
     data: chatSessionSnapshot,
     isLoading,
@@ -82,7 +92,7 @@ export default function QueryPage({ params }: { params: { id: string } }) {
- + Chat Session Details @@ -92,7 +102,7 @@ export default function QueryPage({ params }: { params: { id: string } }) { {chatSessionSnapshot.flow_type} - +
{chatSessionSnapshot.messages.map((message) => { @@ -101,7 +111,7 @@ export default function QueryPage({ params }: { params: { id: string } }) { ); })}
-
+
); } diff --git a/web/src/app/ee/admin/performance/usage/DanswerBotChart.tsx b/web/src/app/ee/admin/performance/usage/DanswerBotChart.tsx index 9d15abcf545..3e4966b5454 100644 --- a/web/src/app/ee/admin/performance/usage/DanswerBotChart.tsx +++ b/web/src/app/ee/admin/performance/usage/DanswerBotChart.tsx @@ -1,12 +1,10 @@ import { ThreeDotsLoader } from "@/components/Loading"; import { getDatesList, useDanswerBotAnalytics } from "../lib"; -import { - AreaChart, - Card, - Title, - Text, - DateRangePickerValue, -} from "@tremor/react"; +import { DateRangePickerValue } from "@/app/ee/admin/performance/DateRangeSelector"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; +import CardSection from "@/components/admin/CardSection"; +import { AreaChartDisplay } from "@/components/ui/areaChart"; export function DanswerBotChart({ timeRange, @@ -45,8 +43,8 @@ export function DanswerBotChart({ ); chart = ( - { const danswerBotAnalyticsForDate = dateToDanswerBotAnalytics.get(dateStr); @@ -69,10 +67,10 @@ export function DanswerBotChart({ } return ( - + Slack Bot Total Queries vs Auto Resolved {chart} - + ); } diff --git a/web/src/app/ee/admin/performance/usage/FeedbackChart.tsx b/web/src/app/ee/admin/performance/usage/FeedbackChart.tsx index a466d866e06..c254f1cec5f 100644 --- a/web/src/app/ee/admin/performance/usage/FeedbackChart.tsx +++ b/web/src/app/ee/admin/performance/usage/FeedbackChart.tsx @@ -1,12 +1,11 @@ import { ThreeDotsLoader } from "@/components/Loading"; import { getDatesList, useQueryAnalytics } from "../lib"; -import { - AreaChart, - Card, - Title, - Text, - DateRangePickerValue, -} from "@tremor/react"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; + +import { DateRangePickerValue } from "@/app/ee/admin/performance/DateRangeSelector"; +import CardSection from "@/components/admin/CardSection"; +import { AreaChartDisplay } from "@/components/ui/areaChart"; export function FeedbackChart({ timeRange, @@ -44,8 +43,8 @@ export function FeedbackChart({ ); chart = ( - { const queryAnalyticsForDate = dateToQueryAnalytics.get(dateStr); return { @@ -66,10 +65,10 @@ export function FeedbackChart({ } return ( - + Feedback Thumbs Up / Thumbs Down over time {chart} - + ); } diff --git a/web/src/app/ee/admin/performance/usage/QueryPerformanceChart.tsx b/web/src/app/ee/admin/performance/usage/QueryPerformanceChart.tsx index b16e80bf698..4a3aaaf168c 100644 --- a/web/src/app/ee/admin/performance/usage/QueryPerformanceChart.tsx +++ b/web/src/app/ee/admin/performance/usage/QueryPerformanceChart.tsx @@ -1,14 +1,12 @@ "use client"; -import { - Card, - AreaChart, - Title, - Text, - DateRangePickerValue, -} from "@tremor/react"; +import { DateRangePickerValue } from "@/app/ee/admin/performance/DateRangeSelector"; import { getDatesList, useQueryAnalytics, useUserAnalytics } from "../lib"; import { ThreeDotsLoader } from "@/components/Loading"; +import { AreaChartDisplay } from "@/components/ui/areaChart"; +import Title from "@/components/ui/title"; +import Text from "@/components/ui/text"; +import CardSection from "@/components/admin/CardSection"; export function QueryPerformanceChart({ timeRange, @@ -62,8 +60,8 @@ export function QueryPerformanceChart({ ); chart = ( - { const queryAnalyticsForDate = dateToQueryAnalytics.get(dateStr); const userAnalyticsForDate = dateToUserAnalytics.get(dateStr); @@ -85,10 +83,10 @@ export function QueryPerformanceChart({ } return ( - + Usage Usage over time {chart} - + ); } diff --git 
a/web/src/app/ee/admin/performance/usage/UsageReports.tsx b/web/src/app/ee/admin/performance/usage/UsageReports.tsx index 9dbeb6ca2f3..539ff6ee2d7 100644 --- a/web/src/app/ee/admin/performance/usage/UsageReports.tsx +++ b/web/src/app/ee/admin/performance/usage/UsageReports.tsx @@ -1,24 +1,21 @@ "use client"; +import { format } from "date-fns"; import { errorHandlingFetcher } from "@/lib/fetcher"; import { FiDownload, FiDownloadCloud } from "react-icons/fi"; import { - DateRangePicker, - DateRangePickerItem, - DateRangePickerValue, - Divider, Table, TableBody, TableCell, TableHead, - TableHeaderCell, + TableHeader, TableRow, - Text, - Title, -} from "@tremor/react"; +} from "@/components/ui/table"; +import Text from "@/components/ui/text"; +import Title from "@/components/ui/title"; +import { Button } from "@/components/ui/button"; import useSWR from "swr"; -import { Button } from "@tremor/react"; import { useState } from "react"; import { UsageReport } from "./types"; import { ThreeDotsLoader } from "@/components/Loading"; @@ -26,6 +23,16 @@ import Link from "next/link"; import { humanReadableFormat, humanReadableFormatWithTime } from "@/lib/time"; import { ErrorCallout } from "@/components/ErrorCallout"; import { PageSelector } from "@/components/PageSelector"; +import { Separator } from "@/components/ui/separator"; +import { DateRangePickerValue } from "../DateRangeSelector"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; +import { CalendarIcon } from "lucide-react"; +import { Calendar } from "@/components/ui/calendar"; +import { cn } from "@/lib/utils"; function GenerateReportInput() { const [dateRange, setDateRange] = useState( @@ -36,7 +43,7 @@ function GenerateReportInput() { const [errorOccurred, setErrorOccurred] = useState(null); const download = (bytes: Blob) => { - let elm = document.createElement("a"); + const elm = document.createElement("a"); elm.href = URL.createObjectURL(bytes); elm.setAttribute("download", "usage_reports.zip"); elm.click(); @@ -101,41 +108,109 @@ function GenerateReportInput() { Generate usage statistics for users in the workspace. - - - Last 7 days - - - Last 30 days - - - Last year - - - All time - - +
+ + + + + + + range?.from && + setDateRange({ + from: range.from, + to: range.to ?? range.from, + selectValue: "custom", + }) + } + numberOfMonths={2} + disabled={(date) => date > new Date()} + /> +
+ + + + +
+
+
+
- + - Report - Period - Generated By - Time Generated - Download + Report + Period + Generated By + Time Generated + Download - + {paginatedReports.map((r) => ( @@ -256,7 +331,7 @@ export default function UsageReports() { return (
- +
); diff --git a/web/src/app/ee/admin/performance/usage/page.tsx b/web/src/app/ee/admin/performance/usage/page.tsx index 4fd287eccc6..e1fffc323a2 100644 --- a/web/src/app/ee/admin/performance/usage/page.tsx +++ b/web/src/app/ee/admin/performance/usage/page.tsx @@ -4,30 +4,29 @@ import { DateRangeSelector } from "../DateRangeSelector"; import { DanswerBotChart } from "./DanswerBotChart"; import { FeedbackChart } from "./FeedbackChart"; import { QueryPerformanceChart } from "./QueryPerformanceChart"; -import { BarChartIcon } from "@/components/icons/icons"; import { useTimeRange } from "../lib"; import { AdminPageTitle } from "@/components/admin/Title"; import { FiActivity } from "react-icons/fi"; import UsageReports from "./UsageReports"; -import { Divider } from "@tremor/react"; +import { Separator } from "@/components/ui/separator"; export default function AnalyticsPage() { const [timeRange, setTimeRange] = useTimeRange(); return (
- {/* TODO: remove this `dark` once we have a mode selector */} } /> - - - + setTimeRange(value as any)} + /> - +
); diff --git a/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx b/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx index 15574701fb4..a1f42d7896d 100644 --- a/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx +++ b/web/src/app/ee/admin/standard-answer/StandardAnswerCreationForm.tsx @@ -2,7 +2,8 @@ import { usePopup } from "@/components/admin/connectors/Popup"; import { StandardAnswerCategory, StandardAnswer } from "@/lib/types"; -import { Button, Card } from "@tremor/react"; +import CardSection from "@/components/admin/CardSection"; +import { Button } from "@/components/ui/button"; import { Form, Formik } from "formik"; import { useRouter } from "next/navigation"; import * as Yup from "yup"; @@ -41,7 +42,7 @@ export const StandardAnswerCreationForm = ({ return (
- + {popup}
-
+
); }; diff --git a/web/src/app/ee/admin/standard-answer/[id]/page.tsx b/web/src/app/ee/admin/standard-answer/[id]/page.tsx index 6d949331b19..edacd785b31 100644 --- a/web/src/app/ee/admin/standard-answer/[id]/page.tsx +++ b/web/src/app/ee/admin/standard-answer/[id]/page.tsx @@ -3,11 +3,11 @@ import { StandardAnswerCreationForm } from "@/app/ee/admin/standard-answer/Stand import { fetchSS } from "@/lib/utilsSS"; import { ErrorCallout } from "@/components/ErrorCallout"; import { BackButton } from "@/components/BackButton"; -import { Text } from "@tremor/react"; import { ClipboardIcon } from "@/components/icons/icons"; import { StandardAnswer, StandardAnswerCategory } from "@/lib/types"; -async function Page({ params }: { params: { id: string } }) { +async function Page(props: { params: Promise<{ id: string }> }) { + const params = await props.params; const tasks = [ fetchSS("/manage/admin/standard-answer"), fetchSS(`/manage/admin/standard-answer/category`), diff --git a/web/src/app/ee/admin/standard-answer/new/page.tsx b/web/src/app/ee/admin/standard-answer/new/page.tsx index e671f5e1ae9..5a5cccb9c0a 100644 --- a/web/src/app/ee/admin/standard-answer/new/page.tsx +++ b/web/src/app/ee/admin/standard-answer/new/page.tsx @@ -3,7 +3,6 @@ import { StandardAnswerCreationForm } from "@/app/ee/admin/standard-answer/Stand import { fetchSS } from "@/lib/utilsSS"; import { ErrorCallout } from "@/components/ErrorCallout"; import { BackButton } from "@/components/BackButton"; -import { Text } from "@tremor/react"; import { ClipboardIcon } from "@/components/icons/icons"; import { StandardAnswerCategory } from "@/lib/types"; diff --git a/web/src/app/ee/admin/standard-answer/page.tsx b/web/src/app/ee/admin/standard-answer/page.tsx index 867770ede48..6bc921fdefa 100644 --- a/web/src/app/ee/admin/standard-answer/page.tsx +++ b/web/src/app/ee/admin/standard-answer/page.tsx @@ -6,27 +6,29 @@ import { PopupSpec, usePopup } from "@/components/admin/connectors/Popup"; import { useStandardAnswers, useStandardAnswerCategories } from "./hooks"; import { ThreeDotsLoader } from "@/components/Loading"; import { ErrorCallout } from "@/components/ErrorCallout"; -import { Button, Divider, Text } from "@tremor/react"; -import Link from "next/link"; -import { StandardAnswer, StandardAnswerCategory } from "@/lib/types"; -import { MagnifyingGlass } from "@phosphor-icons/react"; -import { useState } from "react"; +import { Button } from "@/components/ui/button"; +import { Separator } from "@/components/ui/separator"; import { Table, TableHead, TableRow, - TableHeaderCell, TableBody, TableCell, -} from "@tremor/react"; +} from "@/components/ui/table"; + +import Link from "next/link"; +import { StandardAnswer, StandardAnswerCategory } from "@/lib/types"; +import { MagnifyingGlass } from "@phosphor-icons/react"; +import { useState } from "react"; import ReactMarkdown from "react-markdown"; import remarkGfm from "remark-gfm"; import { deleteStandardAnswer } from "./lib"; import { FilterDropdown } from "@/components/search/filtering/FilterDropdown"; import { FiTag } from "react-icons/fi"; -import { SelectedBubble } from "@/components/search/filtering/Filters"; import { PageSelector } from "@/components/PageSelector"; import { CustomCheckbox } from "@/components/CustomCheckbox"; +import Text from "@/components/ui/text"; +import { TableHeader } from "@/components/ui/table"; const NUM_RESULTS_PER_PAGE = 10; @@ -289,16 +291,14 @@ const StandardAnswersTable = ({
-
- +
+ {columns.map((column) => ( - - {column.name} - + {column.name} ))} - + {paginatedStandardAnswers.length > 0 ? ( @@ -403,12 +403,12 @@ const Main = () => {
- - +
Uploaded Image: - + Uploaded Image
)} diff --git a/web/src/app/ee/admin/whitelabeling/WhitelabelingForm.tsx b/web/src/app/ee/admin/whitelabeling/WhitelabelingForm.tsx index 954d0ab8e49..475c689441a 100644 --- a/web/src/app/ee/admin/whitelabeling/WhitelabelingForm.tsx +++ b/web/src/app/ee/admin/whitelabeling/WhitelabelingForm.tsx @@ -12,10 +12,12 @@ import { SubLabel, TextFormField, } from "@/components/admin/connectors/Field"; -import { Button, Divider, Text } from "@tremor/react"; +import { Button } from "@/components/ui/button"; +import Text from "@/components/ui/text"; import { ImageUpload } from "./ImageUpload"; import { AdvancedOptionsToggle } from "@/components/AdvancedOptionsToggle"; import Link from "next/link"; +import { Separator } from "@/components/ui/separator"; export function WhitelabelingForm() { const router = useRouter(); @@ -151,8 +153,8 @@ export function WhitelabelingForm() { />