diff --git a/.github/workflows/backend-core-tests.yml b/.github/workflows/backend-core-tests.yml index ffb7021bc7e5..1cdbb52fd8fc 100644 --- a/.github/workflows/backend-core-tests.yml +++ b/.github/workflows/backend-core-tests.yml @@ -39,6 +39,5 @@ jobs: OPENAI_API_KEY: this-is-a-test-key run: | sudo apt-get update - sudo apt-get install -y libmagic-dev poppler-utils libreoffice tesseract-ocr pandoc cd core rye test -p quivr-core diff --git a/core/Dockerfile.test b/core/Dockerfile.test index 4ec16699fdfa..01f755efd090 100644 --- a/core/Dockerfile.test +++ b/core/Dockerfile.test @@ -4,25 +4,15 @@ FROM python:3.11.6-slim-bullseye # Install GEOS library, Rust, and other dependencies, then clean up RUN apt-get clean && apt-get update && apt-get install -y \ curl \ - gcc \ - autoconf \ - automake \ build-essential \ - # Additional dependencies for document handling - libmagic-dev \ - tesseract-ocr \ - poppler-utils \ - libreoffice \ - pandoc && \ - rm -rf /var/lib/apt/lists/* - + libgeos-dev \ + && apt-get clean && rm -rf /var/lib/apt/lists/* # Set the working directory WORKDIR /code -# Install Poetry -RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=/opt/poetry python && \ +RUN curl -sSL https://install.python-poetry.org | python3 - && \ cd /usr/local/bin && \ - ln -s /opt/poetry/bin/poetry && \ + ln -s /root/.local/bin/poetry && \ poetry config virtualenvs.create false # Add Poetry to PATH diff --git a/core/README.md b/core/README.md index f7f137bee3ea..92ec8cf4e612 100644 --- a/core/README.md +++ b/core/README.md @@ -12,6 +12,18 @@ This project is licensed under the Apache 2.0 License pip install quivr-core ``` +### Use with [MegaParse](https://github.com/QuivrHQ/MegaParse) + +By **default**, quivr-core uses MegaParse to parse files. To use it, choose one of the following options: + +**Use the Quivr-hosted MegaParse API** +* Send an email to **admin@quivr.app** with the subject: "[Megaparse] API Key Request" +* Add ```MEGAPARSE_API_KEY = md-...``` to your *.env* file + 
+**Use a self-hosted MegaParse API** +* Clone https://github.com/QuivrHQ/MegaParse +* From the ```MegaParse``` directory (```cd MegaParse```), start the API with ```make dev``` +* Back in quivr-core, add ```MEGAPARSE_URL = http://localhost:8000``` to your environment variables diff --git a/core/pyproject.toml b/core/pyproject.toml index 224077bdc0f4..05cc7c8ac1fc 100644 --- a/core/pyproject.toml +++ b/core/pyproject.toml @@ -60,7 +60,6 @@ markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", "base: these tests require quivr-core with extra `base` to be installed", "tika: these tests require a tika server to be running", - "unstructured: these tests require `unstructured` dependency", ] [[tool.mypy.overrides]]