Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for repodata.json.zst #675

Draft
wants to merge 16 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ env:
jobs:
test_quetz:
# timeout for the whole job
timeout-minutes: 10
timeout-minutes: 12
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
Expand Down Expand Up @@ -79,7 +79,7 @@ jobs:
- name: Testing server
shell: bash -l -eo pipefail {0}
# timeout for the step
timeout-minutes: 5
timeout-minutes: 8
env:
TEST_DB_BACKEND: ${{ matrix.test_database }}
QUETZ_TEST_DBINIT: ${{ matrix.db_init }}
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
exclude: (quetz/migrations)
exclude: ^(quetz/migrations|quetz/tests/data/test-server/)
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
Expand Down Expand Up @@ -27,4 +27,4 @@ repos:
- types-toml
- types-ujson
- types-aiofiles
args: [--show-error-codes, --implicit-optional]
args: [--show-error-codes, --implicit-optional]
15 changes: 15 additions & 0 deletions docs/source/deploying/configuration.rst
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,21 @@ the ``redis-server``.

For more information, see :ref:`task_workers`.

``compression`` section
^^^^^^^^^^^^^^^^^^^^^^^

You can configure which compressions are enabled for the ``repodata.json`` file.

:gz_enabled: enable gzip compression
:bz2_enabled: enable bzip2 compression
:zst_enabled: enable zstandard compression

.. note::

Compression is an expensive operation for big files.
Updating the index of local channels is done in the background, so this isn't an issue.
For proxy channels, however, compression is done after downloading the remote ``repodata.json`` and before serving it.

``quotas`` section
^^^^^^^^^^^^^^^^^^

Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,6 @@ dependencies:
- pytest-asyncio
- pytest-timeout
- pydantic >=2
- py-rattler
- pip:
- git+https://github.com/jupyter-server/jupyter_releaser.git@v2
5 changes: 5 additions & 0 deletions plugins/quetz_current_repodata/quetz_current_repodata/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,12 @@
from conda_index.index import _build_current_repodata

import quetz
from quetz.config import Config
from quetz.utils import add_temp_static_file

config = Config()
compression = config.get_compression_config()


@quetz.hookimpl
def post_package_indexing(tempdir: Path, channel_name, subdirs, files, packages):
Expand All @@ -25,4 +29,5 @@ def post_package_indexing(tempdir: Path, channel_name, subdirs, files, packages)
"current_repodata.json",
tempdir,
files,
compression,
)
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

config = Config()
pkgstore = config.get_package_store()
compression = config.get_compression_config()


def update_dict(packages, instructions):
Expand Down Expand Up @@ -147,6 +148,7 @@ def post_package_indexing(tempdir: Path, channel_name, subdirs, files, packages)
"repodata_from_packages.json",
tempdir,
files,
compression=compression,
)

patch_repodata(repodata, patch_instructions)
Expand All @@ -162,4 +164,5 @@ def post_package_indexing(tempdir: Path, channel_name, subdirs, files, packages)
"repodata.json",
tempdir,
files,
compression=compression,
)
20 changes: 16 additions & 4 deletions plugins/quetz_repodata_patching/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def test_post_package_indexing(
channel_name,
package_repodata_patches,
db,
config,
package_file_name,
repodata_stem,
compressed_repodata,
Expand All @@ -301,7 +302,9 @@ def get_db():
yield db

with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
indexing.update_indexes(dao, pkgstore, channel_name)
indexing.update_indexes(
dao, pkgstore, channel_name, compression=config.get_compression_config()
)

ext = "json.bz2" if compressed_repodata else "json"
open_ = bz2.open if compressed_repodata else open
Expand Down Expand Up @@ -372,14 +375,17 @@ def test_index_html(
package_file_name,
dao,
db,
config,
remove_instructions,
):
@contextmanager
def get_db():
yield db

with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
indexing.update_indexes(dao, pkgstore, channel_name)
indexing.update_indexes(
dao, pkgstore, channel_name, compression=config.get_compression_config()
)

index_path = os.path.join(
pkgstore.channels_dir,
Expand Down Expand Up @@ -412,6 +418,7 @@ def test_patches_for_subdir(
package_repodata_patches,
dao,
db,
config,
package_subdir,
patches_subdir,
):
Expand All @@ -420,7 +427,9 @@ def get_db():
yield db

with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
indexing.update_indexes(dao, pkgstore, channel_name)
indexing.update_indexes(
dao, pkgstore, channel_name, compression=config.get_compression_config()
)

index_path = os.path.join(
pkgstore.channels_dir,
Expand Down Expand Up @@ -466,13 +475,16 @@ def test_no_repodata_patches_package(
package_file_name,
dao,
db,
config,
):
@contextmanager
def get_db():
yield db

with mock.patch("quetz_repodata_patching.main.get_db_manager", get_db):
indexing.update_indexes(dao, pkgstore, channel_name)
indexing.update_indexes(
dao, pkgstore, channel_name, compression=config.get_compression_config()
)

index_path = os.path.join(
pkgstore.channels_dir,
Expand Down
5 changes: 4 additions & 1 deletion plugins/quetz_repodata_zchunk/tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,11 @@ def test_repodata_zchunk(
package_file_name,
dao,
db,
config,
):
indexing.update_indexes(dao, pkgstore, channel_name)
indexing.update_indexes(
dao, pkgstore, channel_name, compression=config.get_compression_config()
)

index_path = os.path.join(
pkgstore.channels_dir,
Expand Down
42 changes: 42 additions & 0 deletions quetz/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import logging
import logging.config
import os
from dataclasses import dataclass
from distutils.util import strtobool
from secrets import token_bytes
from typing import Any, Dict, Iterable, List, NamedTuple, Optional, Type, Union
Expand All @@ -22,6 +23,24 @@
_user_dir = appdirs.user_config_dir("quetz")

PAGINATION_LIMIT = 20
# Compression suffixes known to the config; each one has a matching
# ``<ext>_enabled`` flag on ``CompressionConfig``.
COMPRESSION_EXTENSIONS = ["bz2", "gz", "zst"]


@dataclass
class CompressionConfig:
    """Flags telling which compression formats are enabled.

    Each field name is ``<ext>_enabled`` for one entry of
    ``COMPRESSION_EXTENSIONS``; the helper methods rely on that naming to
    look the flags up with ``getattr``.
    """

    bz2_enabled: bool
    gz_enabled: bool
    zst_enabled: bool

    def enabled_extensions(self) -> List[str]:
        """Return the extensions whose flag is truthy.

        The result follows the order of ``COMPRESSION_EXTENSIONS``.
        """
        return [
            ext for ext in COMPRESSION_EXTENSIONS if getattr(self, f"{ext}_enabled")
        ]

    def disabled_extensions(self) -> List[str]:
        """Return the extensions whose flag is falsy.

        The result follows the order of ``COMPRESSION_EXTENSIONS``.
        """
        return [
            ext for ext in COMPRESSION_EXTENSIONS if not getattr(self, f"{ext}_enabled")
        ]


class ConfigEntry(NamedTuple):
Expand Down Expand Up @@ -62,6 +81,7 @@ class Config:
ConfigEntry("package_unpack_threads", int, 1),
ConfigEntry("frontend_dir", str, default=""),
ConfigEntry("redirect_http_to_https", bool, False),
ConfigEntry("rattler_cache_dir", str, default="rattler_cache"),
],
),
ConfigSection(
Expand Down Expand Up @@ -232,6 +252,14 @@ class Config:
ConfigEntry("soft_delete_package", bool, required=False, default=False),
],
),
ConfigSection(
"compression",
[
ConfigEntry("gz_enabled", bool, default=True),
ConfigEntry("bz2_enabled", bool, default=True),
ConfigEntry("zst_enabled", bool, default=False),
],
),
]
_config_dirs = [_site_dir, _user_dir]
_config_files = [os.path.join(d, _filename) for d in _config_dirs]
Expand Down Expand Up @@ -443,6 +471,20 @@ def _get_environ_config(self) -> Dict[str, Any]:

return config

def get_compression_config(self) -> CompressionConfig:
    """Build a ``CompressionConfig`` from this object's compression flags.

    Reads the ``compression_bz2_enabled``, ``compression_gz_enabled`` and
    ``compression_zst_enabled`` attributes of this config.

    Returns
    -------
    compression_config : CompressionConfig
        Object holding the bzip2, gzip and zstandard flags.
    """
    bz2_flag = self.compression_bz2_enabled
    gz_flag = self.compression_gz_enabled
    zst_flag = self.compression_zst_enabled
    # Keyword arguments keep each flag tied to its field name.
    return CompressionConfig(
        bz2_enabled=bz2_flag,
        gz_enabled=gz_flag,
        zst_enabled=zst_flag,
    )

def get_package_store(self) -> pkgstores.PackageStore:
"""Return the appropriate package store as set in the config.

Expand Down
Loading
Loading