diff --git a/.gitignore b/.gitignore index 38ab7d96c0..6a0e1af12e 100644 --- a/.gitignore +++ b/.gitignore @@ -127,6 +127,7 @@ ENV/ /scratches/ logs/ /*.pid +/.postgres-volume-backup # Additional clones for editable components /src/ai/backend/webui/ diff --git a/changes/2367.deps.md b/changes/2367.deps.md new file mode 100644 index 0000000000..1ee9043434 --- /dev/null +++ b/changes/2367.deps.md @@ -0,0 +1 @@ +Update the halfstack containers to point the latest stable versions diff --git a/docker-compose.halfstack-2409.yml b/docker-compose.halfstack-2409.yml new file mode 100644 index 0000000000..6397fe451a --- /dev/null +++ b/docker-compose.halfstack-2409.yml @@ -0,0 +1,69 @@ +services: + + backendai-half-db: + image: postgres:16.3-alpine + restart: unless-stopped + command: postgres -c 'max_connections=256' + networks: + - half + ports: + - "8100:5432" + environment: + - POSTGRES_PASSWORD=develove + - POSTGRES_DB=backend + volumes: + - "./volumes/${DATADIR_PREFIX:-.}/postgres-data:/var/lib/postgresql/data:rw" + healthcheck: + test: ["CMD", "pg_isready", "-U", "postgres"] + interval: 5s + timeout: 3s + retries: 10 + + backendai-half-redis: + image: redis:7.2.4-alpine + restart: unless-stopped + networks: + - half + ports: + - "8110:6379" + volumes: + - "./volumes/${DATADIR_PREFIX:-.}/redis-data:/data:rw" + command: > + redis-server + --appendonly yes + healthcheck: + test: ["CMD-SHELL", "redis-cli ping | grep PONG"] + interval: 5s + timeout: 3s + retries: 10 + + backendai-half-etcd: + image: quay.io/coreos/etcd:v3.5.14 + restart: unless-stopped + volumes: + - "./volumes/${DATADIR_PREFIX:-.}/etcd-data:/etcd-data:rw" + networks: + - half + ports: + - "8120:2379" + command: > + /usr/local/bin/etcd + --name backendai-etcd + --data-dir /etcd-data + --listen-client-urls http://0.0.0.0:2379 + --advertise-client-urls http://0.0.0.0:2379 + --listen-peer-urls http://0.0.0.0:2380 + --initial-advertise-peer-urls http://0.0.0.0:2380 + --initial-cluster 
backendai-etcd=http://0.0.0.0:2380 + --initial-cluster-token backendai-etcd-token + --initial-cluster-state new + --enable-v2=true + --auto-compaction-retention 1 + healthcheck: + test: ["CMD", "etcdctl", "endpoint", "health"] + interval: 5s + timeout: 3s + retries: 10 + +networks: + half: diff --git a/docker-compose.halfstack-main.yml b/docker-compose.halfstack-main.yml index 747569b752..d7c00a4050 120000 --- a/docker-compose.halfstack-main.yml +++ b/docker-compose.halfstack-main.yml @@ -1 +1 @@ -docker-compose.halfstack-2303.yml \ No newline at end of file +./docker-compose.halfstack-2409.yml \ No newline at end of file diff --git a/docs/dev/daily-workflows.rst b/docs/dev/daily-workflows.rst index 25f0910a33..d6858fd801 100644 --- a/docs/dev/daily-workflows.rst +++ b/docs/dev/daily-workflows.rst @@ -733,6 +733,26 @@ Making a new release To make workflow above effective, be aware that backporting DB revisions to older major releases will no longer be permitted after major release version is switched. +Making a new release branch +~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +This example shows the case when the current release is 24.03 and the next upcoming release is 24.09. +It makes the main branch to stand for the upcoming release 24.09, by branching out the current release 24.03. + +* Make a new git branch for the current release in the ``YY.MM`` format (like ``24.03``) from the main branch. + +* Update ``./VERSION`` file to indicate the next development version (like ``24.09.0dev0``). + +* Create a new halfstack compose configuration for the next release by copying and updating the halfstack config of the current release. + + .. 
code-block:: console + + $ cp docker-compose.halfstack-2403.yml docker-compose.halfstack-2409.yml + $ edit docker-compose.halfstack-2409.yml # update the container versions + $ rm docker-compose.halfstack-main.yml + $ ln -s docker-compose.halfstack-2409.yml docker-compose.halfstack-main.yml + $ git add docker-compose.*.yml + Backporting to legacy per-pkg repositories ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/docs/dev/version-management-and-upgrades.rst b/docs/dev/version-management-and-upgrades.rst index be02b36856..4a47879207 100644 --- a/docs/dev/version-management-and-upgrades.rst +++ b/docs/dev/version-management-and-upgrades.rst @@ -1,6 +1,7 @@ .. role:: raw-html-m2r(raw) :format: html +.. include:: <isonum.txt> Version Numbering ================= @@ -13,7 +14,7 @@ Version Numbering * When referring the version in other Python packages as requirements, you need to strip the leading zeros (e.g., ``20.9.0`` instead of ``20.09.0``) because Python setuptools normalizes the version integers. * ``x.y.z`` is a release tag name (patch releases). -* When releasing ``x.y.0``\ : +* When releasing ``x.y.0``: * Create a new ``x.y`` branch, do all bugfix/hotfix there, and make ``x.y.z`` releases there. * All fixes must be *first* implemented on the ``main`` branch and then *cherry-picked* back to ``x.y`` branches. @@ -27,7 +28,7 @@ Version Numbering * New development should go on the ``main`` branch. - * ``main``\ : commit here directly if your changes are a self-complete one as a single commit. + * ``main``: commit here directly if your changes are a self-complete one as a single commit. * Use both short-lived and long-running feature branches freely, but ensure there names differ from release branches and tags. * The major/minor (\ ``x.y``\ ) version of Backend.AI subprojects will go together to indicate compatibility. 
Currently manager/agent/common versions progress this way, while client SDKs have their own version numbers and the API specification has a different ``vN.yyyymmdd`` version format. @@ -42,11 +43,153 @@ Version Numbering Upgrading ========= -You can upgrade the installed Python packages using ``pip install -U ...`` command along with dependencies. +Local packages +-------------- -If you have cloned the stable version of source code from git, then pull and check out the next ``x.y`` release branch. -It is recommended to re-run ``pip install -U -r requirements.txt`` as dependencies might be updated. +.. note:: -For the manager, ensure that your database schema is up-to-date by running ``alembic upgrade head``. If you setup your development environment with Pants and ``install-dev.sh`` script, keep your database schema up-to-date via ``./py -m alembic upgrade head`` instead of plain alembic command above. + Before doing branch switches or package upgrades, stop all Backend.AI services first. + For most minor upgrades, you may keep the session containers running, but whenever possible, it is strongly recommended to terminate them first. + When there are changes in the agent and kernel runner, it may break up the running containers. -Also check if any manual etcd configuration scheme change is required, though we will try to keep it compatible and automatically upgrade when first executed. + For specific configurations or advanced setups, refer to the version-specific upgrade guide or contact the support. + +Development Setup +~~~~~~~~~~~~~~~~~ + +It is advised to clone a new working copy and perform a clean install to work on a different *release* branch. +You may keep multiple clones by stopping and starting compose stacks for each working copy for testing. + +The following guide is for switching *topic* branches. +Again, if the target topic branch involves complex database/configuration migration, it is better to make a new clone with a clean install. 
+To save the GitHub bandwidth, consider local filesystem clones like ``git clone /path/to/existing/copy /path/to/new/copy`` or minimizing the fetch depth like ``git clone --depth=10 https://github.com/lablup/backend.ai bai-topic``. + +1. Run ``./py -m alembic downgrade -N`` if the current topic branch has *N* database migrations. + For example, if it has only one migration, use ``-1``. + You may check the ``require:db-migration`` label in the GitHub PR to see if the branch has database migrations. + +2. Switch to the target topic branch using ``git switch``. + +3. Run ``pants export --resolve=python-default --resolve=python-kernel --resolve=mypy --resolve=ruff`` to repopulate the virtualenvs for local execution. + These resolve arguments are the minimum required set for editor/IDE configuration. + +4. Run ``./py -m alembic upgrade head`` if the new topic branch has database migrations. + +5. Check if there are any additional TOML and etcd configuration updates required. + +Package Setup +~~~~~~~~~~~~~ + +If you want to perform a release upgrade (e.g., 23.09 |rarr| 24.03), please consult the specific version's upgrade guides. + +For a minor patch update, follow the steps: + +1. Upgrade the Backend.AI wheel packages in the virtualenv. + +2. Run ``alembic upgrade head`` in the virtualenv. + +3. Check if there are TOML and etcd configuration updates required. + + +Halfstack Containers +-------------------- + +Backend.AI uses a PostgreSQL database, an etcd cluster, and a Redis service as containers for its operation. +We call this set of containers the *"halfstack"*. +When making a new major release of Backend.AI or to address upstream issues, we update the versions of halfstack containers. + +A development setup or an all-in-one package setup uses a docker-compose stack, which can be upgraded relatively easily in-place. +Still, it is advised to do a clean install on a new clone for new major releases. 
+ +First, review your current configuration variables used in this guide. +A typical development setup uses the following values: + +- ``${COMPOSE_PROJECT_NAME}``: the name of the directory where ``scripts/install-dev.sh`` resides. +- ``${COMPOSE_FILE}``: ``docker-compose.halfstack.current.yml`` +- ``${DB_SERVICE_NAME}``: ``backendai-half-db`` +- ``${DB_USER}``: ``postgres`` +- ``${DB_NAME}``: ``backend`` +- ``${DB_BACKUP_FILE}``: set as you want (e.g., ``./db-backup.sql``) +- ``${POSTGRES_DATA_DIR}``: ``volumes/postgres-data`` + +Here is the step-by-step guide to upgrade the halfstack containers. + +1. Terminate all existing sessions and stop all Backend.AI services first. + +2. Back up the current PostgreSQL database content: + + .. code-block:: shell + + docker compose -p ${COMPOSE_PROJECT_NAME} -f ${COMPOSE_FILE} exec -T ${DB_SERVICE_NAME} pg_dump -U ${DB_USER} ${DB_NAME} > ${DB_BACKUP_FILE} + + .. note:: + + Currently, etcd is staying at the v3.5 release for multiple years and it is not anticipated to see its major upgrade in the foreseeable future. + When it happens, refer to the official upgrade document like https://etcd.io/docs/v3.5/upgrades/upgrade_3_5/. + The same goes for Redis; its version is now pinned to the v7.2 release and we expect only patch releases in the foreseeable future. + + You may *skip* the PostgreSQL-related steps if the postgres major version did not change in the target version's compose configuration. + In that case, you may do steps 3, 5, 6, and 8 only. + +3. Stop all halfstack containers: + + .. code-block:: shell + + docker compose -p ${COMPOSE_PROJECT_NAME} -f ${COMPOSE_FILE} down + +4. Delete the PostgreSQL database volume with an additional volume backup for the emergency like when the new postgres container cannot read the dump file. + If that happens, you could mount the copied directory to a postgres container pinned to the previous version to access the data. + + .. 
code-block:: shell + + # sudo required as postgres runs under a uid different from the current user's + sudo cp -Rp ${POSTGRES_DATA_DIR} ./postgres-volume-backup + sudo rm -rf ${POSTGRES_DATA_DIR} + + .. warning:: + + This step will *delete* the entire Backend.AI database! + Make sure all backups are verified for integrity before starting the upgrade process. + Verify your command twice before running. + +5. Overwrite the *current* halfstack compose configuration. + The target version depends on your choice, usually in the form of ``yymm`` like ``2309`` or ``2403``. + You may also create a new configuration with updated halfstack container versions. + + .. code-block:: shell + + # save the host port numbers + MY_DB_PORT=$(yq -r '.services.backendai-half-db.ports[0]' docker-compose.halfstack.current.yml|cut -d: -f1) + MY_REDIS_PORT=$(yq -r '.services.backendai-half-redis.ports[0]' docker-compose.halfstack.current.yml|cut -d: -f1) + MY_ETCD_PORT=$(yq -r '.services.backendai-half-etcd.ports[0]' docker-compose.halfstack.current.yml|cut -d: -f1) + # overwrite the compose config + cp ./docker-compose.halfstack-${TARGET_VERSION}.yml ${COMPOSE_FILE} + # restore the host port numbers, keeping the container-side ports + yq eval --inplace ".services.backendai-half-db.ports[0] = \"${MY_DB_PORT}:5432\"" docker-compose.halfstack.current.yml + yq eval --inplace ".services.backendai-half-redis.ports[0] = \"${MY_REDIS_PORT}:6379\"" docker-compose.halfstack.current.yml + yq eval --inplace ".services.backendai-half-etcd.ports[0] = \"${MY_ETCD_PORT}:2379\"" docker-compose.halfstack.current.yml + + .. tip:: + + Install the ``yq`` utility to read and manipulate the YAML files easily on the shell. + Refer to https://mikefarah.gitbook.io/yq. + +6. Start the halfstack with the new compose configuration: + + .. code-block:: shell + + docker compose -p ${COMPOSE_PROJECT_NAME} -f ${COMPOSE_FILE} up -d + +7. Restore the PostgreSQL database content: + + .. 
code-block:: shell + + docker compose -p ${COMPOSE_PROJECT_NAME} -f ${COMPOSE_FILE} exec -T ${DB_SERVICE_NAME} psql -U ${DB_USER} -d ${DB_NAME} < ${DB_BACKUP_FILE} + +8. Start the Backend.AI services and test. + If it successfully runs, remove the volume backup directory so that ``pants`` does not get confused with unreadable directories due to the different uid ownership. + + .. code-block:: shell + + sudo rm -rf ./postgres-volume-backup # if copied in the step 4 + rm ${DB_BACKUP_FILE} diff --git a/docs/install/install-from-package/prepare-database.rst b/docs/install/install-from-package/prepare-database.rst index a467806454..ed42ba4b0f 100644 --- a/docs/install/install-from-package/prepare-database.rst +++ b/docs/install/install-from-package/prepare-database.rst @@ -21,7 +21,7 @@ refer services: backendai-pg-active: <<: *base - image: postgres:15.1-alpine + image: postgres:16.3-alpine restart: unless-stopped command: > postgres