From 69e1dd2be82f571be05c4cf3bbfdb12c04616cff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jarl=20Sondre=20S=C3=A6ther?= <60541573+jarlsondre@users.noreply.github.com>
Date: Thu, 28 Nov 2024 11:16:31 +0100
Subject: [PATCH] update readmes

---
 docs/uv-tutorial.md                                    | 10 ++++++----
 pyproject.toml                                         |  1 +
 tutorials/distributed-ml/torch-scaling-test/README.md  |  2 +-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/docs/uv-tutorial.md b/docs/uv-tutorial.md
index 01a53d00..4af0e47d 100644
--- a/docs/uv-tutorial.md
+++ b/docs/uv-tutorial.md
@@ -18,7 +18,9 @@ If you wish to use the `uv sync` and/or `uv lock` commands, which is how you use
 to manage all your project packages, then note that these commands will only work
 with the directory called `.venv` in the project directory. Sometimes, this can be
 a bit annoying, especially with an existing venv, so we recommend using a
-[symlink](https://en.wikipedia.org/wiki/Symbolic_link).
+[symlink](https://en.wikipedia.org/wiki/Symbolic_link). If you need to have multiple
+venvs that you want to switch between, you can update the symlink to whichever of them
+you want to use at the moment. For SLURM scripts, you can hardcode them if need be.
 
 ### Symlinking .venv
 
@@ -61,7 +63,7 @@ particular, if you are a developer you would use one of the following two comman
 you are on HPC with cuda, you would use:
 
 ```bash
-uv sync --extra torch --extra dev --extra linux \
+uv sync --extra torch --extra dev \
 --no-cache \
 --index https://download.pytorch.org/whl/cu121
 ```
@@ -93,6 +95,6 @@ can use the following command:
 uv add
 ```
 
-> [!NOTE]
+> [!Warning]
 > This will add the package to your `.venv` venv, so make sure to have symlinked to
-> this directory if you haven't already. 
+> this directory if you haven't already.
diff --git a/pyproject.toml b/pyproject.toml
index 897ea506..f7162d82 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ dependencies = [
     "matplotlib>=3.9.2",
     "pip>=24.3.1",
     "prov4ml@git+https://github.com/matbun/ProvML@new-main",
+    "ray"
 ]
 
 [project.optional-dependencies]
diff --git a/tutorials/distributed-ml/torch-scaling-test/README.md b/tutorials/distributed-ml/torch-scaling-test/README.md
index f74b8212..81adfa0d 100644
--- a/tutorials/distributed-ml/torch-scaling-test/README.md
+++ b/tutorials/distributed-ml/torch-scaling-test/README.md
@@ -34,7 +34,7 @@ python ddp_trainer.py -c config/base.yaml -c config/ddp.yaml --log-int 42
 ## Run a single training
 
 Training runs are meant to be submitted via SLURM, from a unified job script file:
-`slurm.sh`.You can select the distributed training algorithm and provide the command
+`slurm.sh`. You can select the distributed training algorithm and provide the command
 to execute setting SLURM environment variables using the `--export` option:
 
 ```bash