Skip to content

Commit

Permalink
First CI version
Browse files Browse the repository at this point in the history
  • Loading branch information
matbun committed Nov 5, 2024
1 parent f6ed515 commit e84c8c5
Show file tree
Hide file tree
Showing 11 changed files with 140 additions and 62 deletions.
1 change: 0 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ pl-training.yml
# Project folders/files
# use-cases
workflows
tests
CHANGELOG

# Docs
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ jobs:
# Default environment names are ".venv-pytorch" and ".venv-tf"
- name: Run pytest for workflows
shell: bash -l {0}
run: .venv-pytorch/bin/pytest -v ./tests/ -m "not slurm"
run: .venv-pytorch/bin/pytest -v ./tests/ -m "not hpc"

2 changes: 1 addition & 1 deletion ci/dagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
"name": "itwinai",
"sdk": "python",
"source": ".",
"engineVersion": "v0.13.3"
"engineVersion": "v0.13.6"
}
43 changes: 36 additions & 7 deletions ci/src/main/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,50 @@
@object_type
class Itwinai:

# torch_container: dagger.Container = None

@function
def build_torch(
self,
src: Annotated[
context: Annotated[
dagger.Directory,
Doc("location of directory containing Dockerfile"),
Doc("location of source directory"),
],
dockerfile: Annotated[
str,
Doc("location of Dockerfile"),
],
) -> dagger.Container:
"""Build image from existing Dockerfile"""
"""Build itwinai torch container image from existing Dockerfile"""
return (
dag.container()
.with_directory("/src", src)
.with_workdir("/src")
.directory("/src")
.docker_build()
.build(context=context, dockerfile=dockerfile)
)

@function
async def test_torch(
    self,
    context: Annotated[
        dagger.Directory,
        Doc("location of source directory"),
    ],
    dockerfile: Annotated[
        str,
        Doc("location of Dockerfile"),
    ],
) -> str:
    """Test itwinai torch container image with pytest on non-HPC environments.

    The image is first built through ``build_torch`` from the given
    context directory and Dockerfile; pytest is then executed inside the
    resulting container, deselecting every test marked ``hpc``.

    Returns:
        The standard output produced by the pytest run.
    """
    # Build the image first so the test command runs inside it.
    container = self.build_torch(context=context, dockerfile=dockerfile)
    # Equivalent shell command: pytest -v -m "not hpc" tests
    pytest_args = ["pytest", "-v", "-m", "not hpc", "tests"]
    return await container.with_exec(pytest_args).stdout()

@function
Expand Down
101 changes: 69 additions & 32 deletions ci/uv.lock

Large diffs are not rendered by default.

7 changes: 5 additions & 2 deletions env-files/tensorflow/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ RUN pip install --no-cache-dir --upgrade pip \
"prov4ml[linux]@git+https://github.com/matbun/ProvML@6faafd4"

# Install itwinai
COPY pyproject.toml ./
COPY src ./
COPY pyproject.toml pyproject.toml
COPY src src
RUN pip install . \
&& itwinai sanity-check --tensorflow

Expand All @@ -32,5 +32,8 @@ ARG REQUIREMENTS=env-files/torch/requirements/requirements.txt
COPY "${REQUIREMENTS}" additional-requirements.txt
RUN pip install --no-cache-dir -r additional-requirements.txt

# Add tests
COPY tests tests

# ENTRYPOINT [ "/bin/sh" ]
# CMD [ ]
9 changes: 6 additions & 3 deletions env-files/torch/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,12 @@ RUN CONTAINER_TORCH_VERSION="$(python -c 'import torch;print(torch.__version__)'
&& sed -i "${line}s|^|#|" "/usr/local/lib/python${pver}/dist-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py"

# Install itwinai
COPY pyproject.toml ./
COPY src ./
COPY pyproject.toml pyproject.toml
COPY src src
# Torch: reuse the global torch in the container
RUN CONTAINER_TORCH_VERSION="$(python -c 'import torch;print(torch.__version__)')" \
&& pip install --no-cache-dir torch=="$CONTAINER_TORCH_VERSION" \
.[torch] \
.[torch,dev] \
&& itwinai sanity-check --torch \
--optional-deps deepspeed \
--optional-deps horovod
Expand All @@ -63,3 +63,6 @@ RUN CONTAINER_TORCH_VERSION="$(python -c 'import torch;print(torch.__version__)'
ARG REQUIREMENTS=env-files/torch/requirements/requirements.txt
COPY "${REQUIREMENTS}" additional-requirements.txt
RUN pip install --no-cache-dir -r additional-requirements.txt

# Add tests
COPY tests tests
20 changes: 10 additions & 10 deletions env-files/torch/generic_torch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,17 @@ else
export DS_BUILD_TRANSFORMER_INFERENCE=1
fi
pip install --no-cache-dir py-cpuinfo || exit 1
pip install --no-cache-dir deepspeed==0.15.* || exit 1
# pip install --no-cache-dir deepspeed==0.15.* || exit 1

# fix .triton/autotune/Fp16Matmul_2d_kernel.pickle bug
line=$(cat -n $ENV_NAME/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py | grep os.rename | awk '{print $1}' | head -n 1)
# # fix .triton/autotune/Fp16Matmul_2d_kernel.pickle bug
# line=$(cat -n $ENV_NAME/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py | grep os.rename | awk '{print $1}' | head -n 1)

# 'sed' is implemented differently on MacOS than on Linux (https://stackoverflow.com/questions/4247068/sed-command-with-i-option-failing-on-mac-but-works-on-linux)
if [[ "$OSTYPE" =~ ^darwin ]] ; then
sed -i '' "${line}s|^|#|" $ENV_NAME/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py || exit 1
else
sed -i "${line}s|^|#|" $ENV_NAME/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py || exit 1
fi
# # 'sed' is implemented differently on MacOS than on Linux (https://stackoverflow.com/questions/4247068/sed-command-with-i-option-failing-on-mac-but-works-on-linux)
# if [[ "$OSTYPE" =~ ^darwin ]] ; then
# sed -i '' "${line}s|^|#|" $ENV_NAME/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py || exit 1
# else
# sed -i "${line}s|^|#|" $ENV_NAME/lib/python${pver}/site-packages/deepspeed/ops/transformer/inference/triton/matmul_ext.py || exit 1
# fi
fi

# install horovod
Expand Down Expand Up @@ -138,7 +138,7 @@ else
export HOROVOD_WITHOUT_TENSORFLOW=1
export HOROVOD_WITHOUT_MXNET=1
fi
pip install --no-cache-dir git+https://github.com/horovod/horovod.git@3a31d93 || exit 1
# pip install --no-cache-dir git+https://github.com/horovod/horovod.git@3a31d93 || exit 1
fi

# get required libraries in reqs.txt
Expand Down
7 changes: 5 additions & 2 deletions env-files/torch/jupyter/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -237,8 +237,8 @@ RUN pip install --no-cache-dir \

# Core itwinai lib
WORKDIR "$HOME/itwinai"
COPY pyproject.toml ./
COPY src ./
COPY pyproject.toml pyproject.toml
COPY src src
RUN pip install --no-cache-dir ".[torch,dev]" \
&& itwinai sanity-check --torch \
--optional-deps deepspeed \
Expand All @@ -249,6 +249,9 @@ ARG REQUIREMENTS=env-files/torch/requirements/requirements.txt
COPY "${REQUIREMENTS}" additional-requirements.txt
RUN pip install --no-cache-dir -r additional-requirements.txt

# Add tests
COPY tests tests

WORKDIR $HOME

CMD ["setup.sh", "start-notebook.sh"]
9 changes: 6 additions & 3 deletions env-files/torch/jupyter/jupyter-base/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -591,9 +591,8 @@ RUN pip install --no-cache-dir \

# Core itwinai lib
WORKDIR "$HOME/itwinai"
COPY pyproject.toml ./
COPY src ./
COPY tests ./
COPY pyproject.toml pyproject.toml
COPY src src
RUN pip install --no-cache-dir ".[torch,dev]" \
&& itwinai sanity-check --torch \
--optional-deps deepspeed \
Expand All @@ -604,6 +603,10 @@ ARG REQUIREMENTS=env-files/torch/requirements/requirements.txt
COPY "${REQUIREMENTS}" additional-requirements.txt
RUN pip install --no-cache-dir -r additional-requirements.txt

# Add tests
COPY tests tests


WORKDIR $HOME

CMD ["setup.sh", "start-notebook.sh"]
1 change: 1 addition & 0 deletions env-files/torch/jupyter/jupyter-base/docker-stacks
Submodule docker-stacks added at 4d70cf

0 comments on commit e84c8c5

Please sign in to comment.