Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions text_to_image/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mlcube/workspace/
1 change: 1 addition & 0 deletions text_to_image/mlcube/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
workspace
19 changes: 19 additions & 0 deletions text_to_image/mlcube/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Base image: PyTorch 2.7.1 with CUDA 12.8 / cuDNN 9 (devel flavor, needed to
# build any CUDA extensions pulled in by the requirements).
FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-devel

# Basic utilities for fetching data and debugging inside the container;
# apt caches are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git vim curl unzip \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

WORKDIR /workspace/flux

# The build context is the text_to_image/ directory (see mlcube.yaml:
# build_context ".."), so this copies scripts/ and the torchtitan submodule.
COPY . .

# Hugging Face hub client (used by scripts/download_demo.sh) plus the
# torchtitan / flux / mlperf requirement sets.
RUN pip install --no-cache-dir \
    huggingface-hub \
    -r torchtitan/requirements.txt \
    -r torchtitan/torchtitan/experiments/flux/requirements-flux.txt \
    -r torchtitan/requirements-mlperf.txt

# Editable install so the torchtitan experiment code is importable.
RUN pip install -e torchtitan/

29 changes: 29 additions & 0 deletions text_to_image/mlcube/mlcube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# MLCube definition for the Flux text-to-image benchmark.
name: Flux
description: Flux text to image
authors:
  - { name: "MLCommons Best Practices Working Group" }

platform:
  # Number of accelerators (GPUs) the runner should request.
  accelerator_count: 1

docker:
  image: mlcommons/flux_benchmark:0.0.1
  # Build from the parent (text_to_image/) directory so that scripts/ and the
  # torchtitan submodule are inside the Docker build context.
  build_context: ".."
  build_file: "mlcube/Dockerfile"
  # Forwards HUGGING_FACE_HUB_TOKEN into the container for gated model downloads.
  gpu_args: "--gpus=all -e HUGGING_FACE_HUB_TOKEN"

tasks:
  # Fetch the demo dataset and pretrained models into the workspace.
  download_demo:
    entrypoint: ./scripts/download_demo.sh -a
    parameters:
      outputs:
        data_path: demo_data/
        model_path: models/
  # Run a short demo training job using the downloaded assets.
  demo:
    entrypoint: ./scripts/run_demo.sh -a
    parameters:
      inputs:
        data_path: demo_data/data/
        model_path: models/
      outputs:
        log_dir: demo_logs/
72 changes: 72 additions & 0 deletions text_to_image/mlcube/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# MLCube for Flux.1-schnell

MLCube™ GitHub [repository](https://github.com/mlcommons/mlcube). MLCube™ [wiki](https://mlcommons.github.io/mlcube/).

## Project setup

An important requirement is that you must have Docker installed.

```bash
# Create Python environment and install MLCube Docker runner
virtualenv -p python3 ./env && source ./env/bin/activate && pip install pip==24.0 && pip install mlcube-docker
# Fetch the implementation from GitHub
git clone https://github.com/mlcommons/training && cd ./training
git fetch origin pull/839/head:feature/mlcube_flux && git checkout feature/mlcube_flux
cd ./text_to_image/mlcube
```

Inside the mlcube directory run the following command to check implemented tasks.

```shell
mlcube describe
```

###  Extra requirements

You need to download the `torchtitan` git submodule:

```shell
git submodule update --init --recursive
```

You also need to accept the license for the [FLUX schnell model](https://huggingface.co/black-forest-labs/FLUX.1-schnell) on Hugging Face.

Finally, to be able to download all the models you will need to create an access token on [Hugging Face](https://huggingface.co/settings/tokens).

**Note**: Make sure that when creating the token you select:

* Read access to contents of all public gated repos you can access

After that, export the token as an environment variable, like this:

```shell
export HUGGING_FACE_HUB_TOKEN="YOUR_TOKEN"
```

### MLCube tasks

* Demo tasks:

Download demo dataset and models.

```shell
mlcube run --task=download_demo -Pdocker.build_strategy=always
```

Train demo.

```shell
mlcube run --task=demo -Pdocker.build_strategy=always
```

### Execute the complete pipeline

You can execute the complete pipeline with one single command.

* Demo pipeline:

```shell
mlcube run --task=download_demo,demo -Pdocker.build_strategy=always
```

**Note**: To rebuild the image use the flag: `-Pdocker.build_strategy=always` during the `mlcube run` command.
61 changes: 61 additions & 0 deletions text_to_image/scripts/download_demo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
# Download the Flux demo dataset and the pretrained encoder/autoencoder models.
#
# Usage: download_demo.sh [--data_path=DIR] [--model_path=DIR]
# Requires: curl, unzip, huggingface-cli, and HUGGING_FACE_HUB_TOKEN in the
# environment (the FLUX.1-schnell repository is gated).
set -euo pipefail

DATA_PATH="./dataset"
MODEL_PATH="./models"

# Capture MLCube parameters (passed as --key=value).
while [ $# -gt 0 ]; do
  case "$1" in
    --data_path=*)
      DATA_PATH="${1#*=}"
      ;;
    --model_path=*)
      MODEL_PATH="${1#*=}"
      ;;
    *) ;;
  esac
  shift
done

if ! command -v huggingface-cli &> /dev/null; then
  echo "Error: huggingface-cli is not installed. Please add 'huggingface-hub' to your pip requirements." >&2
  exit 1
fi

echo "--- Preparing Directories ---"
mkdir -p "$DATA_PATH" "$MODEL_PATH"
# Resolve MODEL_PATH to an absolute path *before* changing directory; otherwise
# a relative --model_path would silently be created inside the dataset dir.
MODEL_PATH="$(cd "$MODEL_PATH" && pwd)"
cd "$DATA_PATH"
echo "Working directory: $(pwd)"


echo "--- Downloading and unzipping dataset ---"
# -f: fail on HTTP errors instead of saving an error page as the zip archive.
curl -fSLO https://storage.googleapis.com/mlperf_training_demo/flux/flux_minified_data.zip
unzip -o -q flux_minified_data.zip
rm flux_minified_data.zip
echo "Dataset downloaded successfully."


echo "--- Downloading models to ${MODEL_PATH} directory ---"
# NOTE: never echo HUGGING_FACE_HUB_TOKEN — it is a secret and must not be
# written to logs. huggingface-cli reads it from the environment.
if [ -z "${HUGGING_FACE_HUB_TOKEN:-}" ]; then
  echo "Warning: HUGGING_FACE_HUB_TOKEN is not set; gated model downloads may fail." >&2
fi

echo "Downloading FLUX.1-schnell autoencoder..."
huggingface-cli download black-forest-labs/FLUX.1-schnell ae.safetensors \
  --local-dir "${MODEL_PATH}/autoencoder" \
  --local-dir-use-symlinks False

echo "Downloading T5-v1_1-xxl text encoder..."
huggingface-cli download google/t5-v1_1-xxl \
  --local-dir "${MODEL_PATH}/t5" \
  --exclude "tf_model.h5" \
  --local-dir-use-symlinks False

echo "Downloading CLIP-vit-large-patch14 image encoder..."
# --exclude takes separate glob patterns; the previous comma-joined string was
# a single pattern that matched nothing, so nothing was actually excluded.
huggingface-cli download openai/clip-vit-large-patch14 \
  --local-dir "${MODEL_PATH}/clip" \
  --exclude "*.safetensors" "*.msgpack" "tf_model.h5" \
  --local-dir-use-symlinks False

echo "--- All downloads completed successfully! ---"
56 changes: 56 additions & 0 deletions text_to_image/scripts/run_demo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# Launch the Flux demo training run via torchtitan's run_train.sh.
#
# Usage: run_demo.sh --data_path=DIR --model_path=DIR --log_dir=DIR
# All three parameters are required (normally supplied by MLCube).
set -euo pipefail

DATA_PATH=""
MODEL_PATH=""
LOG_DIR=""

# Capture MLCube parameters (passed as --key=value).
while [ $# -gt 0 ]; do
  case "$1" in
    --data_path=*)
      DATA_PATH="${1#*=}"
      ;;
    --model_path=*)
      MODEL_PATH="${1#*=}"
      ;;
    --log_dir=*)
      LOG_DIR="${1#*=}"
      ;;
    *) ;;
  esac
  shift
done

if [[ -z "$DATA_PATH" || -z "$MODEL_PATH" || -z "$LOG_DIR" ]]; then
  # The previous message omitted --model_path even though it is also required.
  echo "Error: --data_path, --model_path and --log_dir must all be provided by MLCube." >&2
  exit 1
fi

echo "Data Path: $DATA_PATH"
echo "Model Path: $MODEL_PATH"
echo "Log Directory: $LOG_DIR"
echo "--------------------------"

# Environment consumed by torchtitan's flux training entrypoint.
export DATAROOT="$DATA_PATH"
export MODELROOT="$MODEL_PATH"
export LOGDIR="$LOG_DIR"
export NGPU=1
export CONFIG_FILE="torchtitan/torchtitan/experiments/flux/train_configs/flux_schnell_mlperf_preprocessed.toml"

echo "Running training with the following environment:"
echo "DATAROOT=$DATAROOT"
echo "MODELROOT=$MODELROOT"
echo "LOGDIR=$LOGDIR"
echo "NGPU=$NGPU"
echo "CONFIG_FILE=$CONFIG_FILE"
echo "--------------------------"

# -sfn: replace any existing link so re-running the task does not fail with
# "File exists"; quote the operands in case the paths contain spaces.
ln -sfn "$DATAROOT" /dataset
ln -sfn "$MODELROOT" /models

# Short demo configuration: 10 steps, tiny batch, frequent eval.
bash torchtitan/torchtitan/experiments/flux/run_train.sh \
  --training.steps=10 \
  --training.batch_size=1 \
  --training.seq_len=2 \
  --eval.eval_freq=5