Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions text_to_image/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mlcube/workspace/
1 change: 1 addition & 0 deletions text_to_image/mlcube/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
workspace
19 changes: 19 additions & 0 deletions text_to_image/mlcube/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Base image: PyTorch 2.7.1 with CUDA 12.8 / cuDNN 9 (devel flavor, needed to
# build any CUDA extensions pulled in by the requirements).
FROM pytorch/pytorch:2.7.1-cuda12.8-cudnn9-devel

# Basic utilities for fetching data and debugging inside the container;
# apt caches are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git vim curl unzip \
    && rm -rf /var/lib/apt/lists/* \
    && apt-get clean

WORKDIR /workspace/flux

# The build context is the text_to_image/ directory (see mlcube.yaml:
# build_context ".."), so this copies scripts/ and the torchtitan submodule.
COPY . .

# Hugging Face hub client (used by scripts/download_demo.sh) plus the
# torchtitan / flux / mlperf requirement sets.
RUN pip install --no-cache-dir \
    huggingface-hub \
    -r torchtitan/requirements.txt \
    -r torchtitan/torchtitan/experiments/flux/requirements-flux.txt \
    -r torchtitan/requirements-mlperf.txt

# Editable install so the torchtitan experiment code is importable.
RUN pip install -e torchtitan/

29 changes: 29 additions & 0 deletions text_to_image/mlcube/mlcube.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# MLCube definition for the Flux text-to-image benchmark.
name: Flux
description: Flux text to image
authors:
  - { name: "MLCommons Best Practices Working Group" }

platform:
  # Number of accelerators (GPUs) the runner should request.
  accelerator_count: 1

docker:
  image: mlcommons/flux_benchmark:0.0.1
  # Build from the parent (text_to_image/) directory so that scripts/ and the
  # torchtitan submodule are inside the Docker build context.
  build_context: ".."
  build_file: "mlcube/Dockerfile"
  # Forwards HUGGING_FACE_HUB_TOKEN into the container for gated model downloads.
  gpu_args: "--gpus=all -e HUGGING_FACE_HUB_TOKEN"

tasks:
  # Fetch the demo dataset and pretrained models into the workspace.
  download_demo:
    entrypoint: ./scripts/download_demo.sh -a
    parameters:
      outputs:
        data_path: demo_data/
        model_path: models/
  # Run a short demo training job using the downloaded assets.
  demo:
    entrypoint: ./scripts/run_demo.sh -a
    parameters:
      inputs:
        data_path: demo_data/data/
        model_path: models/
      outputs:
        log_dir: demo_logs/
72 changes: 72 additions & 0 deletions text_to_image/mlcube/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# MLCube for Flux.1-schnell

MLCube™ GitHub [repository](https://github.com/mlcommons/mlcube). MLCube™ [wiki](https://mlcommons.github.io/mlcube/).

## Project setup

An important requirement is that you must have Docker installed.

```bash
# Create Python environment and install MLCube Docker runner
virtualenv -p python3 ./env && source ./env/bin/activate && pip install pip==24.0 && pip install mlcube-docker
# Fetch the implementation from GitHub
git clone https://github.com/mlcommons/training && cd ./training
git fetch origin pull/839/head:feature/mlcube_flux && git checkout feature/mlcube_flux
cd ./text_to_image/mlcube
```

Inside the mlcube directory run the following command to check implemented tasks.

```shell
mlcube describe
```

###  Extra requirements

You need to download the `torchtitan` git submodule:

```shell
git submodule update --init --recursive
```

You also need to accept the license for the [FLUX schnell model](https://huggingface.co/black-forest-labs/FLUX.1-schnell) on Hugging Face.

Finally, to be able to download all the models you will need to create an access token on [Hugging Face](https://huggingface.co/settings/tokens).

**Note**: Make sure that when creating the token you select:

* Read access to contents of all public gated repos you can access

After that, export the token as an environment variable, like this:

```shell
export HUGGING_FACE_HUB_TOKEN="YOUR_TOKEN"
```

### MLCube tasks

* Demo tasks:

Download demo dataset and models.

```shell
mlcube run --task=download_demo -Pdocker.build_strategy=always
```

Train demo.

```shell
mlcube run --task=demo -Pdocker.build_strategy=always
```

### Execute the complete pipeline

You can execute the complete pipeline with one single command.

* Demo pipeline:

```shell
mlcube run --task=download_demo,demo -Pdocker.build_strategy=always
```

**Note**: To rebuild the image use the flag: `-Pdocker.build_strategy=always` during the `mlcube run` command.
61 changes: 61 additions & 0 deletions text_to_image/scripts/download_demo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/bin/bash
# Download the Flux demo dataset and the pretrained encoder/autoencoder models.
#
# Usage: download_demo.sh [--data_path=DIR] [--model_path=DIR]
# Requires: curl, unzip, huggingface-cli, and HUGGING_FACE_HUB_TOKEN in the
# environment (the FLUX.1-schnell repository is gated).
set -euo pipefail

DATA_PATH="./dataset"
MODEL_PATH="./models"

# Capture MLCube parameters (passed as --key=value).
while [ $# -gt 0 ]; do
  case "$1" in
    --data_path=*)
      DATA_PATH="${1#*=}"
      ;;
    --model_path=*)
      MODEL_PATH="${1#*=}"
      ;;
    *) ;;
  esac
  shift
done

if ! command -v huggingface-cli &> /dev/null; then
  echo "Error: huggingface-cli is not installed. Please add 'huggingface-hub' to your pip requirements." >&2
  exit 1
fi

echo "--- Preparing Directories ---"
mkdir -p "$DATA_PATH" "$MODEL_PATH"
# Resolve MODEL_PATH to an absolute path *before* changing directory; otherwise
# a relative --model_path would silently be created inside the dataset dir.
MODEL_PATH="$(cd "$MODEL_PATH" && pwd)"
cd "$DATA_PATH"
echo "Working directory: $(pwd)"


echo "--- Downloading and unzipping dataset ---"
# -f: fail on HTTP errors instead of saving an error page as the zip archive.
curl -fSLO https://storage.googleapis.com/mlperf_training_demo/flux/flux_minified_data.zip
unzip -o -q flux_minified_data.zip
rm flux_minified_data.zip
echo "Dataset downloaded successfully."


echo "--- Downloading models to ${MODEL_PATH} directory ---"
# NOTE: never echo HUGGING_FACE_HUB_TOKEN — it is a secret and must not be
# written to logs. huggingface-cli reads it from the environment.
if [ -z "${HUGGING_FACE_HUB_TOKEN:-}" ]; then
  echo "Warning: HUGGING_FACE_HUB_TOKEN is not set; gated model downloads may fail." >&2
fi

echo "Downloading FLUX.1-schnell autoencoder..."
huggingface-cli download black-forest-labs/FLUX.1-schnell ae.safetensors \
  --local-dir "${MODEL_PATH}/autoencoder" \
  --local-dir-use-symlinks False

echo "Downloading T5-v1_1-xxl text encoder..."
huggingface-cli download google/t5-v1_1-xxl \
  --local-dir "${MODEL_PATH}/t5" \
  --exclude "tf_model.h5" \
  --local-dir-use-symlinks False

echo "Downloading CLIP-vit-large-patch14 image encoder..."
# --exclude takes separate glob patterns; the previous comma-joined string was
# a single pattern that matched nothing, so nothing was actually excluded.
huggingface-cli download openai/clip-vit-large-patch14 \
  --local-dir "${MODEL_PATH}/clip" \
  --exclude "*.safetensors" "*.msgpack" "tf_model.h5" \
  --local-dir-use-symlinks False

echo "--- All downloads completed successfully! ---"
56 changes: 56 additions & 0 deletions text_to_image/scripts/run_demo.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#!/bin/bash
# Launch the Flux demo training run via torchtitan's run_train.sh.
#
# Usage: run_demo.sh --data_path=DIR --model_path=DIR --log_dir=DIR
# All three parameters are required (normally supplied by MLCube).
set -euo pipefail

DATA_PATH=""
MODEL_PATH=""
LOG_DIR=""

# Capture MLCube parameters (passed as --key=value).
while [ $# -gt 0 ]; do
  case "$1" in
    --data_path=*)
      DATA_PATH="${1#*=}"
      ;;
    --model_path=*)
      MODEL_PATH="${1#*=}"
      ;;
    --log_dir=*)
      LOG_DIR="${1#*=}"
      ;;
    *) ;;
  esac
  shift
done

if [[ -z "$DATA_PATH" || -z "$MODEL_PATH" || -z "$LOG_DIR" ]]; then
  # The previous message omitted --model_path even though it is also required.
  echo "Error: --data_path, --model_path and --log_dir must all be provided by MLCube." >&2
  exit 1
fi

echo "Data Path: $DATA_PATH"
echo "Model Path: $MODEL_PATH"
echo "Log Directory: $LOG_DIR"
echo "--------------------------"

# Environment consumed by torchtitan's flux training entrypoint.
export DATAROOT="$DATA_PATH"
export MODELROOT="$MODEL_PATH"
export LOGDIR="$LOG_DIR"
export NGPU=1
export CONFIG_FILE="torchtitan/torchtitan/experiments/flux/train_configs/flux_schnell_mlperf_preprocessed.toml"

echo "Running training with the following environment:"
echo "DATAROOT=$DATAROOT"
echo "MODELROOT=$MODELROOT"
echo "LOGDIR=$LOGDIR"
echo "NGPU=$NGPU"
echo "CONFIG_FILE=$CONFIG_FILE"
echo "--------------------------"

# -sfn: replace any existing link so re-running the task does not fail with
# "File exists"; quote the operands in case the paths contain spaces.
ln -sfn "$DATAROOT" /dataset
ln -sfn "$MODELROOT" /models

# Short demo configuration: 10 steps, tiny batch, frequent eval.
bash torchtitan/torchtitan/experiments/flux/run_train.sh \
  --training.steps=10 \
  --training.batch_size=1 \
  --training.seq_len=2 \
  --eval.eval_freq=5