Project-MONAI · yiheng-wang-nv · Nov 6, 2024 · Nov 6, 2024
diff --git a/models/vista3d/configs/metadata.json b/models/vista3d/configs/metadata.json
@@ -1,7 +1,8 @@
 {
     "schema": "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/meta_schema_20240725.json",
-    "version": "0.5.5",
+    "version": "0.5.6",
     "changelog": {
+        "0.5.6": "add mlflow support",
         "0.5.5": "add arg for trt compiler base path",
         "0.5.4": "add undefined label prompt check",
         "0.5.3": "update readme",
@@ -27,7 +28,8 @@
         "scikit-image": "0.23.2",
         "nibabel": "5.2.1",
         "pytorch-ignite": "0.4.11",
-        "cucim-cu12": "24.6.0"
+        "cucim-cu12": "24.6.0",
+        "mlflow": "2.17.2"
     },
     "supported_apps": {
         "vista3d-nim": ""

diff --git a/models/vista3d/configs/train.json b/models/vista3d/configs/train.json
@@ -15,6 +15,8 @@
     "finetune": false,
     "finetune_model_path": "$@bundle_root + '/models/model.pt'",
     "early_stop": false,
+    "use_mlflow": false,
+    "mlflow_dir": "$@bundle_root + '/mlruns'",
     "fold": 0,
     "device": "$torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')",
     "epochs": 5,
@@ -248,6 +250,12 @@
                 "tag_name": "train_loss",
                 "name": "StatsHandler",
                 "output_transform": "$monai.handlers.from_engine(['loss'], first=True)"
+            },
+            {
+                "_target_": "MLFlowHandler",
+                "_disabled_": "$not @use_mlflow",
+                "tracking_uri": "$os.path.abspath(@mlflow_dir)",
+                "output_transform": "$monai.handlers.from_engine(['loss'], first=True)"
             }
         ],
         "key_metric": {
@@ -343,6 +351,12 @@
                 },
                 "save_key_metric": true,
                 "key_metric_filename": "model.pt"
+            },
+            {
+                "_target_": "MLFlowHandler",
+                "_disabled_": "$not @use_mlflow",
+                "iteration_log": false,
+                "tracking_uri": "$os.path.abspath(@mlflow_dir)"
             }
         ],
         "key_metric": {

diff --git a/models/vista3d/docs/README.md b/models/vista3d/docs/README.md
@@ -133,6 +133,24 @@ torchrun --nnodes=1 --nproc_per_node=8 -m monai.bundle run \
 	--config_file="['configs/train.json','configs/train_continual.json','configs/multi_gpu_train.json']" --epochs=320 --learning_rate=0.00005
 ```
 
+### MLflow support
+
+MLflow can be enabled to track and manage your machine learning experiments. To enable MLflow, set the `use_mlflow` parameter to `True`. Below is an example of how to run a single-GPU training command with MLflow enabled:
+
+```bash
+python -m monai.bundle run \
+	--config_file="['configs/train.json','configs/train_continual.json']" --epochs=320 --learning_rate=0.00005 --use_mlflow True
+```
+
+By default, MLflow data is stored in the `mlruns/` folder under the bundle's root directory. To launch the MLflow UI and view your experiment data, follow these steps:
+
+1. Open a terminal and navigate to the root directory of your bundle where the `mlruns/` folder is located.
+
+2. Run the following command to start the MLflow UI server. By default, the UI is then available in your browser at `http://127.0.0.1:5000`.
+
+```bash
+mlflow ui
+```
 
 ## Evaluation
 Evaluation can be used to calculate dice scores for the model or a finetuned model. Change the `ckpt_path` to the checkpoint you wish to evaluate. The dice score is calculated on the original image spacing using `invertd`, while the dice score during finetuning is calculated on resampled space.