Add llava med dataset #5746

Closed
30 changes: 29 additions & 1 deletion data/dataset_info.json
@@ -624,5 +624,33 @@
"prompt": "content"
},
"folder": "python"
},
"llava-med-train": {
"hf_hub_url": "Shubhangi29/llava_med_instruct_60k_inline_mention_filtered",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"images": "image"
},
"tags": {
"role_tag": "from",
"content_tag": "value",
"user_tag": "human",
"assistant_tag": "gpt"
}
},
"llava-med-val": {
"hf_hub_url": "Shubhangi29/llava_med_instruct_60k_inline_mention_filtered",
"formatting": "sharegpt",
"columns": {
"messages": "conversations",
"images": "image"
},
"tags": {
"role_tag": "from",
"content_tag": "value",
"user_tag": "human",
"assistant_tag": "gpt"
}
}
}
}
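Note on the two new entries: both point at the same Hugging Face dataset and declare the ShareGPT layout, so LLaMA-Factory reads each record's "conversations" list through the "from"/"value" tags and takes the image from the "image" column. The sketch below is a minimal, standalone illustration of how that mapping is interpreted; the sample record is invented for illustration and the role-mapping dict is an assumption, not LLaMA-Factory code.

# Minimal sketch of the column/tag mapping declared above, applied to one
# invented record shaped like the LLaVA-Med instruct data (assumed shape).
raw_example = {
    "conversations": [                                   # columns.messages
        {"from": "human", "value": "<image>\nWhat does this chest X-ray show?"},
        {"from": "gpt", "value": "A small right-sided pleural effusion."},
    ],
    "image": "xray_0001.jpg",                            # columns.images (a single value, not a list)
}

role_tag, content_tag = "from", "value"                  # tags.role_tag / tags.content_tag
role_map = {"human": "user", "gpt": "assistant"}         # tags.user_tag / tags.assistant_tag

messages = [
    {"role": role_map[turn[role_tag]], "content": turn[content_tag]}
    for turn in raw_example["conversations"]
]
images = [raw_example["image"]]                          # wrapped into a list (see the aligner change below)

print(messages)  # [{'role': 'user', ...}, {'role': 'assistant', ...}]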
8 changes: 4 additions & 4 deletions examples/deepspeed/ds_z3_offload_config.json
@@ -28,11 +28,11 @@
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": "auto",
"stage3_prefetch_bucket_size": "auto",
"stage3_param_persistence_threshold": "auto",
"reduce_bucket_size": 1e9,
"stage3_prefetch_bucket_size": 1e9,
"stage3_param_persistence_threshold": 1e5,
"stage3_max_live_parameters": 1e9,
"stage3_max_reuse_distance": 1e9,
"stage3_gather_16bit_weights_on_model_save": true
}
}
}
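The only change here pins three ZeRO-3 sizes that were previously left as "auto". When these fields are "auto", the Hugging Face Trainer's DeepSpeed integration derives them from the model's hidden size; the sketch below estimates what they would have resolved to, assuming the documented formulas and a text hidden_size of 4096 for Llama-3.2-11B-Vision (both assumptions, not values taken from this PR). The pinned 1e9 buckets are much larger, trading GPU memory for fewer, larger reduce/prefetch operations.

# Rough estimate of what the former "auto" values would have resolved to under the
# HF Trainer's DeepSpeed ZeRO-3 auto-fill rules (assumed), versus the pinned values.
hidden_size = 4096  # assumption for the Llama-3.2-11B text backbone; check the model config

auto_estimate = {
    "reduce_bucket_size": hidden_size * hidden_size,                      # 16_777_216
    "stage3_prefetch_bucket_size": int(0.9 * hidden_size * hidden_size),  # 15_099_494
    "stage3_param_persistence_threshold": 10 * hidden_size,               # 40_960
}
pinned = {
    "reduce_bucket_size": int(1e9),
    "stage3_prefetch_bucket_size": int(1e9),
    "stage3_param_persistence_threshold": int(1e5),
}
for key in auto_estimate:
    print(f"{key}: auto~{auto_estimate[key]:,} -> pinned {pinned[key]:,}")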
49 changes: 49 additions & 0 deletions examples/deepspeed/jonathan.json
@@ -0,0 +1,49 @@
{
"zero_optimization": {
"stage": 3,
"overlap_comm": true,
"contiguous_gradients": true,
"sub_group_size": 1e9,
"reduce_bucket_size": 1e9,
"stage3_prefetch_bucket_size": 1e9,
"stage3_param_persistence_threshold": 1e5,
"stage3_max_live_parameters": 1e9,
"stage3_max_reuse_distance": 1e9,
"stage3_gather_16bit_weights_on_model_save": true
},
"fp16": {
"enabled": "auto",
"auto_cast": true,
"loss_scale": 0,
"initial_scale_power": 32,
"loss_scale_window": 1000,
"hysteresis": 2,
"min_loss_scale": 1
},
"bf16": {
"enabled": "auto"
},
"optimizer": {
"type": "AdamW",
"params": {
"lr": "auto",
"betas": [
0.9,
0.999
],
"eps": 1e-8,
"weight_decay": "auto"
}
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"steps_per_print": 2000,
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": true
}
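This file duplicates the ZeRO-3 settings above and adds fp16/bf16, optimizer, and batch-size sections that are mostly left as "auto". Those fields are not read literally; at launch the Hugging Face Trainer's DeepSpeed integration fills them from the training arguments. The mapping below is an illustrative summary, assumed from the standard integration rather than extracted from this PR.

# Illustrative summary of where the "auto" fields in this config are filled from
# at launch (assumption: standard HF Trainer + DeepSpeed integration behaviour).
auto_field_sources = {
    "fp16.enabled": "TrainingArguments.fp16",
    "bf16.enabled": "TrainingArguments.bf16",
    "optimizer.params.lr": "TrainingArguments.learning_rate",
    "optimizer.params.weight_decay": "TrainingArguments.weight_decay",
    "gradient_accumulation_steps": "TrainingArguments.gradient_accumulation_steps",
    "gradient_clipping": "TrainingArguments.max_grad_norm",
    "train_micro_batch_size_per_gpu": "TrainingArguments.per_device_train_batch_size",
    "train_batch_size": "micro_batch_size * gradient_accumulation_steps * world_size (derived)",
}
for field, source in auto_field_sources.items():
    print(f"{field:40s} <- {source}")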





43 changes: 43 additions & 0 deletions examples/train_full/llama_finetuning_on_llava_med.yaml
@@ -0,0 +1,43 @@
### model
model_name_or_path: /import/ml-sc-nlpcheckpoints-scratch3/jonathanl/generic_checkpoints/llama3.2/Llama-3.2-11B-Vision-Instruct

### method
stage: sft
do_train: true
finetuning_type: full
deepspeed: /import/ml-sc-scratch4/shubhangiu/LLaMA-Factory/examples/deepspeed/ds_z3_offload_config.json

### dataset
dataset: llava-med-train
template: llama3_vl
cutoff_len: 1024
overwrite_cache: true
preprocessing_num_workers: 4

### eval
eval_dataset: llava-med-val
per_device_eval_batch_size: 1
eval_strategy: steps
eval_steps: 100

### output
output_dir: saves/llama-3.2-11b/full/sft
logging_steps: 1
save_steps: 5000
plot_loss: true
overwrite_output_dir: true

### train
per_device_train_batch_size: 2
gradient_accumulation_steps: 16
learning_rate: 2.0e-5
num_train_epochs: 3
lr_scheduler_type: cosine
warmup_ratio: 0.03
weight_decay: 0.
bf16: true
ddp_timeout: 180000000

### report
report_to: wandb
run_name: test_run
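For reference, the hyperparameters above imply the following effective batch size per optimizer step; the GPU count is an assumption, since the YAML itself does not fix it. The config would typically be launched with the LLaMA-Factory CLI as shown in the comment.

# Sanity check of the effective global batch size implied by the settings above.
per_device_train_batch_size = 2
gradient_accumulation_steps = 16
num_gpus = 8  # assumption: a single node with 8 GPUs

effective_batch_size = per_device_train_batch_size * gradient_accumulation_steps * num_gpus
print(effective_batch_size)  # 256 samples per optimizer step

# Typical launch command (LLaMA-Factory CLI):
#   llamafactory-cli train examples/train_full/llama_finetuning_on_llava_med.yaml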
10 changes: 7 additions & 3 deletions src/llamafactory/data/aligner.py
@@ -42,7 +42,7 @@ def _convert_images(
"""
if len(images) == 0:
return None

images = images[:]
if dataset_attr.load_from in ["script", "file"]:
for i in range(len(images)):
@@ -207,15 +207,19 @@ def convert_sharegpt(
    if broken_data:
        logger.warning("Skipping this abnormal example.")
        prompt, response = [], []

    convert_images = partial(_convert_images, dataset_attr=dataset_attr, data_args=data_args)
    convert_videos = partial(_convert_videos, dataset_attr=dataset_attr, data_args=data_args)

    if dataset_attr.images and not isinstance(example[dataset_attr.images], List):
        _images = [example[dataset_attr.images]]  # wrap a single image entry in a list
    elif dataset_attr.images:
        _images = example[dataset_attr.images]  # already a list; use it as-is

    output = {
        "_prompt": prompt,
        "_response": response,
        "_system": system,
        "_tools": example[dataset_attr.tools] if dataset_attr.tools else "",
        "_images": convert_images(example[dataset_attr.images]) if dataset_attr.images else None,
        "_images": convert_images(_images) if dataset_attr.images else None,
        "_videos": convert_videos(example[dataset_attr.videos]) if dataset_attr.videos else None,
    }
    return output
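The aligner change exists because the LLaVA-Med records carry a single value in their image column, while other ShareGPT-style datasets already provide a list; _convert_images also now copies its input before mutating it. The helper below is a standalone restatement of that intent for illustration; it is not the repository code, and normalize_images is a hypothetical name.

from typing import Any, List

def normalize_images(value: Any) -> List[Any]:
    # Hypothetical helper mirroring the intent of the diff above: accept either a
    # single image entry (as in the LLaVA-Med "image" column) or a list of entries,
    # and always hand downstream code a fresh list it may mutate safely.
    if not isinstance(value, list):
        return [value]
    return list(value)

assert normalize_images("xray_0001.jpg") == ["xray_0001.jpg"]
assert normalize_images(["a.jpg", "b.jpg"]) == ["a.jpg", "b.jpg"]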
2 changes: 0 additions & 2 deletions src/llamafactory/data/loader.py
@@ -166,7 +166,6 @@ def _get_merged_dataset(
"""
if dataset_names is None:
return None

datasets = []
for dataset_attr in get_dataset_list(dataset_names, data_args.dataset_dir):
if (stage == "rm" and dataset_attr.ranking is False) or (stage != "rm" and dataset_attr.ranking is True):
@@ -244,7 +243,6 @@ def get_dataset(
            logger.warning("Loading dataset from disk will ignore other data arguments.")
            dataset_dict: "DatasetDict" = load_from_disk(data_args.tokenized_path)
            logger.info("Loaded tokenized dataset from {}.".format(data_args.tokenized_path))

            dataset_module: Dict[str, "Dataset"] = {}
            if "train" in dataset_dict:
                dataset_module["train_dataset"] = dataset_dict["train"]