diff --git a/megatron/neox_arguments/neox_args.py b/megatron/neox_arguments/neox_args.py
index fe02d4a45..1ac062e4d 100644
--- a/megatron/neox_arguments/neox_args.py
+++ b/megatron/neox_arguments/neox_args.py
@@ -723,16 +723,7 @@ class NeoXArgsTokenizer(NeoXArgsTemplate):
     """
     tokenizer object loaded into memory and accessible by other functions
     """
-    
-    finetune_groups_key_words = ['image_prefix']
-    """
-    parameter will be putted into fintune groups as long as its name contain one of these keywords
-    """
 
-    finetune_factor = 1/400
-    """
-    control the learning rate of fintuned groups, whose real_lr=lr*finetune_factor
-    """
 
 @dataclass
 class NeoXArgsTraining(NeoXArgsTemplate):
@@ -1141,6 +1132,16 @@ class NeoXArgsTraining(NeoXArgsTemplate):
     Resample dataset with different weights for each dataset. eg 1::1 , 1::2
     """ 
 
+    finetune_groups_key_words = ['image_prefix']
+    """
+    a parameter is put into the finetune groups if its name contains any one of these keywords
+    """
+
+    finetune_factor = 1/400
+    """
+    controls the learning rate of the finetune groups: real_lr = lr * finetune_factor
+    """
+
 @dataclass
 class NeoXArgsTextgen(NeoXArgsTemplate):
     """