Fix issues with distributed training #80

Merged
merged 8 commits into from
Apr 20, 2024
Changes from 2 commits
1 change: 0 additions & 1 deletion README.md

@@ -42,7 +42,6 @@ python ./gato/data/download_custom_datasets.py
 ```bash
 docker build -t gato-control -f ./docker/Dockerfile .
 docker run -it --mount "type=bind,source=$(pwd),target=/app/gato-control" --entrypoint /bin/bash --gpus=all gato-control
-
 ```

14 changes: 11 additions & 3 deletions gato/tasks/control_task.py

@@ -110,13 +110,21 @@ def evaluate(self, model: GatoPolicy, n_iterations, deterministic=True, promptle
         ep_lens = []
         metrics = {}
 
-        context_timesteps = model.context_len // self.tokens_per_timestep # amount of timesteps that fit into context
+        context_timesteps = model.module.context_len // self.tokens_per_timestep # amount of timesteps that fit into context
 
         for i in range(n_iterations):
             observation, info = self.env.reset()
 
             # sample prompt
-            input_dict = self.sample_batch_configurable(batch_size=1, device=model.device, prompt_proportions=[1.], prompt_types = ['end'], max_tokens = model.context_len, share_prompt_episodes=True,ep_ids=self.top_ids)[0]
+            input_dict = self.sample_batch_configurable(
+                batch_size=1,
+                device=model.device,
+                prompt_proportions=[1.],
+                prompt_types=['end'],
+                max_tokens=model.module.context_len,
+                share_prompt_episodes=True,
+                ep_ids=self.top_ids
+            )[0]
 
             # infer dtypes
             action_type = input_dict[self.action_str].dtype
@@ -145,7 +153,7 @@ def evaluate(self, model: GatoPolicy, n_iterations, deterministic=True, promptle
                 # trim to context length
                 input_dict[self.obs_str] = input_dict[self.obs_str][-context_timesteps:,]
                 input_dict[self.action_str] = input_dict[self.action_str][-context_timesteps:,]
-                action = model.predict_control(input_dict, task=self, deterministic=deterministic)
+                action = model.module.predict_control(input_dict, task=self, deterministic=deterministic)

Contributor Author

Having a hard time thinking of a good way to handle this. When training is launched with `accelerate launch train.py`, you need to access `model.module.predict...`, but when it's launched with `python train.py`, you need just `model.predict...`. I'd hate to have `if` conditionals all over the place.

Contributor Author

Oh! Clearly! https://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html

Attributes of the wrapped module

After wrapping a Module with DataParallel, the attributes of the module (e.g. custom methods) became inaccessible. This is because DataParallel defines a few new members, and allowing other attributes might lead to clashes in their names. For those who still want to access the attributes, a workaround is to use a subclass of DataParallel as below.

class MyDataParallel(nn.DataParallel):
    def __getattr__(self, name):
        try:
            return super().__getattr__(name)
        except AttributeError:
            return getattr(self.module, name)

Contributor Author

Scratch that. Now I remember. We're getting DataParallel by way of Huggingface's Accelerate library. We'd need to make the change there. Not quite so clear.
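
An aside on the Accelerate API (not from the thread): `Accelerator.unwrap_model` returns the underlying module whether or not `prepare()` wrapped it, and this PR already relies on it in `trainer.py` when saving checkpoints. A minimal sketch:

    import torch.nn as nn
    from accelerate import Accelerator

    accelerator = Accelerator()
    model = accelerator.prepare(nn.Linear(4, 2))

    # Under `accelerate launch`, prepare() returns a DistributedDataParallel
    # wrapper; under `python train.py`, it returns the bare module.
    # unwrap_model resolves to the underlying module in both cases.
    policy = accelerator.unwrap_model(model)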

Contributor Author

Oh. Here we go:

class GatoPolicy(nn.Module):
    # ...
    @property
    def module(self):
        return self

Add that and just use model.module everywhere. That ought to work for both Accelerated runs and non-distributed runs.
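
A minimal sketch (the `GatoPolicy` body here is simplified to the relevant parts) of why the property resolves correctly in both launch modes:

    import torch.nn as nn

    class GatoPolicy(nn.Module):
        @property
        def module(self):
            # In an unwrapped run, model.module resolves to the policy itself.
            return self

        def predict_control(self, input_dict, task=None, deterministic=True):
            ...

    model = GatoPolicy()
    assert model.module is model  # plain `python train.py` run

    # Under `accelerate launch`, the policy is wrapped in DistributedDataParallel,
    # whose built-in `module` attribute already points at the underlying GatoPolicy,
    # so model.module.predict_control(...) resolves identically in both modes.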

                 input_dict[self.action_str][-1,] = action
                 np_action = action.cpu().numpy()
                 observation, reward, terminated, truncated, info = self.env.step(np_action)

4 changes: 2 additions & 2 deletions gato/tasks/text_task.py

@@ -60,7 +60,7 @@ def sample_batch(self, batch_size, is_test=False)->List[Dict]:
         return batch_dicts
 
     def evaluate(self, model: GatoPolicy, num_examples_to_test=50, deterministic=True, log_examples_to_output=False):
-        tokenizer = model.text_tokenizer
+        tokenizer = model.module.text_tokenizer
         loss_fn = nn.CrossEntropyLoss()
         total_loss = 0
         total_tokens = 0
@@ -89,7 +89,7 @@ def evaluate(self, model: GatoPolicy, num_examples_to_test=50, deterministic=True, log_examples_to_output=False):
             new_batch_dict['text'] = input_tokens
 
             # Generate prediction
-            pred_logits, pred_tokens = model.predict_text(new_batch_dict, max_length=len(target_tokens), deterministic=deterministic)
+            pred_logits, pred_tokens = model.module.predict_text(new_batch_dict, max_length=len(target_tokens), deterministic=deterministic)
             # todo: pull 50 into a CLI argument in train.py
             if log_examples_to_output and idx%50==0:
                 print(f'Text Example : {tokenizer.decode(batch_dict["text"])} \n Input passed to model : {tokenizer.decode(new_batch_dict["text"])} \n Predicted output : {tokenizer.decode(pred_tokens)}')

5 changes: 3 additions & 2 deletions gato/training/trainer.py

@@ -45,8 +45,7 @@ def train(self):
         iters = self.args.training_steps // self.args.log_eval_freq
         for i in range(iters):
             logs = self.train_iteration(self.args.log_eval_freq, i)
-            if self.args.use_wandb and self.accelerator.is_main_process:
-                wandb.log(logs)
+            self.accelerator.log(logs)
 
         ## Save model at end of training only if not saving checkpoints
         if self.args.save_model and self.args.save_mode == 'last':
@@ -55,6 +54,8 @@
             unwrapped_model = self.accelerator.unwrap_model(self.model)
             save_model(unwrapped_model, self.exp_dir, f'checkpoint_{self.steps}', self.args)
 
+        self.accelerator.end_training()
 
 
     def train_iteration(self, num_steps, iter):
         logs = {}

21 changes: 15 additions & 6 deletions train.py

@@ -23,7 +23,18 @@

 def main(args):
     ddp_kwargs = DistributedDataParallelKwargs(find_unused_parameters=True)
-    accelerator = Accelerator(cpu=args.cpu, mixed_precision=args.mixed_precision, split_batches=True, gradient_accumulation_steps=args.gradient_accumulation_steps, kwargs_handlers=[ddp_kwargs])
+    if args.use_wandb:
+        log_with = 'wandb'
+    else:
+        log_with = None
+    accelerator = Accelerator(
+        cpu=args.cpu,
+        mixed_precision=args.mixed_precision,
+        split_batches=True,
+        gradient_accumulation_steps=args.gradient_accumulation_steps,
+        kwargs_handlers=[ddp_kwargs],
+        log_with=log_with,
+    )
     args.device = accelerator.device.type
 
     exp_date = datetime.now().strftime('%y-%m-%d_%H-%M-%S')
@@ -126,11 +137,9 @@ def main(args):
     optimizer, scheduler = accelerator.prepare(optimizer, scheduler)
 
     if args.use_wandb:
-        wandb.init(
-            name = exp_name,
-            project=args.wandb_project,
-            config=args,
-        )
+        accelerator.init_trackers(args.wandb_project, init_kwargs={'wandb': {'name': exp_name, 'config': args}})
+    else:
+        accelerator.init_trackers('')
 
     # Create save dir if does not exist
     if args.save_model and not os.path.exists(args.save_dir):
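
Taken together, the `trainer.py` and `train.py` changes move logging onto Accelerate's tracker API: `init_trackers` replaces `wandb.init`, `accelerator.log` replaces the guarded `wandb.log` (trackers are created on the main process by default, so the explicit `is_main_process` check becomes unnecessary), and `end_training` flushes everything at shutdown. A minimal standalone sketch of that lifecycle, using a toy loop rather than this repo's trainer:

    from accelerate import Accelerator

    # With log_with=None the tracker calls below are harmless no-ops,
    # so the training loop needs no wandb conditionals either way.
    accelerator = Accelerator(log_with=None)
    accelerator.init_trackers('demo-project')

    for step in range(3):
        # In the real trainer this dict comes from train_iteration().
        accelerator.log({'training/loss': 1.0 / (step + 1)}, step=step)

    # Closes all trackers; mirrors the new call at the end of train().
    accelerator.end_training()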