Update requirements file and train script

statscol · May 11, 2022 · 9f81466
1 parent ab4f8be
commit 9f81466
Show file tree

Hide file tree

Showing 2 changed files with 17 additions and 4 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,8 @@
+ipywidgets
+datasets
+torch
+transformers
+huggingface_hub
+jiwer
+torchaudio -f https://download.pytorch.org/whl/torch_stable.html
+wandb
diff --git a/train.py b/train.py
@@ -26,6 +26,8 @@ def prepare_dataset(batch):
     return batch
 
 
+
+
 ##apply it for every audio
 
 @dataclass
@@ -135,19 +137,21 @@ def train_model(tr:float,tst:float):
     training_args = TrainingArguments(
     output_dir=REPO_OUT,
     group_by_length=True,
-    per_device_train_batch_size=12,
+    per_device_train_batch_size=18,
     gradient_accumulation_steps=2,
     evaluation_strategy="steps",
-    num_train_epochs=10,
-    gradient_checkpointing=True,
+    num_train_epochs=20,
     fp16=True,
+    gradient_checkpointing=True,
     save_steps=800,
     eval_steps=400,
     logging_steps=400,
     learning_rate=2e-4,
     warmup_steps=300,
     save_total_limit=30,
     push_to_hub=True,
+    report_to="wandb",
+    run_name="wav2vec-large-noLM"
     )
 
     trainer = Trainer(
@@ -162,9 +166,10 @@ def train_model(tr:float,tst:float):
 
     ###
     trainer.train()
-    trainer.push_to_hub()
 
 if __name__=='__main__':
+    import wandb
+    wandb.init(project="wav2vec-spanish")
 
     parser = argparse.ArgumentParser(description = 'ASR Parser')
     parser.add_argument('-tr',type=float,help="train sample ratio",dest="tr_size")