From a6c53e0e1097e3c2b4f2db72cd5c68082930a2cc Mon Sep 17 00:00:00 2001
From: JingyaHuang
Date: Thu, 12 Oct 2023 15:21:01 +0000
Subject: [PATCH] update trainer and its args to main

---
 optimum/onnxruntime/trainer.py       |  5 +----
 optimum/onnxruntime/training_args.py | 20 +++++++++++---------
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/optimum/onnxruntime/trainer.py b/optimum/onnxruntime/trainer.py
index 6e76f37af00..afc90e405bb 100644
--- a/optimum/onnxruntime/trainer.py
+++ b/optimum/onnxruntime/trainer.py
@@ -213,17 +213,16 @@ class ORTTrainer(Trainer):
     def __init__(
         self,
         model: Union[PreTrainedModel, nn.Module] = None,
-        tokenizer: Optional[PreTrainedTokenizerBase] = None,
         args: ORTTrainingArguments = None,
         data_collator: Optional[DataCollator] = None,
         train_dataset: Optional[Dataset] = None,
         eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None,
+        tokenizer: Optional[PreTrainedTokenizerBase] = None,
         model_init: Optional[Callable[[], PreTrainedModel]] = None,
         compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
         callbacks: Optional[List[TrainerCallback]] = None,
         optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
         preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
-        onnx_model_path: Union[str, os.PathLike] = None,
     ):
         super().__init__(
             model=model,
@@ -249,8 +248,6 @@ def __init__(
 
         self.model = model
 
-        self.onnx_model_path = onnx_model_path
-        self.exported_with_loss = False
 
         if self.args.local_rank:
             torch.cuda.set_device(self.args.local_rank)
diff --git a/optimum/onnxruntime/training_args.py b/optimum/onnxruntime/training_args.py
index e3311f7754e..a0cb7c8e983 100644
--- a/optimum/onnxruntime/training_args.py
+++ b/optimum/onnxruntime/training_args.py
@@ -139,8 +139,9 @@ def __post_init__(self):
         if self.load_best_model_at_end:
             if self.evaluation_strategy != self.save_strategy:
                 raise ValueError(
-                    "--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation "
-                    f"strategy: {self.evaluation_strategy}\n- Save strategy: {self.save_strategy}"
+                    "--load_best_model_at_end requires the saving steps to be a multiple of the evaluation "
+                    "steps, which cannot get guaranteed when mixing ratio and absolute steps for save_steps "
+                    f"{self.save_steps} and eval_steps {self.eval_steps}."
                 )
             if self.evaluation_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
                 if self.eval_steps < 1 or self.save_steps < 1:
@@ -191,14 +192,15 @@ def __post_init__(self):
             self.half_precision_backend = self.fp16_backend
 
         if self.bf16 or self.bf16_full_eval:
-            if self.no_cuda and not is_torch_bf16_cpu_available():
+            if self.use_cpu and not is_torch_bf16_cpu_available():
                 # cpu
                 raise ValueError("Your setup doesn't support bf16/(cpu, tpu, neuroncore). You need torch>=1.10")
-            elif not self.no_cuda and torch.cuda.is_available() and not is_torch_bf16_gpu_available():
-                # gpu
-                raise ValueError(
-                    "Your setup doesn't support bf16/gpu. You need torch>=1.10, using Ampere GPU with cuda>=11.0"
-                )
+            elif not self.use_cpu:
+                if torch.cuda.is_available() and not is_torch_bf16_gpu_available():
+                    # gpu
+                    raise ValueError(
+                        "Your setup doesn't support bf16/gpu. You need torch>=1.10, using Ampere GPU with cuda>=11.0"
+                    )
 
         if self.fp16 and self.bf16:
             raise ValueError("At most one of fp16 and bf16 can be True, but not both")
@@ -307,7 +309,7 @@ def __post_init__(self):
         # no need to assert on else
 
         # if training args is specified, it will override the one specified in the accelerate config
-        if self.half_precision_backend != "apex" and len(self.sharded_ddp) == 0:
+        if self.half_precision_backend != "apex":
             mixed_precision_dtype = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
             if self.fp16:
                 mixed_precision_dtype = "fp16"