diff --git a/examples/onnxruntime/training/docker/Dockerfile-ort-nightly-cu118 b/examples/onnxruntime/training/docker/Dockerfile-ort-nightly-cu118
index 668e5e56695..3e6841453b5 100644
--- a/examples/onnxruntime/training/docker/Dockerfile-ort-nightly-cu118
+++ b/examples/onnxruntime/training/docker/Dockerfile-ort-nightly-cu118
@@ -22,6 +22,7 @@ CMD nvidia-smi
ENV DEBIAN_FRONTEND noninteractive
# Versions
+# Available options: 3.8, 3.9, 3.10, 3.11
ARG PYTHON_VERSION=3.9
ARG TORCH_CUDA_VERSION=cu118
ARG TORCH_VERSION=2.0.0
@@ -34,7 +35,7 @@ SHELL ["/bin/bash", "-c"]
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
- bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+ bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
diff --git a/examples/onnxruntime/training/docker/Dockerfile-ort1.14.1-cu116 b/examples/onnxruntime/training/docker/Dockerfile-ort1.14.1-cu116
index db2219b5c62..15df7c352fe 100644
--- a/examples/onnxruntime/training/docker/Dockerfile-ort1.14.1-cu116
+++ b/examples/onnxruntime/training/docker/Dockerfile-ort1.14.1-cu116
@@ -33,7 +33,7 @@ ARG TORCHVISION_VERSION=0.14.1
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
- bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+ bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
diff --git a/examples/onnxruntime/training/docker/Dockerfile-ort1.15.1-cu118 b/examples/onnxruntime/training/docker/Dockerfile-ort1.15.1-cu118
index 51c9ec514c4..2d1306e1a35 100644
--- a/examples/onnxruntime/training/docker/Dockerfile-ort1.15.1-cu118
+++ b/examples/onnxruntime/training/docker/Dockerfile-ort1.15.1-cu118
@@ -34,7 +34,7 @@ ARG TORCHVISION_VERSION=0.15.1
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
- bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+ bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
diff --git a/examples/onnxruntime/training/docker/Dockerfile-ort1.16.1-cu118 b/examples/onnxruntime/training/docker/Dockerfile-ort1.16.1-cu118
index 3f6b8335923..482d495fcb4 100644
--- a/examples/onnxruntime/training/docker/Dockerfile-ort1.16.1-cu118
+++ b/examples/onnxruntime/training/docker/Dockerfile-ort1.16.1-cu118
@@ -34,7 +34,7 @@ SHELL ["/bin/bash", "-c"]
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
- bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+ bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
diff --git a/examples/onnxruntime/training/image-classification/run_image_classification.py b/examples/onnxruntime/training/image-classification/run_image_classification.py
index 837cb57a4bb..ec8de0b52da 100644
--- a/examples/onnxruntime/training/image-classification/run_image_classification.py
+++ b/examples/onnxruntime/training/image-classification/run_image_classification.py
@@ -16,6 +16,7 @@
import logging
import os
import sys
+import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -54,7 +55,7 @@
logger = logging.getLogger(__name__)
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.26.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/image-classification/requirements.txt")
@@ -141,12 +142,28 @@ class ModelArguments:
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
image_processor_name: str = field(default=None, metadata={"help": "Name or path of preprocessor config."})
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
)
},
)
@@ -162,32 +179,24 @@ def collate_fn(examples):
return {"pixel_values": pixel_values, "labels": labels}
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -200,6 +209,10 @@ def main():
handlers=[logging.StreamHandler(sys.stdout)],
)
+ if training_args.should_log:
+        # The default of training_args.log_level is passive, so we set the log level to info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
transformers.utils.logging.set_verbosity(log_level)
@@ -209,7 +222,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")
@@ -238,7 +251,7 @@ def main():
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
task="image-classification",
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
else:
data_files = {}
@@ -285,7 +298,8 @@ def compute_metrics(p):
finetuning_task="image-classification",
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
model = AutoModelForImageClassification.from_pretrained(
model_args.model_name_or_path,
@@ -293,14 +307,16 @@ def compute_metrics(p):
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
)
image_processor = AutoImageProcessor.from_pretrained(
model_args.image_processor_name or model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
# Define torchvision transforms to be applied to each image.
@@ -367,7 +383,6 @@ def val_transforms(example_batch):
compute_metrics=compute_metrics,
tokenizer=image_processor,
data_collator=collate_fn,
- feature="image-classification",
)
# Training
@@ -385,7 +400,7 @@ def val_transforms(example_batch):
# Evaluation
if training_args.do_eval:
- metrics = trainer.evaluate(inference_with_ort=inference_args.inference_with_ort)
+ metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
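Note: the `use_auth_token` -> `token` shim added above is repeated verbatim in every script touched by this diff. A minimal standalone sketch of its behaviour, where `Args` is a hypothetical stand-in for the parsed `ModelArguments` (not part of the diff):

    import warnings
    from types import SimpleNamespace as Args  # hypothetical stand-in for ModelArguments

    def resolve_token(model_args):
        # Prefer the new `token` field; fall back to the deprecated `use_auth_token`.
        if model_args.use_auth_token is not None:
            warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
            if model_args.token is not None:
                raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
            model_args.token = model_args.use_auth_token
        return model_args.token

    resolve_token(Args(token=None, use_auth_token=True))      # warns, then returns True (legacy behaviour)
    resolve_token(Args(token="hf_xxx", use_auth_token=None))  # returns the dummy token "hf_xxx"
    # resolve_token(Args(token="hf_xxx", use_auth_token=True))  # would raise ValueError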
diff --git a/examples/onnxruntime/training/language-modeling/run_clm.py b/examples/onnxruntime/training/language-modeling/run_clm.py
index 2807d3f7211..bd9694ae41b 100644
--- a/examples/onnxruntime/training/language-modeling/run_clm.py
+++ b/examples/onnxruntime/training/language-modeling/run_clm.py
@@ -24,12 +24,14 @@
import math
import os
import sys
+import warnings
from dataclasses import dataclass, field
from itertools import chain
from typing import Optional
import datasets
import evaluate
+import torch
import transformers
from datasets import load_dataset
from transformers import (
@@ -52,7 +54,7 @@
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.23.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
@@ -73,7 +75,7 @@ class ModelArguments:
default=None,
metadata={
"help": (
- "The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
+ "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
)
},
)
@@ -108,12 +110,47 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
+ default=False,
+ metadata={
+ "help": (
+                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
+ )
+ },
+ )
+ torch_dtype: Optional[str] = field(
+ default=None,
+ metadata={
+ "help": (
+ "Override the default `torch.dtype` and load the model under this dtype. If `auto` is passed, the "
+ "dtype will be automatically derived from the model's weights."
+ ),
+ "choices": ["auto", "bfloat16", "float16", "float32"],
+ },
+ )
+ low_cpu_mem_usage: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+                "Whether to create the model as an empty shell and only materialize its parameters when the pretrained weights are loaded. "
+                "Setting it to True will benefit LLM loading time and RAM consumption."
)
},
)
@@ -160,7 +197,7 @@ class DataTrainingArguments:
)
},
)
-
+ streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
block_size: Optional[int] = field(
default=None,
metadata={
@@ -189,6 +226,9 @@ class DataTrainingArguments:
)
def __post_init__(self):
+ if self.streaming:
+ require_version("datasets>=2.0.0", "The streaming feature requires `datasets>=2.0.0`")
+
if self.dataset_name is None and self.train_file is None and self.validation_file is None:
raise ValueError("Need either a dataset name or a training/validation file.")
else:
@@ -200,32 +240,24 @@ def __post_init__(self):
assert extension in ["csv", "json", "txt"], "`validation_file` should be a csv, a json or a txt file."
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -238,6 +270,10 @@ def main():
handlers=[logging.StreamHandler(sys.stdout)],
)
+ if training_args.should_log:
+        # The default of training_args.log_level is passive, so we set the log level to info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -248,7 +284,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")
@@ -285,7 +321,8 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ streaming=data_args.streaming,
)
if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset(
@@ -293,14 +330,16 @@ def main():
data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ streaming=data_args.streaming,
)
raw_datasets["train"] = load_dataset(
data_args.dataset_name,
data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ streaming=data_args.streaming,
)
else:
data_files = {}
@@ -321,7 +360,7 @@ def main():
extension,
data_files=data_files,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
**dataset_args,
)
# If no validation data is there, validation_split_percentage will be used to divide the dataset.
@@ -331,7 +370,7 @@ def main():
data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
**dataset_args,
)
raw_datasets["train"] = load_dataset(
@@ -339,7 +378,7 @@ def main():
data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
**dataset_args,
)
@@ -355,7 +394,8 @@ def main():
config_kwargs = {
"cache_dir": model_args.cache_dir,
"revision": model_args.model_revision,
- "use_auth_token": True if model_args.use_auth_token else None,
+ "token": model_args.token,
+ "trust_remote_code": model_args.trust_remote_code,
}
if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
@@ -373,7 +413,8 @@ def main():
"cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision,
- "use_auth_token": True if model_args.use_auth_token else None,
+ "token": model_args.token,
+ "trust_remote_code": model_args.trust_remote_code,
}
if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
@@ -381,32 +422,44 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
else:
raise ValueError(
- "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+ "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
if model_args.model_name_or_path:
+ torch_dtype = (
+ model_args.torch_dtype
+ if model_args.torch_dtype in ["auto", None]
+ else getattr(torch, model_args.torch_dtype)
+ )
model = AutoModelForCausalLM.from_pretrained(
model_args.model_name_or_path,
from_tf=bool(".ckpt" in model_args.model_name_or_path),
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
+ torch_dtype=torch_dtype,
+ low_cpu_mem_usage=model_args.low_cpu_mem_usage,
)
else:
- model = AutoModelForCausalLM.from_config(config)
+ model = AutoModelForCausalLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
n_params = sum({p.data_ptr(): p.numel() for p in model.parameters()}.values())
logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params")
- model.resize_token_embeddings(len(tokenizer))
+ # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
+ # on a small vocab and want a smaller embedding size, remove this test.
+ embedding_size = model.get_input_embeddings().weight.shape[0]
+ if len(tokenizer) > embedding_size:
+ model.resize_token_embeddings(len(tokenizer))
# Preprocessing the datasets.
# First we tokenize all the texts.
if training_args.do_train:
- column_names = raw_datasets["train"].column_names
+ column_names = list(raw_datasets["train"].features)
else:
- column_names = raw_datasets["validation"].column_names
+ column_names = list(raw_datasets["validation"].features)
text_column_name = "text" if "text" in column_names else column_names[0]
# since this will be pickled to avoid _LazyModule error in Hasher force logger loading before tokenize_function
@@ -424,27 +477,34 @@ def tokenize_function(examples):
return output
with training_args.main_process_first(desc="dataset map tokenization"):
- tokenized_datasets = raw_datasets.map(
- tokenize_function,
- batched=True,
- num_proc=data_args.preprocessing_num_workers,
- remove_columns=column_names,
- load_from_cache_file=not data_args.overwrite_cache,
- desc="Running tokenizer on dataset",
- )
+ if not data_args.streaming:
+ tokenized_datasets = raw_datasets.map(
+ tokenize_function,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ remove_columns=column_names,
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc="Running tokenizer on dataset",
+ )
+ else:
+ tokenized_datasets = raw_datasets.map(
+ tokenize_function,
+ batched=True,
+ remove_columns=column_names,
+ )
if data_args.block_size is None:
block_size = tokenizer.model_max_length
- if block_size > 1024:
+ if block_size > config.max_position_embeddings:
logger.warning(
f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
- "Picking 1024 instead. You can change that default value by passing --block_size xxx."
+ f"Using block_size={min(1024, config.max_position_embeddings)} instead. You can change that default value by passing --block_size xxx."
)
- block_size = 1024
+ block_size = min(1024, config.max_position_embeddings)
else:
if data_args.block_size > tokenizer.model_max_length:
logger.warning(
- f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model"
+ f"The block_size passed ({data_args.block_size}) is larger than the maximum length for the model "
f"({tokenizer.model_max_length}). Using block_size={tokenizer.model_max_length}."
)
block_size = min(data_args.block_size, tokenizer.model_max_length)
@@ -454,10 +514,9 @@ def group_texts(examples):
# Concatenate all texts.
concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
total_length = len(concatenated_examples[list(examples.keys())[0]])
- # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
- # customize this part to your needs.
- if total_length >= block_size:
- total_length = (total_length // block_size) * block_size
+ # We drop the small remainder, and if the total_length < block_size we exclude this batch and return an empty dict.
+ # We could add padding if the model supported it instead of this drop, you can customize this part to your needs.
+ total_length = (total_length // block_size) * block_size
# Split by chunks of max_len.
result = {
k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
@@ -471,16 +530,22 @@ def group_texts(examples):
# to preprocess.
#
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+ # https://huggingface.co/docs/datasets/process#map
with training_args.main_process_first(desc="grouping texts together"):
- lm_datasets = tokenized_datasets.map(
- group_texts,
- batched=True,
- num_proc=data_args.preprocessing_num_workers,
- load_from_cache_file=not data_args.overwrite_cache,
- desc=f"Grouping texts in chunks of {block_size}",
- )
+ if not data_args.streaming:
+ lm_datasets = tokenized_datasets.map(
+ group_texts,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc=f"Grouping texts in chunks of {block_size}",
+ )
+ else:
+ lm_datasets = tokenized_datasets.map(
+ group_texts,
+ batched=True,
+ )
if training_args.do_train:
if "train" not in tokenized_datasets:
@@ -528,7 +593,6 @@ def compute_metrics(eval_preds):
preprocess_logits_for_metrics=preprocess_logits_for_metrics
if training_args.do_eval and not is_torch_tpu_available()
else None,
- feature="text-generation",
)
# Training
@@ -556,7 +620,7 @@ def compute_metrics(eval_preds):
if training_args.do_eval:
logger.info("*** Evaluate ***")
- metrics = trainer.evaluate(inference_with_ort=inference_args.inference_with_ort)
+ metrics = trainer.evaluate()
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
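Note on the new `torch_dtype` handling in run_clm.py above: the CLI string is forwarded untouched for `auto`/unset and otherwise resolved to a real `torch.dtype` attribute before being passed to `from_pretrained`. A minimal sketch of that resolution (assumes `torch` is installed; not part of the diff):

    import torch

    def resolve_torch_dtype(requested):
        # "auto" and None are passed straight to from_pretrained(); named dtypes are
        # looked up on the torch module ("bfloat16" -> torch.bfloat16, etc.).
        return requested if requested in ["auto", None] else getattr(torch, requested)

    assert resolve_torch_dtype(None) is None
    assert resolve_torch_dtype("auto") == "auto"
    assert resolve_torch_dtype("float16") is torch.float16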
diff --git a/examples/onnxruntime/training/language-modeling/run_mlm.py b/examples/onnxruntime/training/language-modeling/run_mlm.py
index 122395a1cd7..3365ca8703d 100755
--- a/examples/onnxruntime/training/language-modeling/run_mlm.py
+++ b/examples/onnxruntime/training/language-modeling/run_mlm.py
@@ -25,6 +25,7 @@
import math
import os
import sys
+import warnings
from dataclasses import dataclass, field
from itertools import chain
from typing import Optional
@@ -52,7 +53,7 @@
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.23.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")
@@ -71,7 +72,7 @@ class ModelArguments:
default=None,
metadata={
"help": (
- "The model checkpoint for weights initialization.Don't set if you want to train a model from scratch."
+ "The model checkpoint for weights initialization. Don't set if you want to train a model from scratch."
)
},
)
@@ -106,12 +107,37 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
+ default=False,
+ metadata={
+ "help": (
+                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
+ )
+ },
+ )
+ low_cpu_mem_usage: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+                "Whether to create the model as an empty shell and only materialize its parameters when the pretrained weights are loaded. "
+                "Setting it to True will benefit LLM loading time and RAM consumption."
)
},
)
@@ -196,8 +222,12 @@ class DataTrainingArguments:
)
},
)
+ streaming: bool = field(default=False, metadata={"help": "Enable streaming mode"})
def __post_init__(self):
+ if self.streaming:
+ require_version("datasets>=2.0.0", "The streaming feature requires `datasets>=2.0.0`")
+
if self.dataset_name is None and self.train_file is None and self.validation_file is None:
raise ValueError("Need either a dataset name or a training/validation file.")
else:
@@ -211,32 +241,24 @@ def __post_init__(self):
raise ValueError("`validation_file` should be a csv, a json or a txt file.")
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -249,6 +271,10 @@ def main():
handlers=[logging.StreamHandler(sys.stdout)],
)
+ if training_args.should_log:
+        # The default of training_args.log_level is passive, so we set the log level to info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -259,7 +285,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
# Set the verbosity to info of the Transformers logger (on main process only):
logger.info(f"Training/evaluation parameters {training_args}")
@@ -297,7 +323,8 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ streaming=data_args.streaming,
)
if "validation" not in raw_datasets.keys():
raw_datasets["validation"] = load_dataset(
@@ -305,14 +332,16 @@ def main():
data_args.dataset_config_name,
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ streaming=data_args.streaming,
)
raw_datasets["train"] = load_dataset(
data_args.dataset_name,
data_args.dataset_config_name,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ streaming=data_args.streaming,
)
else:
data_files = {}
@@ -328,7 +357,7 @@ def main():
extension,
data_files=data_files,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
# If no validation data is there, validation_split_percentage will be used to divide the dataset.
@@ -338,14 +367,14 @@ def main():
data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
raw_datasets["train"] = load_dataset(
extension,
data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
@@ -359,7 +388,8 @@ def main():
config_kwargs = {
"cache_dir": model_args.cache_dir,
"revision": model_args.model_revision,
- "use_auth_token": True if model_args.use_auth_token else None,
+ "token": model_args.token,
+ "trust_remote_code": model_args.trust_remote_code,
}
if model_args.config_name:
config = AutoConfig.from_pretrained(model_args.config_name, **config_kwargs)
@@ -377,7 +407,8 @@ def main():
"cache_dir": model_args.cache_dir,
"use_fast": model_args.use_fast_tokenizer,
"revision": model_args.model_revision,
- "use_auth_token": True if model_args.use_auth_token else None,
+ "token": model_args.token,
+ "trust_remote_code": model_args.trust_remote_code,
}
if model_args.tokenizer_name:
tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name, **tokenizer_kwargs)
@@ -385,7 +416,7 @@ def main():
tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path, **tokenizer_kwargs)
else:
raise ValueError(
- "You are instantiating a new tokenizer from scratch. This is not supported by this script."
+ "You are instantiating a new tokenizer from scratch. This is not supported by this script. "
"You can do it from another script, save it, and load it from here, using --tokenizer_name."
)
@@ -396,34 +427,41 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
+ low_cpu_mem_usage=model_args.low_cpu_mem_usage,
)
else:
logger.info("Training new model from scratch")
- model = AutoModelForMaskedLM.from_config(config)
+ model = AutoModelForMaskedLM.from_config(config, trust_remote_code=model_args.trust_remote_code)
- model.resize_token_embeddings(len(tokenizer))
+ # We resize the embeddings only when necessary to avoid index errors. If you are creating a model from scratch
+ # on a small vocab and want a smaller embedding size, remove this test.
+ embedding_size = model.get_input_embeddings().weight.shape[0]
+ if len(tokenizer) > embedding_size:
+ model.resize_token_embeddings(len(tokenizer))
# Preprocessing the datasets.
# First we tokenize all the texts.
if training_args.do_train:
- column_names = raw_datasets["train"].column_names
+ column_names = list(raw_datasets["train"].features)
else:
- column_names = raw_datasets["validation"].column_names
+ column_names = list(raw_datasets["validation"].features)
text_column_name = "text" if "text" in column_names else column_names[0]
if data_args.max_seq_length is None:
max_seq_length = tokenizer.model_max_length
if max_seq_length > 1024:
logger.warning(
- f"The tokenizer picked seems to have a very large `model_max_length` ({tokenizer.model_max_length}). "
- "Picking 1024 instead. You can change that default value by passing --max_seq_length xxx."
+                "The chosen tokenizer supports a `model_max_length` that is longer than the default `max_seq_length` value"
+                " of 1024. If you would like to use a longer `max_seq_length` up to `tokenizer.model_max_length` you can"
+                " override this default with `--max_seq_length xxx`."
)
max_seq_length = 1024
else:
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
- f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+ f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -448,14 +486,21 @@ def tokenize_function(examples):
)
with training_args.main_process_first(desc="dataset map tokenization"):
- tokenized_datasets = raw_datasets.map(
- tokenize_function,
- batched=True,
- num_proc=data_args.preprocessing_num_workers,
- remove_columns=[text_column_name],
- load_from_cache_file=not data_args.overwrite_cache,
- desc="Running tokenizer on dataset line_by_line",
- )
+ if not data_args.streaming:
+ tokenized_datasets = raw_datasets.map(
+ tokenize_function,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ remove_columns=[text_column_name],
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc="Running tokenizer on dataset line_by_line",
+ )
+ else:
+ tokenized_datasets = raw_datasets.map(
+ tokenize_function,
+ batched=True,
+ remove_columns=[text_column_name],
+ )
else:
# Otherwise, we tokenize every text, then concatenate them together before splitting them in smaller parts.
# We use `return_special_tokens_mask=True` because DataCollatorForLanguageModeling (see below) is more
@@ -464,14 +509,21 @@ def tokenize_function(examples):
return tokenizer(examples[text_column_name], return_special_tokens_mask=True)
with training_args.main_process_first(desc="dataset map tokenization"):
- tokenized_datasets = raw_datasets.map(
- tokenize_function,
- batched=True,
- num_proc=data_args.preprocessing_num_workers,
- remove_columns=column_names,
- load_from_cache_file=not data_args.overwrite_cache,
- desc="Running tokenizer on every text in dataset",
- )
+ if not data_args.streaming:
+ tokenized_datasets = raw_datasets.map(
+ tokenize_function,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ remove_columns=column_names,
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc="Running tokenizer on every text in dataset",
+ )
+ else:
+ tokenized_datasets = raw_datasets.map(
+ tokenize_function,
+ batched=True,
+ remove_columns=column_names,
+ )
# Main data processing function that will concatenate all texts from our dataset and generate chunks of
# max_seq_length.
@@ -479,10 +531,9 @@ def group_texts(examples):
# Concatenate all texts.
concatenated_examples = {k: list(chain(*examples[k])) for k in examples.keys()}
total_length = len(concatenated_examples[list(examples.keys())[0]])
- # We drop the small remainder, we could add padding if the model supported it instead of this drop, you can
- # customize this part to your needs.
- if total_length >= max_seq_length:
- total_length = (total_length // max_seq_length) * max_seq_length
+ # We drop the small remainder, and if the total_length < max_seq_length we exclude this batch and return an empty dict.
+ # We could add padding if the model supported it instead of this drop, you can customize this part to your needs.
+ total_length = (total_length // max_seq_length) * max_seq_length
# Split by chunks of max_len.
result = {
k: [t[i : i + max_seq_length] for i in range(0, total_length, max_seq_length)]
@@ -495,16 +546,22 @@ def group_texts(examples):
# might be slower to preprocess.
#
# To speed up this part, we use multiprocessing. See the documentation of the map method for more information:
- # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map
+ # https://huggingface.co/docs/datasets/process#map
with training_args.main_process_first(desc="grouping texts together"):
- tokenized_datasets = tokenized_datasets.map(
- group_texts,
- batched=True,
- num_proc=data_args.preprocessing_num_workers,
- load_from_cache_file=not data_args.overwrite_cache,
- desc=f"Grouping texts in chunks of {max_seq_length}",
- )
+ if not data_args.streaming:
+ tokenized_datasets = tokenized_datasets.map(
+ group_texts,
+ batched=True,
+ num_proc=data_args.preprocessing_num_workers,
+ load_from_cache_file=not data_args.overwrite_cache,
+ desc=f"Grouping texts in chunks of {max_seq_length}",
+ )
+ else:
+ tokenized_datasets = tokenized_datasets.map(
+ group_texts,
+ batched=True,
+ )
if training_args.do_train:
if "train" not in tokenized_datasets:
@@ -563,7 +620,6 @@ def compute_metrics(eval_preds):
preprocess_logits_for_metrics=preprocess_logits_for_metrics
if training_args.do_eval and not is_torch_tpu_available()
else None,
- feature="fill-mask",
)
# Training
@@ -590,7 +646,7 @@ def compute_metrics(eval_preds):
if training_args.do_eval:
logger.info("*** Evaluate ***")
- metrics = trainer.evaluate(inference_with_ort=inference_args.inference_with_ort)
+ metrics = trainer.evaluate()
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
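Note on `group_texts` in run_clm.py/run_mlm.py above: dropping the `if total_length >= block_size` guard means a tokenized batch shorter than one block now truncates to length 0 and returns empty columns, i.e. the batch is effectively skipped. A small self-contained sketch with toy numbers (`block_size=8` is illustrative, not the script default):

    from itertools import chain

    def group_texts(examples, block_size=8):
        concatenated = {k: list(chain(*examples[k])) for k in examples.keys()}
        total_length = len(concatenated[list(examples.keys())[0]])
        # Truncate to a multiple of block_size; a batch shorter than one block becomes 0.
        total_length = (total_length // block_size) * block_size
        result = {
            k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
            for k, t in concatenated.items()
        }
        result["labels"] = result["input_ids"].copy()
        return result

    print(group_texts({"input_ids": [[1, 2, 3], [4, 5, 6, 7, 8, 9]]}))  # one chunk of 8 tokens, remainder dropped
    print(group_texts({"input_ids": [[1, 2, 3]]}))                      # {'input_ids': [], 'labels': []}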
diff --git a/examples/onnxruntime/training/question-answering/run_qa.py b/examples/onnxruntime/training/question-answering/run_qa.py
index ec93d45c57a..08b581a1a84 100644
--- a/examples/onnxruntime/training/question-answering/run_qa.py
+++ b/examples/onnxruntime/training/question-answering/run_qa.py
@@ -21,6 +21,7 @@
import logging
import os
import sys
+import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -49,7 +50,7 @@
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.23.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")
@@ -79,12 +80,28 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+                "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option "
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
)
},
)
@@ -214,32 +231,24 @@ def __post_init__(self):
assert extension in ["csv", "json"], "`test_file` should be a csv or a json file."
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -252,6 +261,10 @@ def main():
handlers=[logging.StreamHandler(sys.stdout)],
)
+ if training_args.should_log:
+        # The default of training_args.log_level is passive, so we set the log level to info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -262,7 +275,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")
@@ -299,13 +312,14 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
else:
data_files = {}
if data_args.train_file is not None:
data_files["train"] = data_args.train_file
extension = data_args.train_file.split(".")[-1]
+
if data_args.validation_file is not None:
data_files["validation"] = data_args.validation_file
extension = data_args.validation_file.split(".")[-1]
@@ -317,7 +331,7 @@ def main():
data_files=data_files,
field="data",
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html.
@@ -331,14 +345,16 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=True,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
model = AutoModelForQuestionAnswering.from_pretrained(
model_args.model_name_or_path,
@@ -346,7 +362,8 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
# Tokenizer check: this script requires a fast tokenizer.
@@ -374,7 +391,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
- f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+ f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -599,12 +616,12 @@ def post_processing_function(examples, features, predictions, stage="eval"):
# Format the result to the format the metric expects.
if data_args.version_2_with_negative:
formatted_predictions = [
- {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items()
+ {"id": str(k), "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items()
]
else:
- formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()]
+ formatted_predictions = [{"id": str(k), "prediction_text": v} for k, v in predictions.items()]
- references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples]
+ references = [{"id": str(ex["id"]), "answers": ex[answer_column_name]} for ex in examples]
return EvalPrediction(predictions=formatted_predictions, label_ids=references)
metric = evaluate.load("squad_v2" if data_args.version_2_with_negative else "squad")
@@ -623,7 +640,6 @@ def compute_metrics(p: EvalPrediction):
data_collator=data_collator,
post_process_function=post_processing_function,
compute_metrics=compute_metrics,
- feature="question-answering",
)
# Training
@@ -649,7 +665,7 @@ def compute_metrics(p: EvalPrediction):
# Evaluation
if training_args.do_eval:
logger.info("*** Evaluate ***")
- metrics = trainer.evaluate(inference_with_ort=inference_args.inference_with_ort)
+ metrics = trainer.evaluate()
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
@@ -660,10 +676,7 @@ def compute_metrics(p: EvalPrediction):
# Prediction
if training_args.do_predict:
logger.info("*** Predict ***")
- results = trainer.predict(
- predict_dataset, predict_examples, inference_with_ort=inference_args.inference_with_ort
- )
-
+ results = trainer.predict(predict_dataset, predict_examples)
metrics = results.metrics
max_predict_samples = (
diff --git a/examples/onnxruntime/training/question-answering/trainer_qa.py b/examples/onnxruntime/training/question-answering/trainer_qa.py
index 695ca929277..26ea820ace0 100644
--- a/examples/onnxruntime/training/question-answering/trainer_qa.py
+++ b/examples/onnxruntime/training/question-answering/trainer_qa.py
@@ -15,7 +15,10 @@
"""
A subclass of `ORTTrainer` specific to Question-Answering tasks
"""
-from transformers.trainer_utils import PredictionOutput
+import math
+import time
+
+from transformers.trainer_utils import PredictionOutput, speed_metrics
from optimum.onnxruntime import ORTTrainer
@@ -26,14 +29,7 @@ def __init__(self, *args, eval_examples=None, post_process_function=None, **kwar
self.eval_examples = eval_examples
self.post_process_function = post_process_function
- def evaluate(
- self,
- eval_dataset=None,
- eval_examples=None,
- ignore_keys=None,
- metric_key_prefix: str = "eval",
- inference_with_ort: bool = False,
- ):
+ def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=None, metric_key_prefix: str = "eval"):
eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset
eval_dataloader = self.get_eval_dataloader(eval_dataset)
eval_examples = self.eval_examples if eval_examples is None else eval_examples
@@ -41,11 +37,8 @@ def evaluate(
# Temporarily disable metric computation, we will do it in the loop here.
compute_metrics = self.compute_metrics
self.compute_metrics = None
- if inference_with_ort:
- eval_loop = self.prediction_loop_ort if self.args.use_legacy_prediction_loop else self.evaluation_loop_ort
- else:
- eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
-
+ eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
+ start_time = time.time()
try:
output = eval_loop(
eval_dataloader,
@@ -54,11 +47,23 @@ def evaluate(
# self.args.prediction_loss_only
prediction_loss_only=True if compute_metrics is None else None,
ignore_keys=ignore_keys,
+ metric_key_prefix=metric_key_prefix,
)
finally:
self.compute_metrics = compute_metrics
-
- if self.post_process_function is not None and self.compute_metrics is not None:
+ total_batch_size = self.args.eval_batch_size * self.args.world_size
+ if f"{metric_key_prefix}_jit_compilation_time" in output.metrics:
+ start_time += output.metrics[f"{metric_key_prefix}_jit_compilation_time"]
+ output.metrics.update(
+ speed_metrics(
+ metric_key_prefix,
+ start_time,
+ num_samples=output.num_samples,
+ num_steps=math.ceil(output.num_samples / total_batch_size),
+ )
+ )
+ if self.post_process_function is not None and self.compute_metrics is not None and self.args.should_save:
+            # Only the main node writes the results by default
eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions)
metrics = self.compute_metrics(eval_preds)
@@ -66,31 +71,25 @@ def evaluate(
for key in list(metrics.keys()):
if not key.startswith(f"{metric_key_prefix}_"):
metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
+ metrics.update(output.metrics)
+ else:
+ metrics = output.metrics
+ if self.args.should_log:
+            # Only the main node logs the results by default
self.log(metrics)
- else:
- metrics = {}
self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics)
return metrics
- def predict(
- self,
- predict_dataset,
- predict_examples,
- ignore_keys=None,
- metric_key_prefix: str = "test",
- inference_with_ort: bool = False,
- ):
+ def predict(self, predict_dataset, predict_examples, ignore_keys=None, metric_key_prefix: str = "test"):
predict_dataloader = self.get_test_dataloader(predict_dataset)
# Temporarily disable metric computation, we will do it in the loop here.
compute_metrics = self.compute_metrics
self.compute_metrics = None
- if inference_with_ort:
- eval_loop = self.prediction_loop_ort if self.args.use_legacy_prediction_loop else self.evaluation_loop_ort
- else:
- eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
+ eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
+ start_time = time.time()
try:
output = eval_loop(
predict_dataloader,
@@ -99,9 +98,21 @@ def predict(
# self.args.prediction_loss_only
prediction_loss_only=True if compute_metrics is None else None,
ignore_keys=ignore_keys,
+ metric_key_prefix=metric_key_prefix,
)
finally:
self.compute_metrics = compute_metrics
+ total_batch_size = self.args.eval_batch_size * self.args.world_size
+ if f"{metric_key_prefix}_jit_compilation_time" in output.metrics:
+ start_time += output.metrics[f"{metric_key_prefix}_jit_compilation_time"]
+ output.metrics.update(
+ speed_metrics(
+ metric_key_prefix,
+ start_time,
+ num_samples=output.num_samples,
+ num_steps=math.ceil(output.num_samples / total_batch_size),
+ )
+ )
if self.post_process_function is None or self.compute_metrics is None:
return output
@@ -113,5 +124,5 @@ def predict(
for key in list(metrics.keys()):
if not key.startswith(f"{metric_key_prefix}_"):
metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
-
+ metrics.update(output.metrics)
return PredictionOutput(predictions=predictions.predictions, label_ids=predictions.label_ids, metrics=metrics)
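Note on the `speed_metrics` calls added to trainer_qa.py above: the helper imported from `transformers.trainer_utils` produces timing entries keyed by the metric prefix, which are now merged into the eval/test metrics. A hedged sketch of the expected output, assuming a recent transformers release:

    import time

    from transformers.trainer_utils import speed_metrics

    start_time = time.time()
    # ... evaluation loop would run here ...
    metrics = speed_metrics("eval", start_time, num_samples=100, num_steps=13)
    # Expected keys (values depend on wall-clock time):
    #   eval_runtime, eval_samples_per_second, eval_steps_per_second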
diff --git a/examples/onnxruntime/training/summarization/run_summarization.py b/examples/onnxruntime/training/summarization/run_summarization.py
index d1264489d82..83ec61f225b 100644
--- a/examples/onnxruntime/training/summarization/run_summarization.py
+++ b/examples/onnxruntime/training/summarization/run_summarization.py
@@ -21,6 +21,7 @@
import logging
import os
import sys
+import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -51,7 +52,7 @@
# Might have error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.23.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")
@@ -98,12 +99,28 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+ "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
)
},
)
@@ -187,7 +204,7 @@ class DataTrainingArguments:
metadata={
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
- "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+ "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
@@ -246,14 +263,14 @@ class DataTrainingArguments:
},
)
source_prefix: Optional[str] = field(
- default="", metadata={"help": "A prefix to add before every source text (useful for T5 models)."}
+ default=None, metadata={"help": "A prefix to add before every source text (useful for T5 models)."}
)
forced_bos_token: Optional[str] = field(
default=None,
metadata={
"help": (
- "The token to force as the first generated token after the decoder_start_token_id."
+ "The token to force as the first generated token after the decoder_start_token_id. "
"Useful for multilingual models like mBART where the first generated token"
"needs to be the target language token (Usually it is the target language token)"
)
@@ -261,8 +278,13 @@ class DataTrainingArguments:
)
def __post_init__(self):
- if self.dataset_name is None and self.train_file is None and self.validation_file is None:
- raise ValueError("Need either a dataset name or a training/validation file.")
+ if (
+ self.dataset_name is None
+ and self.train_file is None
+ and self.validation_file is None
+ and self.test_file is None
+ ):
+ raise ValueError("Need either a dataset name or a training, validation, or test file.")
else:
if self.train_file is not None:
extension = self.train_file.split(".")[-1]
@@ -270,22 +292,13 @@ def __post_init__(self):
if self.validation_file is not None:
extension = self.validation_file.split(".")[-1]
assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file."
+ if self.test_file is not None:
+ extension = self.test_file.split(".")[-1]
+ assert extension in ["csv", "json"], "`test_file` should be a csv or a json file."
if self.val_max_target_length is None:
self.val_max_target_length = self.max_target_length
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
summarization_name_mapping = {
"amazon_reviews_multi": ("review_body", "review_title"),
"big_patent": ("description", "abstract"),
@@ -307,15 +320,19 @@ def main():
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTSeq2SeqTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTSeq2SeqTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -327,6 +344,11 @@ def main():
datefmt="%m/%d/%Y %H:%M:%S",
handlers=[logging.StreamHandler(sys.stdout)],
)
+
+ if training_args.should_log:
+ # The default of training_args.log_level is passive, so we set log level at info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -337,7 +359,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")
@@ -386,7 +408,7 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
else:
data_files = {}
@@ -403,7 +425,7 @@ def main():
extension,
data_files=data_files,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html.
@@ -417,14 +439,16 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path,
@@ -432,11 +456,10 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
- model.resize_token_embeddings(len(tokenizer))
-
if model.config.decoder_start_token_id is None and isinstance(tokenizer, (MBartTokenizer, MBartTokenizerFast)):
if isinstance(tokenizer, MBartTokenizer):
model.config.decoder_start_token_id = tokenizer.lang_code_to_id[data_args.lang]
@@ -471,10 +494,16 @@ def main():
# Preprocessing the datasets.
# We need to tokenize inputs and targets.
if training_args.do_train:
+ if "train" not in raw_datasets:
+ raise ValueError("--do_train requires a train dataset")
column_names = raw_datasets["train"].column_names
elif training_args.do_eval:
+ if "validation" not in raw_datasets:
+ raise ValueError("--do_eval requires a validation dataset")
column_names = raw_datasets["validation"].column_names
elif training_args.do_predict:
+ if "test" not in raw_datasets:
+ raise ValueError("--do_predict requires a test dataset")
column_names = raw_datasets["test"].column_names
else:
logger.info("There is nothing to do. Please pass `do_train`, `do_eval` and/or `do_predict`.")
@@ -520,7 +549,7 @@ def main():
if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
logger.warning(
- "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
+ "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for "
f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
)
@@ -550,8 +579,6 @@ def preprocess_function(examples):
return model_inputs
if training_args.do_train:
- if "train" not in raw_datasets:
- raise ValueError("--do_train requires a train dataset")
train_dataset = raw_datasets["train"]
if data_args.max_train_samples is not None:
max_train_samples = min(len(train_dataset), data_args.max_train_samples)
@@ -568,8 +595,6 @@ def preprocess_function(examples):
if training_args.do_eval:
max_target_length = data_args.val_max_target_length
- if "validation" not in raw_datasets:
- raise ValueError("--do_eval requires a validation dataset")
eval_dataset = raw_datasets["validation"]
if data_args.max_eval_samples is not None:
max_eval_samples = min(len(eval_dataset), data_args.max_eval_samples)
@@ -586,8 +611,6 @@ def preprocess_function(examples):
if training_args.do_predict:
max_target_length = data_args.val_max_target_length
- if "test" not in raw_datasets:
- raise ValueError("--do_predict requires a test dataset")
predict_dataset = raw_datasets["test"]
if data_args.max_predict_samples is not None:
max_predict_samples = min(len(predict_dataset), data_args.max_predict_samples)
@@ -628,10 +651,10 @@ def compute_metrics(eval_preds):
preds, labels = eval_preds
if isinstance(preds, tuple):
preds = preds[0]
+ # Replace -100s used for padding as we can't decode them
+ preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
- if data_args.ignore_pad_token_for_loss:
- # Replace -100 in the labels as we can't decode them.
- labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
+ labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
# Some simple post-processing
@@ -643,6 +666,16 @@ def compute_metrics(eval_preds):
result["gen_len"] = np.mean(prediction_lens)
return result
+ # Override the decoding parameters of Seq2SeqTrainer
+ training_args.generation_max_length = (
+ training_args.generation_max_length
+ if training_args.generation_max_length is not None
+ else data_args.val_max_target_length
+ )
+ training_args.generation_num_beams = (
+ data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams
+ )
+
# Initialize our Trainer
trainer = ORTSeq2SeqTrainer(
model=model,
@@ -652,7 +685,6 @@ def compute_metrics(eval_preds):
tokenizer=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
- feature="text2text-generation",
)
# Training
@@ -677,20 +709,15 @@ def compute_metrics(eval_preds):
# Evaluation
results = {}
- max_length = (
- training_args.generation_max_length
- if training_args.generation_max_length is not None
- else data_args.val_max_target_length
- )
- num_beams = data_args.num_beams if data_args.num_beams is not None else training_args.generation_num_beams
if training_args.do_eval:
logger.info("*** Evaluate ***")
- metrics = trainer.evaluate(
- max_length=max_length,
- num_beams=num_beams,
- metric_key_prefix="eval",
- inference_with_ort=inference_args.inference_with_ort,
- )
+ if isinstance(eval_dataset, dict):
+ metrics = {}
+ for eval_ds_name, eval_ds in eval_dataset.items():
+ dataset_metrics = trainer.evaluate(eval_dataset=eval_ds, metric_key_prefix=f"eval_{eval_ds_name}")
+ metrics.update(dataset_metrics)
+ else:
+ metrics = trainer.evaluate(metric_key_prefix="eval")
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
@@ -700,13 +727,7 @@ def compute_metrics(eval_preds):
if training_args.do_predict:
logger.info("*** Predict ***")
- predict_results = trainer.predict(
- predict_dataset,
- metric_key_prefix="predict",
- max_length=max_length,
- num_beams=num_beams,
- inference_with_ort=inference_args.inference_with_ort,
- )
+ predict_results = trainer.predict(predict_dataset, metric_key_prefix="predict")
metrics = predict_results.metrics
max_predict_samples = (
data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset)
@@ -718,8 +739,10 @@ def compute_metrics(eval_preds):
if trainer.is_world_process_zero():
if training_args.predict_with_generate:
+ predictions = predict_results.predictions
+ predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
predictions = tokenizer.batch_decode(
- predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
+ predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
)
predictions = [pred.strip() for pred in predictions]
output_prediction_file = os.path.join(training_args.output_dir, "generated_predictions.txt")
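The same `use_auth_token` -> `token` migration recurs in every script touched below. A minimal sketch of the shared back-compat logic, using a hypothetical `resolve_token` helper name (the scripts inline this block after argument parsing instead):

import warnings

def resolve_token(token, use_auth_token):
    # Hypothetical helper; the body mirrors the block added to each script:
    # `use_auth_token` still works but emits a FutureWarning, and passing both
    # arguments at once is rejected.
    if use_auth_token is not None:
        warnings.warn(
            "The `use_auth_token` argument is deprecated and will be removed in v4.34.",
            FutureWarning,
        )
        if token is not None:
            raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
        token = use_auth_token
    return token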
diff --git a/examples/onnxruntime/training/test_examples.py b/examples/onnxruntime/training/test_examples.py
deleted file mode 100644
index 8fe1de53d56..00000000000
--- a/examples/onnxruntime/training/test_examples.py
+++ /dev/null
@@ -1,174 +0,0 @@
-# coding=utf-8
-# Copyright 2022 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import json
-import logging
-import os
-import sys
-import unittest
-from unittest.mock import patch
-
-import torch
-from transformers.file_utils import is_apex_available
-from transformers.testing_utils import TestCasePlus, get_gpu_count, slow, torch_device
-
-
-SRC_DIRS = [
- os.path.join(os.path.dirname(__file__), dirname)
- for dirname in [
- "text-classification",
- "token-classification",
- "question-answering",
- "translation",
- ]
-]
-sys.path.extend(SRC_DIRS)
-if SRC_DIRS is not None:
- import run_glue
- import run_ner
- import run_qa
- import run_translation
-
-
-logging.basicConfig(level=logging.DEBUG)
-logger = logging.getLogger()
-
-
-def get_results(output_dir):
- results = {}
- path = os.path.join(output_dir, "all_results.json")
- if os.path.exists(path):
- with open(path, "r") as f:
- results = json.load(f)
- else:
- raise ValueError(f"can't find {path}")
- return results
-
-
-def is_cuda_and_apex_available():
- is_using_cuda = torch.cuda.is_available() and torch_device == "cuda"
- return is_using_cuda and is_apex_available()
-
-
-class ExamplesTests(TestCasePlus):
- # Text Classification Tests
- def test_run_glue(self):
- stream_handler = logging.StreamHandler(sys.stdout)
- logger.addHandler(stream_handler)
-
- tmp_dir = self.get_auto_remove_tmp_dir()
- testargs = f"""
- run_glue.py
- --model_name_or_path bert-base-uncased
- --task_name sst2
- --do_train
- --do_eval
- --output_dir {tmp_dir}
- --overwrite_output_dir
- --learning_rate=1e-5
- --per_device_train_batch_size=16
- --per_device_eval_batch_size=16
- """.split()
-
- with patch.object(sys, "argv", testargs):
- run_glue.main()
- result = get_results(tmp_dir)
- self.assertGreaterEqual(result["eval_accuracy"], 0.75)
-
- # Token Classification Tests
- def test_run_ner(self):
- stream_handler = logging.StreamHandler(sys.stdout)
- logger.addHandler(stream_handler)
-
- # with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
- epochs = 7 if get_gpu_count() > 1 else 2
-
- tmp_dir = self.get_auto_remove_tmp_dir()
- testargs = f"""
- run_ner.py
- --model_name_or_path bert-base-uncased
- --dataset_name conll2003
- --do_train
- --do_eval
- --output_dir {tmp_dir}
- --overwrite_output_dir
- --learning_rate=1e-5
- --per_device_train_batch_size=16
- --per_device_eval_batch_size=16
- --num_train_epochs={epochs}
- """.split()
-
- with patch.object(sys, "argv", testargs):
- run_ner.main()
- result = get_results(tmp_dir)
- self.assertGreaterEqual(result["eval_accuracy"], 0.75)
- self.assertLess(result["eval_loss"], 0.5)
-
- # Question Answering Tests
- def test_run_qa(self):
- stream_handler = logging.StreamHandler(sys.stdout)
- logger.addHandler(stream_handler)
-
- tmp_dir = self.get_auto_remove_tmp_dir()
- testargs = f"""
- run_qa.py
- --model_name_or_path bert-base-uncased
- --dataset_name squad
- --do_train
- --do_eval
- --output_dir {tmp_dir}
- --overwrite_output_dir
- --learning_rate=1e-5
- --per_device_train_batch_size=16
- --per_device_eval_batch_size=16
- """.split()
-
- with patch.object(sys, "argv", testargs):
- run_qa.main()
- result = get_results(tmp_dir)
- self.assertGreaterEqual(result["eval_f1"], 30)
- self.assertGreaterEqual(result["eval_exact"], 30)
-
- @slow
- def test_run_translation(self):
- stream_handler = logging.StreamHandler(sys.stdout)
- logger.addHandler(stream_handler)
-
- tmp_dir = self.get_auto_remove_tmp_dir()
- testargs = f"""
- run_translation.py
- --model_name_or_path t5-large
- --source_lang en
- --target_lang ro
- --dataset_name wmt16
- --output_dir {tmp_dir}
- --overwrite_output_dir
- --max_steps=50
- --warmup_steps=8
- --do_train
- --learning_rate=3e-3
- --per_device_train_batch_size=2
- --per_device_eval_batch_size=1
- --predict_with_generate
- """.split()
-
- with patch.object(sys, "argv", testargs):
- run_translation.main()
- result = get_results(tmp_dir)
- self.assertGreaterEqual(result["eval_bleu"], 30)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/examples/onnxruntime/training/text-classification/run_glue.py b/examples/onnxruntime/training/text-classification/run_glue.py
index 7a81a2ff156..f3f04657afb 100644
--- a/examples/onnxruntime/training/text-classification/run_glue.py
+++ b/examples/onnxruntime/training/text-classification/run_glue.py
@@ -21,6 +21,7 @@
import os
import random
import sys
+import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -48,7 +49,7 @@
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.23.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
@@ -188,12 +189,28 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+ "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
)
},
)
@@ -203,32 +220,24 @@ class ModelArguments:
)
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -241,6 +250,10 @@ def main():
handlers=[logging.StreamHandler(sys.stdout)],
)
+ if training_args.should_log:
+ # The default of training_args.log_level is passive, so we set log level at info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -291,7 +304,7 @@ def main():
"glue",
data_args.task_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
elif data_args.dataset_name is not None:
# Downloading and loading a dataset from the hub.
@@ -299,7 +312,7 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
else:
# Loading a dataset from your local files.
@@ -328,7 +341,7 @@ def main():
"csv",
data_files=data_files,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
else:
# Loading a dataset from local json files
@@ -336,7 +349,7 @@ def main():
"json",
data_files=data_files,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
# See more about loading any type of standard or custom dataset at
# https://huggingface.co/docs/datasets/loading_datasets.html.
@@ -371,14 +384,16 @@ def main():
finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
model = AutoModelForSequenceClassification.from_pretrained(
model_args.model_name_or_path,
@@ -386,7 +401,8 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
)
@@ -440,7 +456,7 @@ def main():
if data_args.max_seq_length > tokenizer.model_max_length:
logger.warning(
- f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the"
+ f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the "
f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}."
)
max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length)
@@ -496,6 +512,8 @@ def preprocess_function(examples):
# Get the metric function
if data_args.task_name is not None:
metric = evaluate.load("glue", data_args.task_name)
+ elif is_regression:
+ metric = evaluate.load("mse")
else:
metric = evaluate.load("accuracy")
@@ -504,17 +522,12 @@ def preprocess_function(examples):
def compute_metrics(p: EvalPrediction):
preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1)
- if data_args.task_name is not None:
- result = metric.compute(predictions=preds, references=p.label_ids)
- if len(result) > 1:
- result["combined_score"] = np.mean(list(result.values())).item()
- return result
- elif is_regression:
- return {"mse": ((preds - p.label_ids) ** 2).mean().item()}
- else:
- return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}
+ result = metric.compute(predictions=preds, references=p.label_ids)
+ if len(result) > 1:
+ result["combined_score"] = np.mean(list(result.values())).item()
+ return result
- # Data collator will default to DataCollatorWithPadding when the tokenizer is passed to ORTTrainer, so we change it if
+ # Data collator will default to DataCollatorWithPadding when the tokenizer is passed to Trainer, so we change it if
# we already did the padding.
if data_args.pad_to_max_length:
data_collator = default_data_collator
@@ -532,7 +545,6 @@ def compute_metrics(p: EvalPrediction):
compute_metrics=compute_metrics,
tokenizer=tokenizer,
data_collator=data_collator,
- feature="text-classification",
)
# Training
@@ -550,6 +562,7 @@ def compute_metrics(p: EvalPrediction):
metrics["train_samples"] = min(max_train_samples, len(train_dataset))
trainer.save_model() # Saves the tokenizer too for easy upload
+
trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()
@@ -571,7 +584,7 @@ def compute_metrics(p: EvalPrediction):
combined = {}
for eval_dataset, task in zip(eval_datasets, tasks):
- metrics = trainer.evaluate(eval_dataset=eval_dataset, inference_with_ort=inference_args.inference_with_ort)
+ metrics = trainer.evaluate(eval_dataset=eval_dataset)
max_eval_samples = (
data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
@@ -599,9 +612,7 @@ def compute_metrics(p: EvalPrediction):
for predict_dataset, task in zip(predict_datasets, tasks):
# Removing the `label` columns because it contains -1 and Trainer won't like that.
predict_dataset = predict_dataset.remove_columns("label")
- predictions = trainer.predict(
- predict_dataset, metric_key_prefix="predict", inference_with_ort=inference_args.inference_with_ort
- ).predictions
+ predictions = trainer.predict(predict_dataset, metric_key_prefix="predict").predictions
predictions = np.squeeze(predictions) if is_regression else np.argmax(predictions, axis=1)
output_predict_file = os.path.join(training_args.output_dir, f"predict_results_{task}.txt")
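The `compute_metrics` change above collapses three code paths into one by always loading a metric object up front (adding `evaluate.load("mse")` for the regression case). A self-contained sketch of the resulting shape, with a hypothetical factory name (run_glue.py builds the metric and closure inline):

import evaluate
import numpy as np

def make_compute_metrics(is_regression, task_name=None):
    # One metric object, one code path, matching the simplified logic in the diff.
    if task_name is not None:
        metric = evaluate.load("glue", task_name)
    elif is_regression:
        metric = evaluate.load("mse")
    else:
        metric = evaluate.load("accuracy")

    def compute_metrics(eval_pred):
        preds, labels = eval_pred
        preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1)
        result = metric.compute(predictions=preds, references=labels)
        if len(result) > 1:
            result["combined_score"] = np.mean(list(result.values())).item()
        return result

    return compute_metrics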
diff --git a/examples/onnxruntime/training/token-classification/run_ner.py b/examples/onnxruntime/training/token-classification/run_ner.py
index 80366f07adb..55ddfa2cf0d 100644
--- a/examples/onnxruntime/training/token-classification/run_ner.py
+++ b/examples/onnxruntime/training/token-classification/run_ner.py
@@ -22,6 +22,7 @@
import logging
import os
import sys
+import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -49,7 +50,7 @@
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.23.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")
@@ -79,12 +80,28 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+ "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
)
},
)
@@ -204,32 +221,24 @@ def __post_init__(self):
self.task_name = self.task_name.lower()
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -242,6 +251,10 @@ def main():
handlers=[logging.StreamHandler(sys.stdout)],
)
+ if training_args.should_log:
+ # The default of training_args.log_level is passive, so we set log level at info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -252,7 +265,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")
@@ -289,7 +302,7 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
else:
data_files = {}
@@ -358,7 +371,8 @@ def get_label_list(labels):
finetuning_task=data_args.task_name,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
@@ -368,7 +382,8 @@ def get_label_list(labels):
cache_dir=model_args.cache_dir,
use_fast=True,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
add_prefix_space=True,
)
else:
@@ -377,7 +392,8 @@ def get_label_list(labels):
cache_dir=model_args.cache_dir,
use_fast=True,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
model = AutoModelForTokenClassification.from_pretrained(
@@ -386,7 +402,8 @@ def get_label_list(labels):
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
)
@@ -567,7 +584,6 @@ def compute_metrics(p):
tokenizer=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics,
- feature="token-classification",
)
# Training
@@ -580,6 +596,7 @@ def compute_metrics(p):
train_result = trainer.train(resume_from_checkpoint=checkpoint)
metrics = train_result.metrics
trainer.save_model() # Saves the tokenizer too for easy upload
+
max_train_samples = (
data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset)
)
@@ -593,7 +610,7 @@ def compute_metrics(p):
if training_args.do_eval:
logger.info("*** Evaluate ***")
- metrics = trainer.evaluate(inference_with_ort=inference_args.inference_with_ort)
+ metrics = trainer.evaluate()
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
@@ -605,10 +622,7 @@ def compute_metrics(p):
if training_args.do_predict:
logger.info("*** Predict ***")
- predictions, labels, metrics = trainer.predict(
- predict_dataset, metric_key_prefix="predict", inference_with_ort=inference_args.inference_with_ort
- )
-
+ predictions, labels, metrics = trainer.predict(predict_dataset, metric_key_prefix="predict")
predictions = np.argmax(predictions, axis=2)
# Remove ignored index (special tokens)
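For context on the simplified `trainer.predict(...)` call above, the post-processing it feeds into (unchanged in run_ner.py) boils down to the following sketch, assuming the `label_list` built earlier in the script:

import numpy as np

def align_predictions(predictions, labels, label_list):
    # Take the argmax over the label dimension, then drop every position whose gold label
    # is -100 (special tokens and padding that were ignored in the loss).
    preds = np.argmax(predictions, axis=2)
    return [
        [label_list[p] for p, l in zip(pred_row, label_row) if l != -100]
        for pred_row, label_row in zip(preds, labels)
    ]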
diff --git a/examples/onnxruntime/training/translation/run_translation.py b/examples/onnxruntime/training/translation/run_translation.py
index e410454f2f4..0b6a36d12ff 100644
--- a/examples/onnxruntime/training/translation/run_translation.py
+++ b/examples/onnxruntime/training/translation/run_translation.py
@@ -21,6 +21,7 @@
import logging
import os
import sys
+import warnings
from dataclasses import dataclass, field
from typing import Optional
@@ -52,7 +53,7 @@
# Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.23.0")
+check_min_version("4.34.0")
require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")
@@ -89,12 +90,28 @@ class ModelArguments:
default="main",
metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."},
)
+ token: str = field(
+ default=None,
+ metadata={
+ "help": (
+ "The token to use as HTTP bearer authorization for remote files. If not specified, will use the token "
+ "generated when running `huggingface-cli login` (stored in `~/.huggingface`)."
+ )
+ },
+ )
use_auth_token: bool = field(
+ default=None,
+ metadata={
+ "help": "The `use_auth_token` argument is deprecated and will be removed in v4.34. Please use `token`."
+ },
+ )
+ trust_remote_code: bool = field(
default=False,
metadata={
"help": (
- "Will use the token generated when running `huggingface-cli login` (necessary to use this script "
- "with private models)."
+ "Whether or not to allow for custom models defined on the Hub in their own modeling files. This option"
+ "should only be set to `True` for repositories you trust and in which you have read the code, as it will "
+ "execute code present on the Hub on your local machine."
)
},
)
@@ -156,7 +173,7 @@ class DataTrainingArguments:
metadata={
"help": (
"The maximum total sequence length for validation target text after tokenization. Sequences longer "
- "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`."
+ "than this will be truncated, sequences shorter will be padded. Will default to `max_target_length`. "
"This argument is also used to override the ``max_length`` param of ``model.generate``, which is used "
"during ``evaluate`` and ``predict``."
)
@@ -248,32 +265,24 @@ def __post_init__(self):
self.val_max_target_length = self.max_target_length
-@dataclass
-class InferenceArguments:
- """
- Arguments for inference(evaluate, predict).
- """
-
- inference_with_ort: bool = field(
- default=False,
- metadata={"help": "Whether use ONNX Runtime as backend for inference. Default set to false."},
- )
-
-
def main():
# See all possible arguments in src/transformers/training_args.py
# or by passing the --help flag to this script.
# We now keep distinct sets of args, for a cleaner separation of concerns.
- parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTSeq2SeqTrainingArguments, InferenceArguments))
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, ORTSeq2SeqTrainingArguments))
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
# If we pass only one argument to the script and it's the path to a json file,
# let's parse it to get our arguments.
- model_args, data_args, training_args, inference_args = parser.parse_json_file(
- json_file=os.path.abspath(sys.argv[1])
- )
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
else:
- model_args, data_args, training_args, inference_args = parser.parse_args_into_dataclasses()
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+ if model_args.use_auth_token is not None:
+ warnings.warn("The `use_auth_token` argument is deprecated and will be removed in v4.34.", FutureWarning)
+ if model_args.token is not None:
+ raise ValueError("`token` and `use_auth_token` are both specified. Please set only the argument `token`.")
+ model_args.token = model_args.use_auth_token
# Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The
# information sent is the one passed as arguments along with your Python/PyTorch versions.
@@ -286,6 +295,10 @@ def main():
handlers=[logging.StreamHandler(sys.stdout)],
)
+ if training_args.should_log:
+ # The default of training_args.log_level is passive, so we set log level at info here to have that default.
+ transformers.utils.logging.set_verbosity_info()
+
log_level = training_args.get_process_log_level()
logger.setLevel(log_level)
datasets.utils.logging.set_verbosity(log_level)
@@ -296,7 +309,7 @@ def main():
# Log on each process the small summary:
logger.warning(
f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}"
- + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+ + f"distributed training: {training_args.parallel_mode.value == 'distributed'}, 16-bits training: {training_args.fp16}"
)
logger.info(f"Training/evaluation parameters {training_args}")
@@ -345,7 +358,7 @@ def main():
data_args.dataset_name,
data_args.dataset_config_name,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
else:
data_files = {}
@@ -362,7 +375,7 @@ def main():
extension,
data_files=data_files,
cache_dir=model_args.cache_dir,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
)
# See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at
# https://huggingface.co/docs/datasets/loading_datasets.html.
@@ -376,14 +389,16 @@ def main():
model_args.config_name if model_args.config_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
tokenizer = AutoTokenizer.from_pretrained(
model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
cache_dir=model_args.cache_dir,
use_fast=model_args.use_fast_tokenizer,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
model = AutoModelForSeq2SeqLM.from_pretrained(
model_args.model_name_or_path,
@@ -391,11 +406,10 @@ def main():
config=config,
cache_dir=model_args.cache_dir,
revision=model_args.model_revision,
- use_auth_token=True if model_args.use_auth_token else None,
+ token=model_args.token,
+ trust_remote_code=model_args.trust_remote_code,
)
- model.resize_token_embeddings(len(tokenizer))
-
# Set decoder_start_token_id
if model.config.decoder_start_token_id is None and isinstance(tokenizer, (MBartTokenizer, MBartTokenizerFast)):
if isinstance(tokenizer, MBartTokenizer):
@@ -448,7 +462,7 @@ def main():
if training_args.label_smoothing_factor > 0 and not hasattr(model, "prepare_decoder_input_ids_from_labels"):
logger.warning(
- "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for"
+ "label_smoothing is enabled but the `prepare_decoder_input_ids_from_labels` method is not defined for "
f"`{model.__class__.__name__}`. This will lead to loss being calculated twice and will take up more memory"
)
@@ -549,10 +563,10 @@ def compute_metrics(eval_preds):
preds, labels = eval_preds
if isinstance(preds, tuple):
preds = preds[0]
+ # Replace -100s used for padding as we can't decode them
+ preds = np.where(preds != -100, preds, tokenizer.pad_token_id)
decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
- if data_args.ignore_pad_token_for_loss:
- # Replace -100 in the labels as we can't decode them.
- labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
+ labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
# Some simple post-processing
@@ -575,7 +589,6 @@ def compute_metrics(eval_preds):
tokenizer=tokenizer,
data_collator=data_collator,
compute_metrics=compute_metrics if training_args.predict_with_generate else None,
- feature="text2text-generation",
)
# Training
@@ -609,12 +622,7 @@ def compute_metrics(eval_preds):
if training_args.do_eval:
logger.info("*** Evaluate ***")
- metrics = trainer.evaluate(
- max_length=max_length,
- num_beams=num_beams,
- metric_key_prefix="eval",
- inference_with_ort=inference_args.inference_with_ort,
- )
+ metrics = trainer.evaluate(max_length=max_length, num_beams=num_beams, metric_key_prefix="eval")
max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset)
metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset))
@@ -625,11 +633,7 @@ def compute_metrics(eval_preds):
logger.info("*** Predict ***")
predict_results = trainer.predict(
- predict_dataset,
- metric_key_prefix="predict",
- max_length=max_length,
- num_beams=num_beams,
- inference_with_ort=inference_args.inference_with_ort,
+ predict_dataset, metric_key_prefix="predict", max_length=max_length, num_beams=num_beams
)
metrics = predict_results.metrics
max_predict_samples = (
@@ -642,8 +646,10 @@ def compute_metrics(eval_preds):
if trainer.is_world_process_zero():
if training_args.predict_with_generate:
+ predictions = predict_results.predictions
+ predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
predictions = tokenizer.batch_decode(
- predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
+ predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True
)
predictions = [pred.strip() for pred in predictions]
output_prediction_file = os.path.join(training_args.output_dir, "generated_predictions.txt")
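Both generation scripts above gain the same guard before decoding predictions. A sketch of that fix in isolation (the function name is illustrative):

import numpy as np

def decode_generated_ids(predictions, tokenizer):
    # Generated ids may contain -100 (the ignore index used for padding), which tokenizers
    # cannot decode, so they are replaced with the pad token id before batch_decode.
    predictions = np.where(predictions != -100, predictions, tokenizer.pad_token_id)
    decoded = tokenizer.batch_decode(predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return [pred.strip() for pred in decoded]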
diff --git a/optimum/onnxruntime/trainer.py b/optimum/onnxruntime/trainer.py
index 89363b6b266..afc90e405bb 100644
--- a/optimum/onnxruntime/trainer.py
+++ b/optimum/onnxruntime/trainer.py
@@ -22,8 +22,7 @@
import time
import types
import warnings
-from pathlib import Path
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Type, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union
# Integrations must be imported before ML frameworks:
@@ -49,43 +48,27 @@
# isort: on
-import numpy as np
+import huggingface_hub.utils as hf_hub_utils
import torch
import torch.distributed as dist
from torch import nn
-from torch.utils.data import DataLoader, Dataset
+from torch.utils.data import Dataset, RandomSampler
from transformers.data.data_collator import DataCollator
from transformers.debug_utils import DebugOption, DebugUnderflowOverflow
from transformers.deepspeed import deepspeed_init, deepspeed_load_checkpoint, is_deepspeed_zero3_enabled
-from transformers.file_utils import (
- is_apex_available,
- is_sagemaker_dp_enabled,
- is_sagemaker_mp_enabled,
- is_torch_tpu_available,
-)
from transformers.modeling_utils import PreTrainedModel, unwrap_model
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
from transformers.trainer import Trainer
from transformers.trainer_callback import TrainerCallback, TrainerState
from transformers.trainer_pt_utils import (
- DistributedTensorGatherer,
- IterableDatasetShard,
- SequentialDistributedSampler,
- find_batch_size,
get_model_param_count,
get_module_class_from_name,
get_parameter_names,
- nested_concat,
- nested_detach,
- nested_numpify,
)
from transformers.trainer_utils import (
- EvalLoopOutput,
EvalPrediction,
HPSearchBackend,
- PredictionOutput,
TrainOutput,
- denumpify_detensorize,
enable_full_determinism,
find_executable_batch_size,
get_last_checkpoint,
@@ -94,31 +77,17 @@
speed_metrics,
)
from transformers.training_args import ParallelMode
+from transformers.utils import (
+ is_apex_available,
+ is_sagemaker_dp_enabled,
+ is_sagemaker_mp_enabled,
+ is_torch_tpu_available,
+)
-from ..exporters import TasksManager
-from ..exporters.onnx import OnnxConfigWithPast, export, export_models, get_decoder_models_for_export
from ..utils import logging
-from .modeling_decoder import ORTModelForCausalLM
-from .modeling_ort import (
- ORTModel,
- ORTModelForCustomTasks,
- ORTModelForFeatureExtraction,
- ORTModelForImageClassification,
- ORTModelForMaskedLM,
- ORTModelForMultipleChoice,
- ORTModelForQuestionAnswering,
- ORTModelForSemanticSegmentation,
- ORTModelForSequenceClassification,
- ORTModelForTokenClassification,
-)
-from .modeling_seq2seq import ORTModelForSeq2SeqLM, ORTModelForSpeechSeq2Seq
from .training_args import ORTOptimizerNames, ORTTrainingArguments
from .utils import (
- ONNX_DECODER_NAME,
- ONNX_DECODER_WITH_PAST_NAME,
- ONNX_WEIGHTS_NAME,
is_onnxruntime_training_available,
- wrap_onnx_config_for_loss,
)
@@ -132,14 +101,10 @@
import optuna
-logger = logging.get_logger(__name__)
-
# Name of the files used for checkpointing
-TRAINING_ARGS_NAME = "training_args.bin"
TRAINER_STATE_NAME = "trainer_state.json"
-OPTIMIZER_NAME = "optimizer.pt"
-SCHEDULER_NAME = "scheduler.pt"
-SCALER_NAME = "scaler.pt"
+
+logger = logging.get_logger(__name__)
class ModuleWithLoss(nn.Module):
@@ -168,44 +133,6 @@ def config(self):
return self._original_model.config
-class ORTFeaturesManager:
- _TASKS_TO_ORTMODELS = {
- "feature-extraction": ORTModelForFeatureExtraction,
- "fill-mask": ORTModelForMaskedLM,
- "text-generation": ORTModelForCausalLM,
- "text-generation-with-past": ORTModelForCausalLM,
- "text2text-generation": ORTModelForSeq2SeqLM,
- "text2text-generation-with-past": ORTModelForSeq2SeqLM,
- "text-classification": ORTModelForSequenceClassification,
- "token-classification": ORTModelForTokenClassification,
- "multiple-choice": ORTModelForMultipleChoice,
- "question-answering": ORTModelForQuestionAnswering,
- "image-classification": ORTModelForImageClassification,
- "semantic-segmentation": ORTModelForSemanticSegmentation,
- "automatic-speech-recognition": ORTModelForSpeechSeq2Seq,
- }
-
- SUPPORTED_FEATURES = _TASKS_TO_ORTMODELS.keys()
-
- @staticmethod
- def get_model_class_for_feature(feature: str) -> Type:
- """
- Gets the subclass of `ORTModel` associated with the feature.
- """
-
- return ORTFeaturesManager._TASKS_TO_ORTMODELS[feature]
-
- @staticmethod
- def do_use_cache(feature: str) -> bool:
- """
- Gets the value of `use_cache` for the feature.
- """
- if "-with-past" in feature:
- return True
- else:
- return False
-
-
class ORTTrainer(Trainer):
"""
ORTTrainer is a simple but feature-complete training and eval loop for ONNX Runtime, optimized for 🤗 Transformers.
@@ -286,18 +213,16 @@ class ORTTrainer(Trainer):
def __init__(
self,
model: Union[PreTrainedModel, nn.Module] = None,
- tokenizer: Optional[PreTrainedTokenizerBase] = None,
- feature: str = "feature-extraction",
args: ORTTrainingArguments = None,
data_collator: Optional[DataCollator] = None,
train_dataset: Optional[Dataset] = None,
eval_dataset: Optional[Union[Dataset, Dict[str, Dataset]]] = None,
+ tokenizer: Optional[PreTrainedTokenizerBase] = None,
model_init: Optional[Callable[[], PreTrainedModel]] = None,
compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None,
callbacks: Optional[List[TrainerCallback]] = None,
optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
- onnx_model_path: Union[str, os.PathLike] = None,
):
super().__init__(
model=model,
@@ -323,9 +248,6 @@ def __init__(
self.model = model
- self.feature = feature
- self.onnx_model_path = onnx_model_path
- self.exported_with_loss = False
if self.args.local_rank:
torch.cuda.set_device(self.args.local_rank)
@@ -437,7 +359,12 @@ def train(
if resume_from_checkpoint is None:
raise ValueError(f"No valid checkpoint found in output directory ({args.output_dir})")
- if resume_from_checkpoint is not None and not is_sagemaker_mp_enabled() and args.deepspeed is None:
+ if (
+ resume_from_checkpoint is not None
+ and not is_sagemaker_mp_enabled()
+ and not self.is_deepspeed_enabled
+ and not self.is_fsdp_enabled
+ ):
self._load_from_checkpoint(resume_from_checkpoint)
# If model was re-initialized, put it on the right device and update self.model_wrapped
@@ -449,12 +376,25 @@ def train(
inner_training_loop = find_executable_batch_size(
self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
)
- return inner_training_loop(
- args=args,
- resume_from_checkpoint=resume_from_checkpoint,
- trial=trial,
- ignore_keys_for_eval=ignore_keys_for_eval,
- )
+ if args.push_to_hub:
+ try:
+ # Disable progress bars when uploading models during checkpoints to avoid polluting stdout
+ hf_hub_utils.disable_progress_bars()
+ return inner_training_loop(
+ args=args,
+ resume_from_checkpoint=resume_from_checkpoint,
+ trial=trial,
+ ignore_keys_for_eval=ignore_keys_for_eval,
+ )
+ finally:
+ hf_hub_utils.enable_progress_bars()
+ else:
+ return inner_training_loop(
+ args=args,
+ resume_from_checkpoint=resume_from_checkpoint,
+ trial=trial,
+ ignore_keys_for_eval=ignore_keys_for_eval,
+ )
def _inner_training_loop(
self, batch_size=None, args=None, resume_from_checkpoint=None, trial=None, ignore_keys_for_eval=None
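The `train` hunk above wraps the inner loop so Hub uploads stay quiet. A sketch of the same guard factored out (the helper name is hypothetical; the diff inlines it):

import huggingface_hub.utils as hf_hub_utils

def run_inner_loop_quietly(inner_training_loop, push_to_hub, **kwargs):
    # When checkpoints are pushed to the Hub during training, huggingface_hub progress bars
    # are disabled for the duration of the inner loop and re-enabled afterwards, even if
    # training raises.
    if not push_to_hub:
        return inner_training_loop(**kwargs)
    try:
        hf_hub_utils.disable_progress_bars()
        return inner_training_loop(**kwargs)
    finally:
        hf_hub_utils.enable_progress_bars()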
@@ -504,14 +444,6 @@ def _inner_training_loop(
f" {args.max_steps}"
)
- # Compute absolute values for logging, eval, and save if given as ratio
- if args.logging_steps and args.logging_steps < 1:
- args.logging_steps = math.ceil(max_steps * args.logging_steps)
- if args.eval_steps and args.eval_steps < 1:
- args.eval_steps = math.ceil(max_steps * args.eval_steps)
- if args.save_steps and args.save_steps < 1:
- args.save_steps = math.ceil(max_steps * args.save_steps)
-
if DebugOption.UNDERFLOW_OVERFLOW in self.args.debug:
if self.args.n_gpu > 1:
# nn.DataParallel(model) replicates the model, creating new variables and module
@@ -556,13 +488,30 @@ def _inner_training_loop(
self.state = TrainerState()
self.state.is_hyper_param_search = trial is not None
+ # Compute absolute values for logging, eval, and save if given as ratio
+ if args.logging_steps is not None:
+ if args.logging_steps < 1:
+ self.state.logging_steps = math.ceil(max_steps * args.logging_steps)
+ else:
+ self.state.logging_steps = args.logging_steps
+ if args.eval_steps is not None:
+ if args.eval_steps < 1:
+ self.state.eval_steps = math.ceil(max_steps * args.eval_steps)
+ else:
+ self.state.eval_steps = args.eval_steps
+ if args.save_steps is not None:
+ if args.save_steps < 1:
+ self.state.save_steps = math.ceil(max_steps * args.save_steps)
+ else:
+ self.state.save_steps = args.save_steps
+
# Activate gradient checkpointing if needed
if args.gradient_checkpointing:
self.model.gradient_checkpointing_enable()
model = self._wrap_model(self.model_wrapped) # Wrap unless the ORTModule is already wrapped, eg. wrap DDP
- if is_sagemaker_mp_enabled() and resume_from_checkpoint is not None:
+ if (is_sagemaker_mp_enabled() or self.is_fsdp_enabled) and resume_from_checkpoint is not None:
self._load_from_checkpoint(resume_from_checkpoint, model)
# as the model is wrapped, don't use `accelerator.prepare`
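In the hunk above, fractional `logging_steps`, `eval_steps`, and `save_steps` are resolved against `max_steps` and stored on `self.state` instead of being mutated on `args`. A small worked sketch of that resolution rule:

import math

def resolve_interval(value, max_steps):
    # A value strictly below 1 is read as a fraction of max_steps; anything else is kept
    # as an absolute step count.
    if value is None:
        return None
    return math.ceil(max_steps * value) if value < 1 else value

# e.g. resolve_interval(0.1, 1000) == 100, while resolve_interval(500, 1000) == 500.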
@@ -688,11 +637,27 @@ def _inner_training_loop(
self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
+ # Temp: remove after transformers 4.34 release
+ def get_dataloader_sampler(dataloader):
+ if hasattr(dataloader, "batch_sampler") and dataloader.batch_sampler is not None:
+ return get_dataloader_sampler(dataloader.batch_sampler)
+ elif hasattr(dataloader, "sampler"):
+ return dataloader.sampler
+
# Skip the first epochs_trained epochs to get the random state of the dataloader at the right point.
if not args.ignore_data_skip:
for epoch in range(epochs_trained):
- for _ in train_dataloader:
- break
+ sampler = get_dataloader_sampler(train_dataloader)
+ is_random_sampler = isinstance(sampler, RandomSampler)
+ if not is_random_sampler:
+ # We just need to begin an iteration to create the randomization of the sampler.
+ for _ in train_dataloader:
+ break
+ else:
+ # Otherwise we need to consume the whole sampler, because a random operation is
+ # performed at its very end.
+ sampler = sampler if sampler is not None else []
+ _ = list(sampler)
total_batched_samples = 0
for epoch in range(epochs_trained, num_train_epochs):
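The data-skip hunk above distinguishes samplers when resuming. A sketch of the rule it implements, assuming the dataloader and its sampler are already in hand:

from torch.utils.data import RandomSampler

def replay_skipped_epoch(dataloader, sampler):
    # For a RandomSampler, the whole permutation of the skipped epoch must be drawn so the
    # RNG ends up in the same state as an uninterrupted run; for any other sampler it is
    # enough to start (and immediately abandon) one iteration.
    if isinstance(sampler, RandomSampler):
        _ = list(sampler)
    else:
        for _ in dataloader:
            break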
@@ -703,7 +668,7 @@ def _inner_training_loop(
self._past = None
steps_in_epoch = (
- len(train_dataloader)
+ len(epoch_iterator)
if len_dataloader is not None
else args.max_steps * args.gradient_accumulation_steps
)
@@ -715,13 +680,13 @@ def _inner_training_loop(
rng_to_sync = False
steps_skipped = 0
if steps_trained_in_current_epoch > 0:
- skip_first_batches(epoch_iterator, steps_trained_in_current_epoch)
+ epoch_iterator = skip_first_batches(epoch_iterator, steps_trained_in_current_epoch)
steps_skipped = steps_trained_in_current_epoch
steps_trained_in_current_epoch = 0
rng_to_sync = True
step = -1
- for step, inputs in enumerate(train_dataloader):
+ for step, inputs in enumerate(epoch_iterator):
total_batched_samples += 1
if rng_to_sync:
self._load_rng_state(resume_from_checkpoint)
@@ -864,759 +829,16 @@ def _inner_training_loop(
# Delete the last checkpoint when save_total_limit=1 if it's different from the best checkpoint and process allowed to save.
if self.args.should_save and self.state.best_model_checkpoint is not None and self.args.save_total_limit == 1:
for checkpoint in checkpoints_sorted:
- if checkpoint != self.state.best_model_checkpoint:
+ if not os.path.samefile(checkpoint, self.state.best_model_checkpoint):
logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
shutil.rmtree(checkpoint)
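Comparing checkpoints with `os.path.samefile` instead of string equality guards against two different spellings of the same directory. A small, hedged demonstration:

import os
import tempfile

tmp = tempfile.mkdtemp()
best = os.path.join(tmp, "checkpoint-100")
os.makedirs(best)
alias = os.path.join(tmp, ".", "checkpoint-100")  # different string, same directory

print(alias == best)                  # False: a naive comparison would delete it
print(os.path.samefile(alias, best))  # True: both paths resolve to the same directory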
self.control = self.callback_handler.on_train_end(args, self.state, self.control)
- return TrainOutput(self.state.global_step, train_loss, metrics)
-
- def evaluate(
- self,
- eval_dataset: Optional[Dataset] = None,
- ignore_keys: Optional[List[str]] = None,
- metric_key_prefix: str = "eval",
- inference_with_ort: bool = False,
- ) -> Dict[str, float]:
- """
- Run evaluation with ONNX Runtime or PyTorch backend and returns metrics.
-
- Args:
- eval_dataset (`Dataset`, *optional*):
- Pass a dataset if you wish to override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns
- not accepted by the `model.forward()` method are automatically removed. It must implement the `__len__`
- method.
- ignore_keys (`List[str]`, *optional*):
- A list of keys in the output of your model (if it is a dictionary) that should be ignored when
- gathering predictions.
- metric_key_prefix (`str`, *optional*, defaults to `"eval"`):
- An optional prefix to be used as the metrics key prefix. For example the metrics "bleu" will be named
- "eval_bleu" if the prefix is "eval" (default)
-
- Returns:
- A dictionary containing the evaluation loss and the potential metrics computed from the predictions. The
- dictionary also contains the epoch number which comes from the training state.
- """
- # memory metrics - must set up as early as possible
- # TODO: We need to enable evaluation using ORT backend.
- if self.args.use_module_with_loss:
- self.model = self.model._original_model
- self._memory_tracker.start()
-
- eval_dataloader = self.get_eval_dataloader(eval_dataset)
- start_time = time.time()
-
- if inference_with_ort:
- logger.info("[INFO] Evaluating with ONNX Runtime backend.")
- eval_loop = self.prediction_loop_ort if self.args.use_legacy_prediction_loop else self.evaluation_loop_ort
- else:
- logger.info(
- "[INFO] Evaluating with PyTorch backend. If you want to use ONNX Runtime for the evaluation, set `trainer.evaluate(inference_with_ort=True)`."
- )
- eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
-
- try:
- output = eval_loop(
- eval_dataloader,
- description="Evaluation",
- # No point gathering the predictions if there are no metrics, otherwise we defer to
- # self.args.prediction_loss_only
- prediction_loss_only=True if self.compute_metrics is None else None,
- ignore_keys=ignore_keys,
- metric_key_prefix=metric_key_prefix,
- )
- except Exception as error:
- logger.error(error)
- if inference_with_ort:
- logger.error(
- f"[ERROR!] Evaluation with ONNX Runtime is not available for {self.model.config.name_or_path} model. Set `inference_with_ort=False` to evaluate with PyTorch."
- )
- raise
-
- total_batch_size = self.args.eval_batch_size * self.args.world_size
- if f"{metric_key_prefix}_jit_compilation_time" in output.metrics:
- start_time += output.metrics[f"{metric_key_prefix}_jit_compilation_time"]
- output.metrics.update(
- speed_metrics(
- metric_key_prefix,
- start_time,
- num_samples=output.num_samples,
- num_steps=math.ceil(output.num_samples / total_batch_size),
- )
- )
-
- self.log(output.metrics)
-
- self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, output.metrics)
-
- self._memory_tracker.stop_and_update_metrics(output.metrics)
-
- return output.metrics
-
- def predict(
- self,
- test_dataset: Dataset,
- ignore_keys: Optional[List[str]] = None,
- metric_key_prefix: str = "test",
- inference_with_ort: bool = False,
- ) -> PredictionOutput:
- """
- Run prediction and returns predictions and potential metrics.
-
- Depending on the dataset and your use case, your test dataset may contain labels. In that case, this method
- will also return metrics, like in `evaluate()`.
-
- Args:
- test_dataset (`Dataset`):
- Dataset to run the predictions on. If it is an `datasets.Dataset`, columns not accepted by the
- `model.forward()` method are automatically removed. Has to implement the method `__len__`
- ignore_keys (`List[str]`, *optional*):
- A list of keys in the output of your model (if it is a dictionary) that should be ignored when
- gathering predictions.
- metric_key_prefix (`str`, *optional*, defaults to `"test"`):
- An optional prefix to be used as the metrics key prefix. For example the metrics "bleu" will be named
- "test_bleu" if the prefix is "test" (default)
-
-
-
- If your predictions or labels have different sequence length (for instance because you're doing dynamic padding
- in a token classification task) the predictions will be padded (on the right) to allow for concatenation into
- one array. The padding index is -100.
-
-
-
- Returns: *NamedTuple* A namedtuple with the following keys:
-
- - predictions (`np.ndarray`): The predictions on `test_dataset`.
- - label_ids (`np.ndarray`, *optional*): The labels (if the dataset contained some).
- - metrics (`Dict[str, float]`, *optional*): The potential dictionary of metrics (if the dataset contained
- labels).
- """
- # TODO: We need to enable evaluation using ORT backend.
- if self.args.use_module_with_loss:
- self.model = self.model._original_model
-
- # memory metrics - must set up as early as possible
- self._memory_tracker.start()
-
- test_dataloader = self.get_test_dataloader(test_dataset)
- start_time = time.time()
-
- if inference_with_ort:
- logger.info("[INFO] Predicting with ONNX Runtime backend.")
- eval_loop = self.prediction_loop_ort if self.args.use_legacy_prediction_loop else self.evaluation_loop_ort
- else:
- logger.info(
- "[INFO] Predicting with PyTorch backend. If you want to use ONNX Runtime for the prediction, set `trainer.predict(inference_with_ort=True)`."
- )
- eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
-
- try:
- output = eval_loop(
- test_dataloader, description="Prediction", ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix
- )
- except Exception as error:
- logger.error(error)
- if inference_with_ort:
- logger.error(
- f"[ERROR!] Prediction with ONNX Runtime is not available for {self.model.config.name_or_path} model. Set `inference_with_ort=False` to predict with PyTorch."
- )
- raise
-
- total_batch_size = self.args.eval_batch_size * self.args.world_size
- if f"{metric_key_prefix}_jit_compilation_time" in output.metrics:
- start_time += output.metrics[f"{metric_key_prefix}_jit_compilation_time"]
- output.metrics.update(
- speed_metrics(
- metric_key_prefix,
- start_time,
- num_samples=output.num_samples,
- num_steps=math.ceil(output.num_samples / total_batch_size),
- )
- )
-
- self._memory_tracker.stop_and_update_metrics(output.metrics)
-
- return PredictionOutput(predictions=output.predictions, label_ids=output.label_ids, metrics=output.metrics)
-
- def evaluation_loop_ort(
- self,
- dataloader: DataLoader,
- description: str,
- prediction_loss_only: Optional[bool] = None,
- ignore_keys: Optional[List[str]] = None,
- metric_key_prefix: str = "eval",
- ) -> EvalLoopOutput:
- """
- Prediction/evaluation loop, shared by `ORTTrainer.evaluate()` and `ORTTrainer.predict()`.
-
- Works both with or without labels.
- """
- logger.info("[INFO] ONNX Runtime inference starts...")
-
- # Check if there are labels in the dataset
- dummy_inputs = next(iter(dataloader))
- has_labels = all(dummy_inputs.get(k) is not None for k in self.label_names)
- use_cache = ORTFeaturesManager.do_use_cache(self.feature)
-
- if self.onnx_model_path and (has_labels == self.exported_with_loss):
- logger.info("[INFO] Inference with given ONNX model")
- self.onnx_model_path = Path(self.onnx_model_path).as_posix()
- else:
- onnx_model_path = Path(self.args.output_dir)
-
- logger.info("[INFO] Exporting the model to ONNX...")
- if self.args.deepspeed and self.args.fp16:
- export_device = "cuda"
- else:
- export_device = "cpu"
-
- # With `label_smoother` the loss will be computed outside modeling
- with_loss = has_labels and not self.label_smoother
- self._export(onnx_model_path, with_loss=with_loss, device=export_device, use_cache=use_cache)
-
- self.exported_with_loss = with_loss
- self.onnx_model_path = onnx_model_path.as_posix()
- logger.info(f"[INFO] ONNX model is stored in: {self.onnx_model_path}")
-
- # Load ORT model
- support_loss_in_modeling = self.feature in [
- "text-generation",
- "text-generation-with-past",
- "text2text-generation",
- "text2text-generation-with-past",
- ]
- support_feature = self.feature in ORTFeaturesManager.SUPPORTED_FEATURES
- if support_loss_in_modeling or (not self.exported_with_loss and support_feature):
- # Exported with standard outputs, use specific ORTModels
- ort_model_cls = ORTFeaturesManager.get_model_class_for_feature(self.feature)
- else:
- ort_model_cls = ORTModelForCustomTasks
-
- model_id = self.onnx_model_path
- args = self.args
- if ort_model_cls is ORTModelForCausalLM:
- ort_model = ort_model_cls.from_pretrained(model_id=model_id, use_cache=use_cache).to(args.device)
- else:
- ort_model = ort_model_cls.from_pretrained(model_id=model_id).to(args.device)
-
- prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else args.prediction_loss_only
-
- batch_size = dataloader.batch_size
-
- logger.info(f"***** Running {description} *****")
- if has_length(dataloader):
- logger.info(f" Num examples = {self.num_examples(dataloader)}")
- else:
- logger.info(" Num examples: Unknown")
- logger.info(f" Batch size = {batch_size}")
-
- self.callback_handler.eval_dataloader = dataloader
- # Do this before wrapping.
- eval_dataset = getattr(dataloader, "dataset", None)
-
- if args.past_index >= 0:
- self._past = None
-
- # Initialize containers
- # losses/preds/labels on GPU/TPU (accumulated for eval_accumulation_steps)
- losses_host = None
- preds_host = None
- labels_host = None
- inputs_host = None
-
- # losses/preds/labels on CPU (final containers)
- all_losses = None
- all_preds = None
- all_labels = None
- all_inputs = None
- # Will be useful when we have an iterable dataset so don't know its length.
-
- observed_num_examples = 0
- # Main evaluation loop
- for step, inputs in enumerate(dataloader):
- # Update the observed num examples
- observed_batch_size = find_batch_size(inputs)
- if observed_batch_size is not None:
- observed_num_examples += observed_batch_size
- # For batch samplers, batch_size is not known by the dataloader in advance.
- if batch_size is None:
- batch_size = observed_batch_size
-
- # Prediction step(send also onnxruntime inference session)
- loss, logits, labels = self.prediction_step_ort(
- ort_model, inputs, prediction_loss_only, ignore_keys=ignore_keys
- )
- inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None
-
- # Update containers on host
- if loss is not None:
- losses = self.accelerator.gather_for_metrics((loss.repeat(batch_size)))
- losses_host = losses if losses_host is None else nested_concat(losses_host, losses, padding_index=-100)
- if labels is not None:
- labels = self.accelerator.pad_across_processes(labels, dim=1, pad_index=-100)
- if inputs_decode is not None:
- inputs_decode = self.accelerator.pad_across_processes(inputs_decode, dim=1, pad_index=-100)
- inputs_decode = self.accelerator.gather_for_metrics((inputs_decode))
- inputs_host = (
- inputs_decode
- if inputs_host is None
- else nested_concat(inputs_host, inputs_decode, padding_index=-100)
- )
- if logits is not None:
- logits = self.accelerator.pad_across_processes(logits, dim=1, pad_index=-100)
- if self.preprocess_logits_for_metrics is not None:
- logits = self.preprocess_logits_for_metrics(logits, labels)
- logits = self.accelerator.gather_for_metrics((logits))
- preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
-
- if labels is not None:
- labels = self.accelerator.gather_for_metrics((labels))
- labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
-
- self.control = self.callback_handler.on_prediction_step(args, self.state, self.control)
-
- # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
- if args.eval_accumulation_steps is not None and self.accelerator.sync_gradients:
- if losses_host is not None:
- losses = nested_numpify(losses_host)
- all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)
- if preds_host is not None:
- logits = nested_numpify(preds_host)
- all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
- if inputs_host is not None:
- inputs_decode = nested_numpify(inputs_host)
- all_inputs = (
- inputs_decode
- if all_inputs is None
- else nested_concat(all_inputs, inputs_decode, padding_index=-100)
- )
- if labels_host is not None:
- labels = nested_numpify(labels_host)
- all_labels = (
- labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
- )
-
- # Set back to None to begin a new accumulation
- losses_host, preds_host, inputs_host, labels_host = None, None, None, None
-
- if args.past_index and hasattr(self, "_past"):
- # Clean the state at the end of the evaluation loop
- delattr(self, "_past")
-
- # Gather all remaining tensors and put them back on the CPU
- if losses_host is not None:
- losses = nested_numpify(losses_host)
- all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)
- if preds_host is not None:
- logits = nested_numpify(preds_host)
- all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
- if inputs_host is not None:
- inputs_decode = nested_numpify(inputs_host)
- all_inputs = (
- inputs_decode if all_inputs is None else nested_concat(all_inputs, inputs_decode, padding_index=-100)
- )
- if labels_host is not None:
- labels = nested_numpify(labels_host)
- all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
-
- # Number of samples
- if has_length(eval_dataset):
- num_samples = len(eval_dataset)
- # The instance check is weird and does not actually check for the type, but whether the dataset has the right
- # methods. Therefore we need to make sure it also has the attribute.
- elif isinstance(eval_dataset, IterableDatasetShard) and getattr(eval_dataset, "num_examples", 0) > 0:
- num_samples = eval_dataset.num_examples
- else:
- if has_length(dataloader):
- num_samples = self.num_examples(dataloader)
- else: # both len(dataloader.dataset) and len(dataloader) fail
- num_samples = observed_num_examples
- if num_samples == 0 and observed_num_examples > 0:
- num_samples = observed_num_examples
-
- # Metrics!
- if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
- if args.include_inputs_for_metrics:
- metrics = self.compute_metrics(
- EvalPrediction(predictions=all_preds, label_ids=all_labels, inputs=all_inputs)
- )
- else:
- metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
- else:
- metrics = {}
-
- # To be JSON-serializable, we need to remove numpy types or zero-d tensors
- metrics = denumpify_detensorize(metrics)
+ # Wait for the checkpoint to be uploaded.
+ self._finish_current_push()
- if all_losses is not None:
- metrics[f"{metric_key_prefix}_loss"] = all_losses.mean().item()
- if hasattr(self, "jit_compilation_time"):
- metrics[f"{metric_key_prefix}_jit_compilation_time"] = self.jit_compilation_time
-
- # Prefix all keys with metric_key_prefix + '_'
- for key in list(metrics.keys()):
- if not key.startswith(f"{metric_key_prefix}_"):
- metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
-
- return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=num_samples)
-
- def prediction_loop_ort(
- self,
- dataloader: DataLoader,
- description: str,
- prediction_loss_only: Optional[bool] = None,
- ignore_keys: Optional[List[str]] = None,
- metric_key_prefix: str = "eval",
- ) -> EvalLoopOutput:
- """
- Prediction/evaluation loop, shared by `ORTTrainer.evaluate()` and `ORTTrainer.predict()`.
-
- Works both with or without labels.
- """
- logger.info("[INFO] ONNX Runtime inference starts...")
-
- # Check if there are labels in the dataset
- dummy_inputs = next(iter(dataloader))
- has_labels = all(dummy_inputs.get(k) is not None for k in self.label_names)
- use_cache = ORTFeaturesManager.do_use_cache(self.feature)
-
- if self.onnx_model_path and (has_labels == self.exported_with_loss):
- logger.info("[INFO] Inference with given ONNX model")
- self.onnx_model_path = Path(self.onnx_model_path).as_posix()
- else:
- onnx_model_path = Path(self.args.output_dir)
-
- logger.info("[INFO] Exporting the model to ONNX...")
- if self.args.deepspeed and self.args.fp16:
- export_device = "cuda"
- else:
- export_device = "cpu"
-
- # With `label_smoother` the loss will be computed outside modeling
- with_loss = has_labels and not self.label_smoother
- self._export(onnx_model_path, with_loss=with_loss, device=export_device, use_cache=use_cache)
-
- self.exported_with_loss = with_loss
- self.onnx_model_path = onnx_model_path.as_posix()
- logger.info("[INFO] ONNX model is stored in:\n", self.onnx_model_path)
-
- # Load ORT model
- support_loss_in_modeling = self.feature in [
- "text-generation",
- "text-generation-with-past",
- "text2text-generation",
- "text2text-generation-with-past",
- ]
- support_feature = self.feature in ORTFeaturesManager.SUPPORTED_FEATURES
- if support_loss_in_modeling or (not self.exported_with_loss and support_feature):
- # Exported with standard outputs, use specific ORTModels
- ort_model_cls = ORTFeaturesManager.get_model_class_for_feature(self.feature)
- else:
- ort_model_cls = ORTModelForCustomTasks
-
- model_id = self.onnx_model_path
- args = self.args
- if ort_model_cls is ORTModelForCausalLM:
- ort_model = ort_model_cls.from_pretrained(model_id=model_id, use_cache=use_cache).to(args.device)
- else:
- ort_model = ort_model_cls.from_pretrained(model_id=model_id).to(args.device)
-
- if not has_length(dataloader):
- raise ValueError("dataloader must implement a working __len__")
-
- prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else args.prediction_loss_only
-
- batch_size = dataloader.batch_size
- num_examples = self.num_examples(dataloader)
- logger.info(f"***** Running {description} *****")
- logger.info(f" Num examples = {num_examples}")
- logger.info(f" Batch size = {batch_size}")
- losses_host: torch.Tensor = None
- preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
- labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
- inputs_host: Union[torch.Tensor, List[torch.Tensor]] = None
-
- world_size = max(1, args.world_size)
-
- eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
- if not prediction_loss_only:
- # The actual number of eval_sample can be greater than num_examples in distributed settings (when we pass
- # a batch size to the sampler)
- make_multiple_of = None
- if hasattr(dataloader, "sampler") and isinstance(dataloader.sampler, SequentialDistributedSampler):
- make_multiple_of = dataloader.sampler.batch_size
- preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
- labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
- inputs_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
-
- if args.past_index >= 0:
- self._past = None
-
- self.callback_handler.eval_dataloader = dataloader
-
- for step, inputs in enumerate(dataloader):
- loss, logits, labels = self.prediction_step_ort(
- ort_model, inputs, prediction_loss_only, ignore_keys=ignore_keys
- )
- inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None
-
- if loss is not None:
- losses = loss.repeat(batch_size)
- losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
- if logits is not None:
- preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
- if labels is not None:
- labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
- if inputs_decode is not None:
- inputs_host = (
- inputs_decode
- if inputs_host is None
- else nested_concat(inputs_host, inputs_decode, padding_index=-100)
- )
- self.control = self.callback_handler.on_prediction_step(args, self.state, self.control)
-
- # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
- if args.eval_accumulation_steps is not None and (step + 1) % args.eval_accumulation_steps == 0:
- eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
- if not prediction_loss_only:
- preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
- labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
- inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids"))
-
- # Set back to None to begin a new accumulation
- losses_host, preds_host, labels_host, inputs_host = None, None, None, None
-
- if args.past_index and hasattr(self, "_past"):
- # Clean the state at the end of the evaluation loop
- delattr(self, "_past")
-
- # Gather all remaining tensors and put them back on the CPU
- eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
- if not prediction_loss_only:
- preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
- labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
- inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids"))
-
- eval_loss = eval_losses_gatherer.finalize()
- preds = preds_gatherer.finalize() if not prediction_loss_only else None
- label_ids = labels_gatherer.finalize() if not prediction_loss_only else None
- inputs_ids = inputs_gatherer.finalize() if not prediction_loss_only else None
-
- if self.compute_metrics is not None and preds is not None and label_ids is not None:
- if args.include_inputs_for_metrics:
- metrics = self.compute_metrics(
- EvalPrediction(predictions=preds, label_ids=label_ids, inputs=inputs_ids)
- )
- else:
- metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
- else:
- metrics = {}
-
- # To be JSON-serializable, we need to remove numpy types or zero-d tensors
- metrics = denumpify_detensorize(metrics)
-
- if eval_loss is not None:
- metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()
-
- # Prefix all keys with metric_key_prefix + '_'
- for key in list(metrics.keys()):
- if not key.startswith(f"{metric_key_prefix}_"):
- metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
-
- return EvalLoopOutput(predictions=preds, label_ids=label_ids, metrics=metrics, num_samples=num_examples)
-
- def prediction_step_ort(
- self,
- model: ORTModel,
- inputs: Dict[str, Union[torch.Tensor, Any]],
- prediction_loss_only: bool,
- ignore_keys: Optional[List[str]] = None,
- ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
- """
- Perform an evaluation step on `model` using `inputs`.
-
- Args:
- model (`ORTModel`):
- The model to evaluate.
- inputs (`Dict[str, Union[torch.Tensor, Any]]`):
- The inputs and targets of the model.
-
- The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
- argument `labels`. Check your model's documentation for all accepted arguments.
- prediction_loss_only (`bool`):
- Whether or not to return the loss only.
- ignore_keys (`List[str]`, *optional*):
- A list of keys in the output of your model (if it is a dictionary) that should be ignored when
- gathering predictions.
-
- Return:
- Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss,
- logits and labels (each being optional).
- """
- has_labels = False if len(self.label_names) == 0 else all(inputs.get(k) is not None for k in self.label_names)
- # For CLIP-like models capable of returning loss values.
- # If `return_loss` is not specified or being `None` in `inputs`, we check if the default value of `return_loss`
- # is `True` in `model.forward`.
- return_loss = inputs.get("return_loss", None)
- if return_loss is None:
- return_loss = self.can_return_loss
- loss_without_labels = True if len(self.label_names) == 0 and return_loss else False
-
- inputs = self._prepare_inputs(inputs)
-
- if ignore_keys is None:
- if hasattr(self.model, "config"):
- ignore_keys = getattr(self.model.config, "keys_to_ignore_at_inference", [])
- else:
- ignore_keys = []
-
- # labels may be popped when computing the loss (label smoothing for instance) so we grab them first.
- if has_labels or loss_without_labels:
- labels = nested_detach(tuple(inputs.get(name) for name in self.label_names))
- if len(labels) == 1:
- labels = labels[0]
- else:
- labels = None
-
- with torch.no_grad():
- if is_sagemaker_mp_enabled():
- raise NotImplementedError(
- "Sagemaker's distributed data parallel features are not supported by `ORTTrainer` yet."
- )
- else:
- if has_labels or loss_without_labels:
- with self.compute_loss_context_manager():
- loss, outputs = self.compute_loss_ort(model, inputs, return_outputs=True)
- loss = loss.mean().detach()
-
- if isinstance(outputs, dict):
- logits = tuple(v for k, v in outputs.items() if k not in ignore_keys + ["loss"])
- else:
- logits = outputs[1:]
- else:
- loss = None
- with self.compute_loss_context_manager():
- outputs = model(**inputs)
- if isinstance(outputs, dict):
- logits = tuple(v for k, v in outputs.items() if k not in ignore_keys)
- else:
- logits = outputs
- # TODO: this needs to be fixed and made cleaner later.
- if self.args.past_index >= 0:
- self._past = outputs[self.args.past_index - 1]
-
- if prediction_loss_only:
- return (loss, None, None)
-
- logits = nested_detach(logits)
- if len(logits) == 1:
- logits = logits[0]
-
- return (loss, logits, labels)
-
- def compute_loss_ort(self, model, inputs, return_outputs=False):
- """
- How the loss is computed by ORTTrainer. By default, all models return the loss in the first element.
- Subclass and override for custom behavior.
- """
- if self.label_smoother is not None and "labels" in inputs:
- labels = inputs.pop("labels")
- else:
- labels = None
- outputs = model(**inputs)
- # Save past state if it exists
- # TODO: this needs to be fixed and made cleaner later.
- if self.args.past_index >= 0:
- self._past = outputs[self.args.past_index]
-
- if labels is not None:
- if "text-generation" in self.feature:
- loss = self.label_smoother(outputs, labels, shift_labels=True)
- else:
- loss = self.label_smoother(outputs, labels)
- else:
- if isinstance(outputs, dict) and "loss" not in outputs:
- raise ValueError(
- "The model did not return a loss from the inputs, only the following keys: "
- f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
- )
- # We don't use .loss here since the model may return tuples instead of ModelOutput.
- loss = outputs["loss"] if isinstance(outputs, dict) else outputs[0]
-
- return (loss, outputs) if return_outputs else loss
-
- def _export(
- self,
- model_path: os.PathLike,
- model: Optional[PreTrainedModel] = None,
- opset: Optional[int] = None,
- device: str = "cpu",
- with_loss: bool = True,
- use_cache: bool = False,
- ) -> None:
- """
- Load and export a model to an ONNX format.
-
- Args:
- model_path (`os.PathLike`):
- The path used to save the model exported to an ONNX format.
- model ([`PreTrainedModel`], *optional*):
- The model to export. If not provided, a `model_path` must be passed.
- opset (`int`, *optional*):
- ONNX opset version to export the model with.
- device (`str`, *optional*, defaults to `cpu`):
- The device on which the ONNX model will be exported. Either `cpu` or `cuda`.
- with_loss (`bool`, defaults to `True`):
- Whether to export ONNX model with the loss in outputs.
- """
- if model is None:
- if not (self.args.fp16 and self.args.deepspeed):
- # Taking CPU to export the model
- self.model.to("cpu")
- model = unwrap_model(self.model)
-
- onnx_config_constructor = TasksManager.get_exporter_config_constructor(
- model=model, exporter="onnx", task=self.feature
- )
- onnx_config = onnx_config_constructor(model.config)
- opset = onnx_config.DEFAULT_ONNX_OPSET if opset is None else opset
-
- is_decoder = isinstance(onnx_config, OnnxConfigWithPast)
-
- if is_decoder:
- output_names = [ONNX_DECODER_NAME]
- if use_cache is True:
- output_names.append(ONNX_DECODER_WITH_PAST_NAME)
-
- models_and_onnx_configs = get_decoder_models_for_export(model, onnx_config)
- if with_loss is True:
- opset = max(opset, 12)
- models_and_onnx_configs_with_loss = {}
- for decoder_name, (decoder, decoder_config) in models_and_onnx_configs.items():
- models_and_onnx_configs_with_loss[decoder_name] = (
- decoder,
- wrap_onnx_config_for_loss(decoder_config),
- )
-
- export_models(
- models_and_onnx_configs=models_and_onnx_configs_with_loss if with_loss else models_and_onnx_configs,
- opset=opset,
- output_dir=model_path,
- output_names=output_names,
- device=device,
- disable_dynamic_axes_fix=True, # onnxruntime floating point exception (core dumped)
- )
- else:
- if with_loss is True:
- onnx_config = wrap_onnx_config_for_loss(onnx_config)
- opset = max(opset, 12) # Operators like `nll_loss`are added for opset>=12
-
- output_path = model_path / ONNX_WEIGHTS_NAME
- _ = export(model=model, config=onnx_config, opset=opset, output=output_path, device=device)
-
- model.config.save_pretrained(model_path)
+ return TrainOutput(self.state.global_step, train_loss, metrics)
def _wrap_model(self, model, training=True, dataloader=None):
# TODO: ipex only works with inference with PyTorch, will move `inference_with_ort` to training arguments and
@@ -1674,18 +896,24 @@ def _wrap_model(self, model, training=True, dataloader=None):
auto_wrap_policy = None
auto_wrapper_callable = None
- if self.args.fsdp_config["fsdp_min_num_params"] > 0:
+ default_transformer_cls_names_to_wrap = getattr(model, "_no_split_modules", None)
+ fsdp_transformer_layer_cls_to_wrap = self.args.fsdp_config.get(
+ "transformer_layer_cls_to_wrap", default_transformer_cls_names_to_wrap
+ )
+
+ if self.args.fsdp_config["min_num_params"] > 0:
auto_wrap_policy = functools.partial(
- size_based_auto_wrap_policy, min_num_params=self.args.fsdp_config["fsdp_min_num_params"]
+ size_based_auto_wrap_policy, min_num_params=self.args.fsdp_config["min_num_params"]
)
- elif self.args.fsdp_config.get("fsdp_transformer_layer_cls_to_wrap", None) is not None:
+ elif fsdp_transformer_layer_cls_to_wrap is not None:
transformer_cls_to_wrap = set()
- for layer_class in self.args.fsdp_config["fsdp_transformer_layer_cls_to_wrap"]:
+ for layer_class in fsdp_transformer_layer_cls_to_wrap:
transformer_cls = get_module_class_from_name(model, layer_class)
if transformer_cls is None:
raise Exception("Could not find the transformer layer class to wrap in the model.")
else:
transformer_cls_to_wrap.add(transformer_cls)
+
auto_wrap_policy = functools.partial(
transformer_auto_wrap_policy,
# Transformer layer class to wrap
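The renamed FSDP config keys above (`min_num_params` and `transformer_layer_cls_to_wrap`, with `_no_split_modules` as the fallback) select between two auto-wrap strategies. A hedged sketch of that selection, assuming `torch.distributed.fsdp` is available; `build_auto_wrap_policy` is an illustrative helper, not the trainer's API:

import functools

from torch.distributed.fsdp.wrap import (
    size_based_auto_wrap_policy,
    transformer_auto_wrap_policy,
)


def build_auto_wrap_policy(min_num_params=0, transformer_cls_to_wrap=None):
    if min_num_params > 0:
        # Wrap any submodule whose parameter count exceeds the threshold.
        return functools.partial(size_based_auto_wrap_policy, min_num_params=min_num_params)
    if transformer_cls_to_wrap:
        # Wrap whole transformer blocks, e.g. the classes resolved from
        # `_no_split_modules` or the FSDP config.
        return functools.partial(
            transformer_auto_wrap_policy, transformer_layer_cls=set(transformer_cls_to_wrap)
        )
    return None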
diff --git a/optimum/onnxruntime/trainer_seq2seq.py b/optimum/onnxruntime/trainer_seq2seq.py
index a281462fdef..2e43ee89e00 100644
--- a/optimum/onnxruntime/trainer_seq2seq.py
+++ b/optimum/onnxruntime/trainer_seq2seq.py
@@ -14,40 +14,25 @@
"""
The ORTSeq2SeqTrainer class, to easily train a sequence to sequence model in 🤗 Transformers from scratch or finetune it on a new task with ONNX Runtime.
"""
-from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
-import numpy as np
import torch
from torch import nn
-from torch.utils.data import DataLoader, Dataset
+from torch.utils.data import Dataset
from transformers.deepspeed import is_deepspeed_zero3_enabled
-from transformers.modeling_utils import PreTrainedModel, unwrap_model
-from transformers.trainer_pt_utils import (
- DistributedTensorGatherer,
- IterableDatasetShard,
- SequentialDistributedSampler,
- find_batch_size,
- nested_concat,
- nested_numpify,
-)
-from transformers.trainer_utils import (
- EvalLoopOutput,
- EvalPrediction,
- PredictionOutput,
- denumpify_detensorize,
- has_length,
-)
-from transformers.utils import logging
-
-from ..exporters import TasksManager
-from ..exporters.onnx import export
-from .modeling_ort import ORTModel
-from .modeling_seq2seq import ORTModelForSeq2SeqLM
+from transformers.trainer_utils import PredictionOutput
+from transformers.utils import is_accelerate_available, logging
+
from .trainer import ORTTrainer
-from .utils import ONNX_DECODER_NAME, ONNX_DECODER_WITH_PAST_NAME, ONNX_ENCODER_NAME, wrap_onnx_config_for_loss
+if not is_accelerate_available():
+    raise ImportError(
+        "The package `accelerate` is required to use the ORTTrainer. Please install it following https://huggingface.co/docs/accelerate/basic_tutorials/install."
+    )
+
logger = logging.get_logger(__name__)
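A minimal stand-in for the hard requirement on `accelerate` introduced above, using `importlib` rather than transformers' `is_accelerate_available` helper:

import importlib.util

if importlib.util.find_spec("accelerate") is None:
    raise ImportError(
        "The package `accelerate` is required to use the ORTTrainer. Please install it "
        "following https://huggingface.co/docs/accelerate/basic_tutorials/install."
    )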
@@ -57,15 +42,19 @@ def evaluate(
eval_dataset: Optional[Dataset] = None,
ignore_keys: Optional[List[str]] = None,
metric_key_prefix: str = "eval",
- inference_with_ort: bool = False,
**gen_kwargs,
) -> Dict[str, float]:
"""
- Run evaluation with ONNX Runtime or PyTorch backend and returns metrics.
+ Run evaluation and returns metrics.
+
+ The calling script will be responsible for providing a method to compute metrics, as they are task-dependent
+ (pass it to the init `compute_metrics` argument).
+
+ You can also subclass and override this method to inject custom behavior.
Args:
eval_dataset (`Dataset`, *optional*):
- Pass a dataset if you wish to override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns
+ Pass a dataset if you wish to override `self.eval_dataset`. If it is an [`~datasets.Dataset`], columns
not accepted by the `model.forward()` method are automatically removed. It must implement the `__len__`
method.
ignore_keys (`List[str]`, *optional*):
@@ -73,7 +62,14 @@ def evaluate(
gathering predictions.
metric_key_prefix (`str`, *optional*, defaults to `"eval"`):
An optional prefix to be used as the metrics key prefix. For example the metrics "bleu" will be named
- "eval_bleu" if the prefix is "eval" (default)
+ "eval_bleu" if the prefix is `"eval"` (default)
+ max_length (`int`, *optional*):
+ The maximum target length to use when predicting with the generate method.
+ num_beams (`int`, *optional*):
+ Number of beams for beam search that will be used when predicting with the generate method. 1 means no
+ beam search.
+ gen_kwargs:
+ Additional `generate` specific kwargs.
Returns:
A dictionary containing the evaluation loss and the potential metrics computed from the predictions. The
@@ -81,26 +77,26 @@ def evaluate(
"""
gen_kwargs = gen_kwargs.copy()
- if gen_kwargs.get("max_length") is None and gen_kwargs.get("max_new_tokens") is None:
+
+ # Use legacy argument setting if a) the option is not explicitly passed; and b) the argument is set in the
+ # training args
+ if (
+ gen_kwargs.get("max_length") is None
+ and gen_kwargs.get("max_new_tokens") is None
+ and self.args.generation_max_length is not None
+ ):
gen_kwargs["max_length"] = self.args.generation_max_length
- gen_kwargs["num_beams"] = (
- gen_kwargs["num_beams"] if gen_kwargs.get("num_beams") is not None else self.args.generation_num_beams
- )
+ if gen_kwargs.get("num_beams") is None and self.args.generation_num_beams is not None:
+ gen_kwargs["num_beams"] = self.args.generation_num_beams
self._gen_kwargs = gen_kwargs
- return super().evaluate(
- eval_dataset,
- ignore_keys=ignore_keys,
- metric_key_prefix=metric_key_prefix,
- inference_with_ort=inference_with_ort,
- )
+ return super().evaluate(eval_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
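The precedence implemented above is: explicitly passed `gen_kwargs` win, and `generation_max_length` / `generation_num_beams` from the training arguments only fill gaps that are still unset. A hedged, standalone sketch of that resolution (the function name is illustrative):

def resolve_generation_kwargs(gen_kwargs, generation_max_length=None, generation_num_beams=None):
    gen_kwargs = dict(gen_kwargs)
    # Only fall back to the legacy training argument when neither max_length
    # nor max_new_tokens was passed explicitly.
    if (
        gen_kwargs.get("max_length") is None
        and gen_kwargs.get("max_new_tokens") is None
        and generation_max_length is not None
    ):
        gen_kwargs["max_length"] = generation_max_length
    if gen_kwargs.get("num_beams") is None and generation_num_beams is not None:
        gen_kwargs["num_beams"] = generation_num_beams
    return gen_kwargs


print(resolve_generation_kwargs({"num_beams": 4}, generation_max_length=128, generation_num_beams=2))
# {'num_beams': 4, 'max_length': 128}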
def predict(
self,
test_dataset: Dataset,
ignore_keys: Optional[List[str]] = None,
- metric_key_prefix: str = "eval",
- inference_with_ort: bool = False,
+ metric_key_prefix: str = "test",
**gen_kwargs,
) -> "PredictionOutput":
"""
@@ -111,20 +107,27 @@ def predict(
Args:
test_dataset (`Dataset`):
- Dataset to run the predictions on. If it is an `datasets.Dataset`, columns not accepted by the
+ Dataset to run the predictions on. If it is a [`~datasets.Dataset`], columns not accepted by the
`model.forward()` method are automatically removed. Has to implement the method `__len__`
ignore_keys (`List[str]`, *optional*):
A list of keys in the output of your model (if it is a dictionary) that should be ignored when
gathering predictions.
- metric_key_prefix (`str`, *optional*, defaults to `"test"`):
+            metric_key_prefix (`str`, *optional*, defaults to `"test"`):
An optional prefix to be used as the metrics key prefix. For example the metrics "bleu" will be named
- "test_bleu" if the prefix is "test" (default)
+ "eval_bleu" if the prefix is `"eval"` (default)
+ max_length (`int`, *optional*):
+ The maximum target length to use when predicting with the generate method.
+ num_beams (`int`, *optional*):
+ Number of beams for beam search that will be used when predicting with the generate method. 1 means no
+ beam search.
+ gen_kwargs:
+ Additional `generate` specific kwargs.
- If your predictions or labels have different sequence length (for instance because you're doing dynamic padding
- in a token classification task) the predictions will be padded (on the right) to allow for concatenation into
- one array. The padding index is -100.
+ If your predictions or labels have different sequence lengths (for instance because you're doing dynamic
+ padding in a token classification task) the predictions will be padded (on the right) to allow for
+ concatenation into one array. The padding index is -100.
@@ -137,486 +140,20 @@ def predict(
"""
gen_kwargs = gen_kwargs.copy()
- if gen_kwargs.get("max_length") is None and gen_kwargs.get("max_new_tokens") is None:
- gen_kwargs["max_length"] = self.args.generation_max_length
- gen_kwargs["num_beams"] = (
- gen_kwargs["num_beams"] if gen_kwargs.get("num_beams") is not None else self.args.generation_num_beams
- )
- self._gen_kwargs = gen_kwargs
-
- return super().predict(
- test_dataset,
- ignore_keys=ignore_keys,
- metric_key_prefix=metric_key_prefix,
- inference_with_ort=inference_with_ort,
- )
-
- def evaluation_loop_ort(
- self,
- dataloader: DataLoader,
- description: str,
- prediction_loss_only: Optional[bool] = None,
- ignore_keys: Optional[List[str]] = None,
- metric_key_prefix: str = "eval",
- ) -> EvalLoopOutput:
- """
- Prediction/evaluation loop, shared by `ORTTrainer.evaluate()` and `ORTTrainer.predict()`.
-
- Works both with or without labels.
- """
- logger.info("[INFO] ONNX Runtime inference starts...")
- self.ort_model = None
-
- # Check if there are labels in the dataset
- dummy_inputs = next(iter(dataloader))
- has_labels = all(dummy_inputs.get(k) is not None for k in self.label_names)
-
- # Export ONNX models
- if self.onnx_model_path and (has_labels == self.exported_with_loss):
- logger.info("[INFO] Inference with given ONNX model")
- self.onnx_model_path = Path(self.onnx_model_path).as_posix()
- else:
- onnx_model_path = Path(self.args.output_dir)
- logger.info("[INFO] Exporting the model to ONNX...")
- if self.args.deepspeed and self.args.fp16:
- export_device = "cuda"
- else:
- export_device = "cpu"
-
- with_loss = has_labels and not self.label_smoother
- # Only need to export decoders if the models have been exported before.
- decoders_only = True if self.onnx_model_path else False
- self._export(onnx_model_path, with_loss=with_loss, device=export_device, decoders_only=decoders_only)
-
- self.exported_with_loss = with_loss
- self.onnx_model_path = onnx_model_path.as_posix()
- logger.info("[INFO] ONNX model is stored in:\n", self.onnx_model_path)
-
- args = self.args
- # Load ORT model
- self.ort_model = ORTModelForSeq2SeqLM.from_pretrained(model_id=self.onnx_model_path).to(args.device)
-
- prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else args.prediction_loss_only
-
- batch_size = dataloader.batch_size
-
- logger.info(f"***** Running {description} *****")
- if has_length(dataloader):
- logger.info(f" Num examples = {self.num_examples(dataloader)}")
- else:
- logger.info(" Num examples: Unknown")
- logger.info(f" Batch size = {batch_size}")
-
- self.callback_handler.eval_dataloader = dataloader
- # Do this before wrapping.
- eval_dataset = getattr(dataloader, "dataset", None)
-
- if args.past_index >= 0:
- self._past = None
-
- # Initialize containers
- # losses/preds/labels on GPU/TPU (accumulated for eval_accumulation_steps)
- losses_host = None
- preds_host = None
- labels_host = None
- inputs_host = None
-
- # losses/preds/labels on CPU (final containers)
- all_losses = None
- all_preds = None
- all_labels = None
- all_inputs = None
- # Will be useful when we have an iterable dataset so don't know its length.
-
- observed_num_examples = 0
- # Main evaluation loop
- for step, inputs in enumerate(dataloader):
- # Update the observed num examples
- observed_batch_size = find_batch_size(inputs)
- if observed_batch_size is not None:
- observed_num_examples += observed_batch_size
- # For batch samplers, batch_size is not known by the dataloader in advance.
- if batch_size is None:
- batch_size = observed_batch_size
-
- # Prediction step(send also onnxruntime inference session)
- loss, logits, labels = self.prediction_step_ort(
- self.ort_model, inputs, prediction_loss_only, ignore_keys=ignore_keys
- )
- inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None
-
- # Update containers on host
- if loss is not None:
- losses = self.accelerator.gather_for_metrics((loss.repeat(batch_size)))
- losses_host = losses if losses_host is None else nested_concat(losses_host, losses, padding_index=-100)
- if labels is not None:
- labels = self.accelerator.pad_across_processes(labels, dim=1, pad_index=-100)
- if inputs_decode is not None:
- inputs_decode = self.accelerator.pad_across_processes(inputs_decode, dim=1, pad_index=-100)
- inputs_decode = self.accelerator.gather_for_metrics((inputs_decode))
- inputs_host = (
- inputs_decode
- if inputs_host is None
- else nested_concat(inputs_host, inputs_decode, padding_index=-100)
- )
- if logits is not None:
- logits = self.accelerator.pad_across_processes(logits, dim=1, pad_index=-100)
- if self.preprocess_logits_for_metrics is not None:
- logits = self.preprocess_logits_for_metrics(logits, labels)
- logits = self.accelerator.gather_for_metrics((logits))
- preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
-
- if labels is not None:
- labels = self.accelerator.gather_for_metrics((labels))
- labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
-
- self.control = self.callback_handler.on_prediction_step(args, self.state, self.control)
-
- # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
- if args.eval_accumulation_steps is not None and self.accelerator.sync_gradients:
- if losses_host is not None:
- losses = nested_numpify(losses_host)
- all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)
- if preds_host is not None:
- logits = nested_numpify(preds_host)
- all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
- if inputs_host is not None:
- inputs_decode = nested_numpify(inputs_host)
- all_inputs = (
- inputs_decode
- if all_inputs is None
- else nested_concat(all_inputs, inputs_decode, padding_index=-100)
- )
- if labels_host is not None:
- labels = nested_numpify(labels_host)
- all_labels = (
- labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
- )
-
- # Set back to None to begin a new accumulation
- losses_host, preds_host, inputs_host, labels_host = None, None, None, None
-
- if args.past_index and hasattr(self, "_past"):
- # Clean the state at the end of the evaluation loop
- delattr(self, "_past")
-
- # Gather all remaining tensors and put them back on the CPU
- if losses_host is not None:
- losses = nested_numpify(losses_host)
- all_losses = losses if all_losses is None else np.concatenate((all_losses, losses), axis=0)
- if preds_host is not None:
- logits = nested_numpify(preds_host)
- all_preds = logits if all_preds is None else nested_concat(all_preds, logits, padding_index=-100)
- if inputs_host is not None:
- inputs_decode = nested_numpify(inputs_host)
- all_inputs = (
- inputs_decode if all_inputs is None else nested_concat(all_inputs, inputs_decode, padding_index=-100)
- )
- if labels_host is not None:
- labels = nested_numpify(labels_host)
- all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100)
-
- # Number of samples
- if has_length(eval_dataset):
- num_samples = len(eval_dataset)
- # The instance check is weird and does not actually check for the type, but whether the dataset has the right
- # methods. Therefore we need to make sure it also has the attribute.
- elif isinstance(eval_dataset, IterableDatasetShard) and getattr(eval_dataset, "num_examples", 0) > 0:
- num_samples = eval_dataset.num_examples
- else:
- if has_length(dataloader):
- num_samples = self.num_examples(dataloader)
- else: # both len(dataloader.dataset) and len(dataloader) fail
- num_samples = observed_num_examples
- if num_samples == 0 and observed_num_examples > 0:
- num_samples = observed_num_examples
-
- # Metrics!
- if self.compute_metrics is not None and all_preds is not None and all_labels is not None:
- if args.include_inputs_for_metrics:
- metrics = self.compute_metrics(
- EvalPrediction(predictions=all_preds, label_ids=all_labels, inputs=all_inputs)
- )
- else:
- metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels))
- else:
- metrics = {}
-
- # To be JSON-serializable, we need to remove numpy types or zero-d tensors
- metrics = denumpify_detensorize(metrics)
-
- if all_losses is not None:
- metrics[f"{metric_key_prefix}_loss"] = all_losses.mean().item()
-
- # Prefix all keys with metric_key_prefix + '_'
- for key in list(metrics.keys()):
- if not key.startswith(f"{metric_key_prefix}_"):
- metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
-
- return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=num_samples)
-
- def prediction_loop_ort(
- self,
- dataloader: DataLoader,
- description: str,
- prediction_loss_only: Optional[bool] = None,
- ignore_keys: Optional[List[str]] = None,
- metric_key_prefix: str = "eval",
- ) -> PredictionOutput:
- """
- Prediction/evaluation loop, shared by `ORTTrainer.evaluate()` and `ORTTrainer.predict()`.
-
- Works both with or without labels.
- """
- logger.info("[INFO] ONNX Runtime inference starts...")
- self.ort_model = None
-
- # Check if there are labels in the dataset
- dummy_inputs = next(iter(dataloader))
- has_labels = all(dummy_inputs.get(k) is not None for k in self.label_names)
-
- # Export ONNX models
- if self.onnx_model_path and (has_labels == self.exported_with_loss):
- logger.info("[INFO] Inference with given ONNX model")
- self.onnx_model_path = Path(self.onnx_model_path).as_posix()
- else:
- onnx_model_path = Path(self.args.output_dir)
- logger.info("[INFO] Exporting the model to ONNX...")
- if self.args.deepspeed and self.args.fp16:
- export_device = "cuda"
- else:
- export_device = "cpu"
-
- with_loss = has_labels and not self.label_smoother
- # Only need to export decoders if the models have been exported before.
- decoders_only = True if self.onnx_model_path else False
- self._export(onnx_model_path, with_loss=with_loss, device=export_device, decoders_only=decoders_only)
-
- self.exported_with_loss = with_loss
- self.onnx_model_path = onnx_model_path.as_posix()
- logger.info("[INFO] ONNX model is stored in:\n", self.onnx_model_path)
-
- args = self.args
- # Load ORT model
- self.ort_model = ORTModelForSeq2SeqLM.from_pretrained(
- model_id=self.onnx_model_path, provider="CUDAExecutionProvider"
- )
-
- if not has_length(dataloader):
- raise ValueError("dataloader must implement a working __len__")
-
- prediction_loss_only = prediction_loss_only if prediction_loss_only is not None else args.prediction_loss_only
-
- batch_size = dataloader.batch_size
- num_examples = self.num_examples(dataloader)
- logger.info(f"***** Running {description} *****")
- logger.info(f" Num examples = {num_examples}")
- logger.info(f" Batch size = {batch_size}")
- losses_host: torch.Tensor = None
- preds_host: Union[torch.Tensor, List[torch.Tensor]] = None
- labels_host: Union[torch.Tensor, List[torch.Tensor]] = None
- inputs_host: Union[torch.Tensor, List[torch.Tensor]] = None
-
- world_size = max(1, args.world_size)
-
- eval_losses_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=batch_size)
- if not prediction_loss_only:
- # The actual number of eval_sample can be greater than num_examples in distributed settings (when we pass
- # a batch size to the sampler)
- make_multiple_of = None
- if hasattr(dataloader, "sampler") and isinstance(dataloader.sampler, SequentialDistributedSampler):
- make_multiple_of = dataloader.sampler.batch_size
- preds_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
- labels_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
- inputs_gatherer = DistributedTensorGatherer(world_size, num_examples, make_multiple_of=make_multiple_of)
-
- if args.past_index >= 0:
- self._past = None
-
- self.callback_handler.eval_dataloader = dataloader
-
- for step, inputs in enumerate(dataloader):
- loss, logits, labels = self.prediction_step_ort(
- self.ort_model, inputs, prediction_loss_only, ignore_keys=ignore_keys
- )
- inputs_decode = inputs["input_ids"] if args.include_inputs_for_metrics else None
-
- if loss is not None:
- losses = loss.repeat(batch_size)
- losses_host = losses if losses_host is None else torch.cat((losses_host, losses), dim=0)
- if logits is not None:
- preds_host = logits if preds_host is None else nested_concat(preds_host, logits, padding_index=-100)
- if labels is not None:
- labels_host = labels if labels_host is None else nested_concat(labels_host, labels, padding_index=-100)
- if inputs_decode is not None:
- inputs_host = (
- inputs_decode
- if inputs_host is None
- else nested_concat(inputs_host, inputs_decode, padding_index=-100)
- )
- self.control = self.callback_handler.on_prediction_step(args, self.state, self.control)
-
- # Gather all tensors and put them back on the CPU if we have done enough accumulation steps.
- if args.eval_accumulation_steps is not None and (step + 1) % args.eval_accumulation_steps == 0:
- eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
- if not prediction_loss_only:
- preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
- labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
- inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids"))
-
- # Set back to None to begin a new accumulation
- losses_host, preds_host, labels_host, inputs_host = None, None, None, None
-
- if args.past_index and hasattr(self, "_past"):
- # Clean the state at the end of the evaluation loop
- delattr(self, "_past")
-
- # Gather all remaining tensors and put them back on the CPU
- eval_losses_gatherer.add_arrays(self._gather_and_numpify(losses_host, "eval_losses"))
- if not prediction_loss_only:
- preds_gatherer.add_arrays(self._gather_and_numpify(preds_host, "eval_preds"))
- labels_gatherer.add_arrays(self._gather_and_numpify(labels_host, "eval_label_ids"))
- inputs_gatherer.add_arrays(self._gather_and_numpify(inputs_host, "eval_inputs_ids"))
-
- eval_loss = eval_losses_gatherer.finalize()
- preds = preds_gatherer.finalize() if not prediction_loss_only else None
- label_ids = labels_gatherer.finalize() if not prediction_loss_only else None
- inputs_ids = inputs_gatherer.finalize() if not prediction_loss_only else None
-
- if self.compute_metrics is not None and preds is not None and label_ids is not None:
- if args.include_inputs_for_metrics:
- metrics = self.compute_metrics(
- EvalPrediction(predictions=preds, label_ids=label_ids, inputs=inputs_ids)
- )
- else:
- metrics = self.compute_metrics(EvalPrediction(predictions=preds, label_ids=label_ids))
- else:
- metrics = {}
-
- # To be JSON-serializable, we need to remove numpy types or zero-d tensors
- metrics = denumpify_detensorize(metrics)
-
- if eval_loss is not None:
- metrics[f"{metric_key_prefix}_loss"] = eval_loss.mean().item()
-
- # Prefix all keys with metric_key_prefix + '_'
- for key in list(metrics.keys()):
- if not key.startswith(f"{metric_key_prefix}_"):
- metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key)
-
- return PredictionOutput(predictions=preds, label_ids=label_ids, metrics=metrics)
-
- def prediction_step_ort(
- self,
- model: ORTModel,
- inputs: Dict[str, Union[torch.Tensor, Any]],
- prediction_loss_only: bool,
- ignore_keys: Optional[List[str]] = None,
- **gen_kwargs,
- ) -> Tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]:
- """
- Perform an evaluation step on `model` using `inputs`.
-
- Subclass and override to inject custom behavior.
-
- Args:
- model (`ORTModel`):
- The model to evaluate.
- inputs (`Dict[str, Union[torch.Tensor, Any]]`):
- The inputs and targets of the model.
- The dictionary will be unpacked before being fed to the model. Most models expect the targets under the
- argument `labels`. Check your model's documentation for all accepted arguments.
- prediction_loss_only (`bool`):
- Whether or not to return the loss only.
- ignore_keys (`Lst[str]`, *optional*):
- A list of keys in the output of your model (if it is a dictionary) that should be ignored when
- gathering predictions.
- gen_kwargs:
- Additional `generate` specific kwargs.
-
- Return:
- Tuple[Optional[float], Optional[torch.Tensor], Optional[torch.Tensor]]: A tuple with the loss=None, generated
- tokens and labels (each being optional).
- """
- if not self.args.predict_with_generate or prediction_loss_only:
- return super().prediction_step_ort(
- model, inputs, prediction_loss_only=prediction_loss_only, ignore_keys=ignore_keys
- )
-
- has_labels = "labels" in inputs
- inputs = self._prepare_inputs(inputs)
-
- # Priority (handled in generate):
- # gen_kwargs > model.generation_config > default GenerationConfig()
-
- if len(gen_kwargs) == 0 and hasattr(self, "_gen_kwargs"):
- gen_kwargs = self._gen_kwargs.copy()
-
- if gen_kwargs.get("max_length") is None and gen_kwargs.get("max_new_tokens") is None:
- gen_kwargs["max_length"] = self.model.config.max_length
- gen_kwargs["num_beams"] = (
- gen_kwargs["num_beams"] if gen_kwargs.get("num_beams") is not None else self.model.config.num_beams
- )
- default_synced_gpus = True if is_deepspeed_zero3_enabled() else False
- gen_kwargs["synced_gpus"] = (
- gen_kwargs["synced_gpus"] if gen_kwargs.get("synced_gpus") is not None else default_synced_gpus
- )
-
- # If the `decoder_input_ids` was created from `labels`, evict the former, so that the model can freely generate
- # (otherwise, it would continue generating from the padded `decoder_input_ids`)
+ # Use legacy argument setting if a) the option is not explicitly passed; and b) the argument is set in the
+ # training args
if (
- "labels" in inputs
- and "decoder_input_ids" in inputs
- and inputs["labels"].shape == inputs["decoder_input_ids"].shape
+ gen_kwargs.get("max_length") is None
+ and gen_kwargs.get("max_new_tokens") is None
+ and self.args.generation_max_length is not None
):
- inputs = {k: v for k, v in inputs.items() if k != "decoder_input_ids"}
- generated_tokens = self.model.generate(**inputs, **gen_kwargs)
-
- # Temporary hack to ensure the generation config is not initialized for each iteration of the evaluation loop
- # TODO: remove this hack when the legacy code that initializes generation_config from a model config is
- # removed in https://github.com/huggingface/transformers/blob/98d88b23f54e5a23e741833f1e973fdf600cc2c5/src/transformers/generation/utils.py#L1183
- if self.model.generation_config._from_model_config:
- self.model.generation_config._from_model_config = False
- # Retrieves GenerationConfig from model.generation_config
- gen_config = self.model.generation_config
- # in case the batch is shorter than max length, the output should be padded
- if generated_tokens.shape[-1] < gen_config.max_length:
- generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_config.max_length)
- elif gen_config.max_new_tokens is not None and generated_tokens.shape[-1] < gen_config.max_new_tokens + 1:
- generated_tokens = self._pad_tensors_to_max_len(generated_tokens, gen_config.max_new_tokens + 1)
-
- with torch.no_grad():
- with self.compute_loss_context_manager():
- if self.label_smoother is not None:
- onnx_inputs = {k: v for k, v in inputs.items() if k != "labels"}
- outputs = model(**onnx_inputs)
- else:
- outputs = model(**inputs)
- if has_labels:
- if self.label_smoother is not None:
- labels = inputs["labels"]
- # With label smoother, loss will be calculated out of box
- # So the outputs of InferenceSession need to be converted to tensor and sent to the same device
- loss = self.label_smoother(outputs, labels.to(outputs.logits.device)).mean().detach()
- else:
- loss = (outputs["loss"] if isinstance(outputs, dict) else outputs[0]).mean().detach()
- else:
- loss = None
-
- if self.args.prediction_loss_only:
- return loss, None, None
-
- if has_labels:
- labels = inputs["labels"]
- if labels.shape[-1] < gen_config.max_length:
- labels = self._pad_tensors_to_max_len(labels, gen_config.max_length)
- elif gen_config.max_new_tokens is not None and labels.shape[-1] < gen_config.max_new_tokens + 1:
- labels = self._pad_tensors_to_max_len(labels, gen_config.max_new_tokens + 1)
- else:
- labels = None
+ gen_kwargs["max_length"] = self.args.generation_max_length
+ if gen_kwargs.get("num_beams") is None and self.args.generation_num_beams is not None:
+ gen_kwargs["num_beams"] = self.args.generation_num_beams
+ self._gen_kwargs = gen_kwargs
- return loss, generated_tokens, labels
+ return super().predict(test_dataset, ignore_keys=ignore_keys, metric_key_prefix=metric_key_prefix)
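
The comment above spells out the fallback order the rewritten evaluate/predict path now follows: kwargs passed at call time win, then the legacy `--generation_max_length`/`--generation_num_beams` training arguments, and finally the model's generation config inside `generate`. A minimal sketch of how that plays out at the call site (the argument values are illustrative, not taken from this diff):

    from optimum.onnxruntime import ORTSeq2SeqTrainingArguments

    args = ORTSeq2SeqTrainingArguments(
        output_dir="/tmp/out",
        predict_with_generate=True,
        generation_max_length=64,  # legacy fallback, only used when no max_length/max_new_tokens kwarg is passed
        generation_num_beams=4,    # legacy fallback, only used when no num_beams kwarg is passed
    )

    # trainer.predict(test_dataset)                    -> generate(..., max_length=64, num_beams=4)
    # trainer.predict(test_dataset, max_new_tokens=32) -> the explicit kwarg wins; num_beams still falls back to 4
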
def prediction_step(
self,
@@ -657,31 +194,31 @@ def prediction_step(
has_labels = "labels" in inputs
inputs = self._prepare_inputs(inputs)
+ # XXX: adapt synced_gpus for fairscale as well
# Priority (handled in generate):
- # gen_kwargs > model.generation_config > default GenerationConfig()
-
+ # non-`None` gen_kwargs > model.generation_config > default GenerationConfig()
if len(gen_kwargs) == 0 and hasattr(self, "_gen_kwargs"):
gen_kwargs = self._gen_kwargs.copy()
+ if "num_beams" in gen_kwargs and gen_kwargs["num_beams"] is None:
+ gen_kwargs.pop("num_beams")
+ if "max_length" in gen_kwargs and gen_kwargs["max_length"] is None:
+ gen_kwargs.pop("max_length")
- if gen_kwargs.get("max_length") is None and gen_kwargs.get("max_new_tokens") is None:
- gen_kwargs["max_length"] = self.model.config.max_length
- gen_kwargs["num_beams"] = (
- gen_kwargs["num_beams"] if gen_kwargs.get("num_beams") is not None else self.model.config.num_beams
- )
default_synced_gpus = True if is_deepspeed_zero3_enabled() else False
gen_kwargs["synced_gpus"] = (
gen_kwargs["synced_gpus"] if gen_kwargs.get("synced_gpus") is not None else default_synced_gpus
)
+ generation_inputs = inputs.copy()
# If the `decoder_input_ids` was created from `labels`, evict the former, so that the model can freely generate
# (otherwise, it would continue generating from the padded `decoder_input_ids`)
if (
- "labels" in inputs
- and "decoder_input_ids" in inputs
- and inputs["labels"].shape == inputs["decoder_input_ids"].shape
+ "labels" in generation_inputs
+ and "decoder_input_ids" in generation_inputs
+ and generation_inputs["labels"].shape == generation_inputs["decoder_input_ids"].shape
):
- inputs = {k: v for k, v in inputs.items() if k != "decoder_input_ids"}
- generated_tokens = self.model.generate(**inputs, **gen_kwargs)
+ generation_inputs = {k: v for k, v in inputs.items() if k != "decoder_input_ids"}
+ generated_tokens = self.model.generate(**generation_inputs, **gen_kwargs)
# Temporary hack to ensure the generation config is not initialized for each iteration of the evaluation loop
# TODO: remove this hack when the legacy code that initializes generation_config from a model config is
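
Two behaviours of the retained `prediction_step` are worth isolating: `None`-valued entries are popped from `gen_kwargs` so they no longer shadow `model.generation_config`, and a `decoder_input_ids` tensor that merely mirrors `labels` is evicted before calling `generate`. A small self-contained sketch of both steps (the tensors are placeholders, not real model inputs):

    import torch

    gen_kwargs = {"max_length": None, "num_beams": None, "max_new_tokens": 16}
    # drop the None values so generate() can fall back to model.generation_config
    gen_kwargs = {k: v for k, v in gen_kwargs.items() if v is not None}

    labels = torch.tensor([[42, 43, 44]])
    inputs = {
        "input_ids": torch.tensor([[1, 2, 3]]),
        "labels": labels,
        "decoder_input_ids": labels.clone(),  # stands in for the shifted labels a data collator would build
    }
    # evict decoder_input_ids so generation starts from scratch instead of continuing the padded labels
    generation_inputs = {k: v for k, v in inputs.items() if k != "decoder_input_ids"}
    # generated_tokens = model.generate(**generation_inputs, **gen_kwargs)
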
@@ -739,85 +276,3 @@ def _pad_tensors_to_max_len(self, tensor, max_length):
)
padded_tensor[:, : tensor.shape[-1]] = tensor
return padded_tensor
-
- def _export(
- self,
- save_dir: Union[str, Path],
- model: Optional[PreTrainedModel] = None,
- opset: Optional[int] = None,
- device: str = "cpu",
- with_loss: bool = True,
- decoders_only: bool = False,
- **kwargs,
- ) -> None:
- """
- Load and export a sequence-to-sequence model to ONNX models(encoder and decoder(s)).
-
- Args:
- save_dir (`str` or `Path`):
- The directory where the ONNX models(encoder, decoder...) should be saved, default to
- `transformers.file_utils.default_cache_path`, which is the cache dir for transformers.
- device (`str`, *optional*, defaults to `cpu`):
- The device on which the ONNX model will be exported. Either `cpu` or `cuda`.
- with_loss (`bool`, defaults to `True`):
- Whether to export ONNX model with the loss in outputs.
- decoders_only (`bool`, defaults to `False`):
- Whether to just export decoder models.
- """
- if model is None:
- if not (self.args.fp16 and self.args.deepspeed):
- # Taking CPU to export the model
- self.model.to("cpu")
- model = unwrap_model(self.model)
-
- onnx_config_constructor = TasksManager.get_exporter_config_constructor(
- model=model, exporter="onnx", task=self.feature
- )
- onnx_config = onnx_config_constructor(model.config)
-
- opset = onnx_config.DEFAULT_ONNX_OPSET if opset is None else opset
-
- encoder = model.get_encoder()
-
- onnx_config_encoder = onnx_config.with_behavior("encoder")
- onnx_config_decoder = onnx_config.with_behavior("decoder", use_past=False)
- onnx_config_decoder_with_past = onnx_config.with_behavior("decoder", use_past=True)
-
- if with_loss:
- # Add `loss` to the ONNX config of decoders
- onnx_config_decoder = wrap_onnx_config_for_loss(onnx_config_decoder)
- onnx_config_decoder_with_past = wrap_onnx_config_for_loss(onnx_config_decoder_with_past)
- opset = max(opset, 12) # Operators like `nll_loss`are added for opset>=12
-
- # Export the encoder
- if not decoders_only:
- _ = export(
- model=encoder,
- config=onnx_config_encoder,
- opset=opset,
- output=Path(save_dir).joinpath(ONNX_ENCODER_NAME),
- device=device,
- )
- # Export the decoder without the past key values
- export(
- model=model,
- config=onnx_config_decoder,
- opset=opset,
- output=Path(save_dir).joinpath(ONNX_DECODER_NAME),
- device=device,
- )
-
- # Export the decoder with the past key values
- use_cache = kwargs.get("use_cache", True)
- if use_cache:
- export(
- model=model,
- config=onnx_config_decoder_with_past,
- opset=opset,
- output=Path(save_dir).joinpath(ONNX_DECODER_WITH_PAST_NAME),
- device=device,
- )
-
- # TODO: Need to use merged decoder to reduce the use of GPU memory
-
- model.config.save_pretrained(save_dir)
diff --git a/optimum/onnxruntime/training_args.py b/optimum/onnxruntime/training_args.py
index 88e5fc9bc2e..a0cb7c8e983 100644
--- a/optimum/onnxruntime/training_args.py
+++ b/optimum/onnxruntime/training_args.py
@@ -34,6 +34,7 @@
from transformers.utils import (
ExplicitEnum,
get_full_repo_name,
+ is_accelerate_available,
is_safetensors_available,
is_torch_available,
is_torch_bf16_cpu_available,
@@ -41,6 +42,7 @@
is_torch_tf32_available,
logging,
)
+from transformers.utils.generic import strtobool
if is_torch_available():
@@ -137,8 +139,9 @@ def __post_init__(self):
if self.load_best_model_at_end:
if self.evaluation_strategy != self.save_strategy:
raise ValueError(
- "--load_best_model_at_end requires the save and eval strategy to match, but found\n- Evaluation "
- f"strategy: {self.evaluation_strategy}\n- Save strategy: {self.save_strategy}"
+ "--load_best_model_at_end requires the saving steps to be a multiple of the evaluation "
+ "steps, which cannot get guaranteed when mixing ratio and absolute steps for save_steps "
+ f"{self.save_steps} and eval_steps {self.eval_steps}."
)
if self.evaluation_strategy == IntervalStrategy.STEPS and self.save_steps % self.eval_steps != 0:
if self.eval_steps < 1 or self.save_steps < 1:
@@ -189,14 +192,15 @@ def __post_init__(self):
self.half_precision_backend = self.fp16_backend
if self.bf16 or self.bf16_full_eval:
- if self.no_cuda and not is_torch_bf16_cpu_available():
+ if self.use_cpu and not is_torch_bf16_cpu_available():
# cpu
raise ValueError("Your setup doesn't support bf16/(cpu, tpu, neuroncore). You need torch>=1.10")
- elif not self.no_cuda and torch.cuda.is_available() and not is_torch_bf16_gpu_available():
- # gpu
- raise ValueError(
- "Your setup doesn't support bf16/gpu. You need torch>=1.10, using Ampere GPU with cuda>=11.0"
- )
+ elif not self.use_cpu:
+ if torch.cuda.is_available() and not is_torch_bf16_gpu_available():
+ # gpu
+ raise ValueError(
+ "Your setup doesn't support bf16/gpu. You need torch>=1.10, using Ampere GPU with cuda>=11.0"
+ )
if self.fp16 and self.bf16:
raise ValueError("At most one of fp16 and bf16 can be True, but not both")
@@ -286,6 +290,7 @@ def __post_init__(self):
" otherwise."
)
torch.backends.cuda.matmul.allow_tf32 = True
+ torch.backends.cudnn.allow_tf32 = True
else:
logger.warning(
"The speedups for torchdynamo mostly come wih GPU Ampere or higher and which is not detected here."
@@ -294,13 +299,24 @@ def __post_init__(self):
if self.tf32:
if is_torch_tf32_available():
torch.backends.cuda.matmul.allow_tf32 = True
+ torch.backends.cudnn.allow_tf32 = True
else:
raise ValueError("--tf32 requires Ampere or a newer GPU arch, cuda>=11 and torch>=1.7")
else:
if is_torch_tf32_available():
torch.backends.cuda.matmul.allow_tf32 = False
+ torch.backends.cudnn.allow_tf32 = False
# no need to assert on else
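
Both TF32 branches now flip the cuDNN flag together with the matmul flag. A tiny helper that keeps the two settings in sync, shown as a sketch of the behaviour rather than code from the diff:

    import torch

    def set_tf32(enabled: bool) -> None:
        # mirror what __post_init__ does for --tf32: toggle matmul and cuDNN together
        torch.backends.cuda.matmul.allow_tf32 = enabled
        torch.backends.cudnn.allow_tf32 = enabled
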
+ # if training args is specified, it will override the one specified in the accelerate config
+ if self.half_precision_backend != "apex":
+ mixed_precision_dtype = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
+ if self.fp16:
+ mixed_precision_dtype = "fp16"
+ elif self.bf16:
+ mixed_precision_dtype = "bf16"
+ os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision_dtype
+
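
The block above propagates the precision choice to Accelerate through the `ACCELERATE_MIXED_PRECISION` environment variable, overriding whatever an accelerate config set, unless apex is the half-precision backend. A standalone sketch of the same rule, with a hypothetical helper name:

    import os

    def sync_accelerate_mixed_precision(fp16: bool, bf16: bool, half_precision_backend: str = "auto") -> None:
        if half_precision_backend == "apex":
            return  # apex does its own casting; leave the accelerate setting untouched
        mixed_precision = "no"
        if fp16:
            mixed_precision = "fp16"
        elif bf16:
            mixed_precision = "bf16"
        os.environ["ACCELERATE_MIXED_PRECISION"] = mixed_precision

    sync_accelerate_mixed_precision(fp16=True, bf16=False)
    assert os.environ["ACCELERATE_MIXED_PRECISION"] == "fp16"
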
if self.report_to is None:
logger.info(
"The default value for the training argument `--report_to` will change in v5 (from all installed "
@@ -342,44 +358,44 @@ def __post_init__(self):
self.fsdp_config = {}
if isinstance(self.fsdp_config, str):
+ if len(self.fsdp) == 0:
+ warnings.warn("`--fsdp_config` is useful only when `--fsdp` is specified.")
with io.open(self.fsdp_config, "r", encoding="utf-8") as f:
self.fsdp_config = json.load(f)
+ for k in list(self.fsdp_config.keys()):
+ if k.startswith("fsdp_"):
+ v = self.fsdp_config.pop(k)
+ self.fsdp_config[k[5:]] = v
if self.fsdp_min_num_params > 0:
warnings.warn("using `--fsdp_min_num_params` is deprecated. Use fsdp_config instead ", FutureWarning)
- self.fsdp_config["fsdp_min_num_params"] = max(
- self.fsdp_config.get("fsdp_min_num_params", 0), self.fsdp_min_num_params
- )
+ self.fsdp_config["min_num_params"] = max(self.fsdp_config.get("min_num_params", 0), self.fsdp_min_num_params)
- # if fsdp_config["fsdp_transformer_layer_cls_to_wrap"] is specified as a string, convert it to a list with a single object
- if isinstance(self.fsdp_config.get("fsdp_transformer_layer_cls_to_wrap", None), str):
- self.fsdp_config["fsdp_transformer_layer_cls_to_wrap"] = [
- self.fsdp_config["fsdp_transformer_layer_cls_to_wrap"]
- ]
+ # if fsdp_config["transformer_layer_cls_to_wrap"] is specified as a string, convert it to a list with a single object
+ if isinstance(self.fsdp_config.get("transformer_layer_cls_to_wrap", None), str):
+ self.fsdp_config["transformer_layer_cls_to_wrap"] = [self.fsdp_config["transformer_layer_cls_to_wrap"]]
if self.fsdp_transformer_layer_cls_to_wrap is not None:
warnings.warn(
"using `--fsdp_transformer_layer_cls_to_wrap` is deprecated. Use fsdp_config instead ", FutureWarning
)
- self.fsdp_config["fsdp_transformer_layer_cls_to_wrap"] = self.fsdp_config.get(
- "fsdp_transformer_layer_cls_to_wrap", []
+ self.fsdp_config["transformer_layer_cls_to_wrap"] = self.fsdp_config.get(
+ "transformer_layer_cls_to_wrap", []
) + [self.fsdp_transformer_layer_cls_to_wrap]
- if len(self.fsdp) == 0 and self.fsdp_config["fsdp_min_num_params"] > 0:
- warnings.warn("`--fsdp_min_num_params` is useful only when `--fsdp` is specified.")
+ if len(self.fsdp) == 0 and self.fsdp_config["min_num_params"] > 0:
+ warnings.warn("`min_num_params` is useful only when `--fsdp` is specified.")
- if len(self.fsdp) == 0 and self.fsdp_config.get("fsdp_transformer_layer_cls_to_wrap", None) is not None:
- warnings.warn("`--fsdp_transformer_layer_cls_to_wrap` is useful only when `--fsdp` is specified.")
+ if len(self.fsdp) == 0 and self.fsdp_config.get("transformer_layer_cls_to_wrap", None) is not None:
+ warnings.warn("`transformer_layer_cls_to_wrap` is useful only when `--fsdp` is specified.")
if (
len(self.fsdp) > 0
- and self.fsdp_config["fsdp_min_num_params"] > 0
- and self.fsdp_config.get("fsdp_transformer_layer_cls_to_wrap", None) is not None
+ and self.fsdp_config["min_num_params"] > 0
+ and self.fsdp_config.get("transformer_layer_cls_to_wrap", None) is not None
):
- raise ValueError(
- "`--fsdp_min_num_params` and `--fsdp_transformer_layer_cls_to_wrap` are mutually exclusive."
- )
+ raise ValueError("`min_num_params` and `transformer_layer_cls_to_wrap` are mutually exclusive.")
self.fsdp_config["xla"] = self.fsdp_config.get("xla", False)
self.fsdp_config["xla_fsdp_grad_ckpt"] = self.fsdp_config.get("xla_fsdp_grad_ckpt", False)
if self.fsdp_config["xla"]:
@@ -405,23 +421,29 @@ def __post_init__(self):
FSDP_SHARDING_STRATEGY,
)
+ prefix = "FSDP_"
for fsdp_option in self.fsdp:
if fsdp_option.upper() in FSDP_SHARDING_STRATEGY:
# set environment variable for FSDP sharding strategy
- os.environ["FSDP_SHARDING_STRATEGY"] = str(FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1)
+ os.environ[f"{prefix}SHARDING_STRATEGY"] = str(
+ FSDP_SHARDING_STRATEGY.index(fsdp_option.upper()) + 1
+ )
elif fsdp_option == FSDPOption.OFFLOAD:
- os.environ["FSDP_OFFLOAD_PARAMS"] = "true"
+ os.environ[f"{prefix}OFFLOAD_PARAMS"] = "true"
elif fsdp_option == FSDPOption.AUTO_WRAP:
- if self.fsdp_config["fsdp_min_num_params"] > 0:
- os.environ["FSDP_MIN_NUM_PARAMS"] = str(self.fsdp_config["fsdp_min_num_params"])
- os.environ["FSDP_AUTO_WRAP_POLICY"] = FSDP_AUTO_WRAP_POLICY[1]
- elif self.fsdp_config.get("fsdp_transformer_layer_cls_to_wrap", None) is not None:
- os.environ["FSDP_TRANSFORMER_CLS_TO_WRAP"] = ",".join(
- self.fsdp_config["fsdp_transformer_layer_cls_to_wrap"]
+ os.environ[f"{prefix}AUTO_WRAP_POLICY"] = FSDP_AUTO_WRAP_POLICY[0]
+ if self.fsdp_config["min_num_params"] > 0:
+ os.environ[f"{prefix}MIN_NUM_PARAMS"] = str(self.fsdp_config["min_num_params"])
+ os.environ[f"{prefix}AUTO_WRAP_POLICY"] = FSDP_AUTO_WRAP_POLICY[1]
+ elif self.fsdp_config.get("transformer_layer_cls_to_wrap", None) is not None:
+ os.environ[f"{prefix}TRANSFORMER_CLS_TO_WRAP"] = ",".join(
+ self.fsdp_config["transformer_layer_cls_to_wrap"]
)
- os.environ["FSDP_AUTO_WRAP_POLICY"] = FSDP_AUTO_WRAP_POLICY[0]
prefetch_policy = self.fsdp_config.get("fsdp_backward_prefetch", "NO_PREFETCH")
- os.environ["FSDP_BACKWARD_PREFETCH"] = prefetch_policy.upper()
+ os.environ[f"{prefix}BACKWARD_PREFETCH"] = prefetch_policy.upper()
+ os.environ[f"{prefix}FORWARD_PREFETCH"] = self.fsdp_config.get("forward_prefect", "false")
+ os.environ[f"{prefix}SYNC_MODULE_STATES"] = self.fsdp_config.get("sync_module_states", "true")
+ os.environ[f"{prefix}USE_ORIG_PARAMS"] = self.fsdp_config.get("use_orig_params", "false")
if self.tpu_metrics_debug:
warnings.warn(
@@ -444,7 +466,9 @@ def __post_init__(self):
if self.deepspeed:
# - must be run very last in arg parsing, since it will use a lot of these settings.
# - must be run before the model is created.
- from transformers.deepspeed import HfTrainerDeepSpeedConfig
+ if not is_accelerate_available():
+ raise ValueError("--deepspeed requires Accelerate to be installed: `pip install accelerate`.")
+ from transformers.integrations.deepspeed import HfTrainerDeepSpeedConfig
# will be used later by the Trainer
# note: leave self.deepspeed unmodified in case a user relies on it not to be modified)
@@ -456,6 +480,14 @@ def __post_init__(self):
os.environ["ACCELERATE_USE_DEEPSPEED"] = "true"
self.deepspeed_plugin = DeepSpeedPlugin(hf_ds_config=self.hf_deepspeed_config)
+ elif strtobool(os.environ.get("ACCELERATE_USE_DEEPSPEED", "false")):
+ # Accelerate DeepSpeed Plugin
+ from accelerate.utils import DeepSpeedPlugin
+
+ self.deepspeed_plugin = DeepSpeedPlugin()
+ mixed_precision = os.environ.get("ACCELERATE_MIXED_PRECISION", "no")
+ self.deepspeed_plugin.set_mixed_precision(mixed_precision)
+ self.deepspeed_plugin.set_deepspeed_weakref()
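
The new `elif` branch lets DeepSpeed be configured entirely from Accelerate: when `ACCELERATE_USE_DEEPSPEED` is truthy, a `DeepSpeedPlugin` is built from the environment instead of from `--deepspeed`. A minimal sketch of exercising that path (the environment values are illustrative):

    import os

    os.environ["ACCELERATE_USE_DEEPSPEED"] = "true"
    os.environ["ACCELERATE_MIXED_PRECISION"] = "fp16"

    from accelerate.utils import DeepSpeedPlugin

    plugin = DeepSpeedPlugin()  # falls back to its defaults; Accelerate may also read ACCELERATE_DEEPSPEED_* overrides
    plugin.set_mixed_precision(os.environ["ACCELERATE_MIXED_PRECISION"])
    plugin.set_deepspeed_weakref()
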
if self.push_to_hub_token is not None:
warnings.warn(
diff --git a/tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer b/tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer
index 62f7efc8178..7266ba224a8 100644
--- a/tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer
+++ b/tests/onnxruntime/docker/Dockerfile_onnxruntime_trainer
@@ -34,7 +34,7 @@ ARG TORCHVISION_VERSION=0.15.1
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
- bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+ bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
@@ -65,7 +65,7 @@ RUN $PYTHON_EXE -m pip install onnx ninja
RUN $PYTHON_EXE -m pip install torch==${TORCH_VERSION} torchvision==${TORCHVISION_VERSION} -f https://download.pytorch.org/whl/${TORCH_CUDA_VERSION}
# ORT Module
-RUN $PYTHON_EXE -m pip install onnxruntime-training==1.15.1 -f https://download.onnxruntime.ai/onnxruntime_stable_cu118.html
+RUN $PYTHON_EXE -m pip install onnxruntime-training==1.16.1 -f https://download.onnxruntime.ai/onnxruntime_stable_cu118.html
RUN $PYTHON_EXE -m pip install torch-ort
ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2
@@ -76,4 +76,5 @@ COPY . /workspace/optimum
RUN pip install /workspace/optimum[tests]
ENV TEST_LEVEL=1
-CMD RUN_SLOW=1 pytest -v -rs onnxruntime/nightly_test_trainer.py --durations=0
\ No newline at end of file
+CMD RUN_SLOW=1 pytest -v -rs onnxruntime/training/nightly_test_trainer.py \
+    onnxruntime/training/nightly_test_examples.py --durations=0
\ No newline at end of file
diff --git a/tests/onnxruntime/training/nightly_test_examples.py b/tests/onnxruntime/training/nightly_test_examples.py
new file mode 100644
index 00000000000..a16913a097f
--- /dev/null
+++ b/tests/onnxruntime/training/nightly_test_examples.py
@@ -0,0 +1,219 @@
+# coding=utf-8
+# Copyright 2023 the HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Test ONNX Runtime Training Examples in Optimum."""
+
+import subprocess
+import unittest
+
+import pytest
+from transformers.testing_utils import slow
+
+
+@slow
+class ORTTrainerExampleTest(unittest.TestCase):
+ def test_text_classification(self):
+ subprocess.run(
+ "cp ../examples/onnxruntime/training/text-classification/run_glue.py ./",
+ shell=True,
+ )
+
+ subprocess.run(
+ "torchrun"
+ " --nproc_per_node=1"
+ " run_glue.py"
+ " --model_name_or_path distilbert-base-uncased"
+ " --task_name mnli"
+ " --max_seq_length 64"
+ " --learning_rate 3e-6"
+ " --do_train"
+ " --output_dir /tmp/distilbert"
+ " --overwrite_output_dir"
+ " --max_steps 50"
+ " --logging_steps 50"
+ " --per_device_train_batch_size 8"
+ " --fp16 --optim adamw_ort_fused"
+ " --max_train_samples 20",
+ shell=True,
+ check=True,
+ )
+
+ def test_token_classification(self):
+ subprocess.run(
+ "cp ../examples/onnxruntime/training/token-classification/run_ner.py ./",
+ shell=True,
+ )
+
+ subprocess.run(
+ "torchrun"
+ " --nproc_per_node=1"
+ " run_ner.py"
+ " --model_name_or_path bert-base-cased"
+ " --dataset_name conll2003"
+ " --do_train"
+ " --output_dir /tmp/bert"
+ " --overwrite_output_dir"
+ " --max_steps 50"
+ " --logging_steps 50"
+ " --per_device_train_batch_size 8"
+ " --fp16 --optim adamw_ort_fused"
+ " --max_train_samples 20",
+ shell=True,
+ check=True,
+ )
+
+ def test_translation(self):
+ subprocess.run(
+ "cp ../examples/onnxruntime/training/translation/run_translation.py ./",
+ shell=True,
+ )
+
+ subprocess.run(
+ "torchrun"
+ " --nproc_per_node=1"
+ " run_translation.py"
+ " --model_name_or_path t5-small"
+ " --dataset_name wmt16"
+ " --dataset_config ro-en"
+ " --label_smoothing 0.1"
+ " --predict_with_generate"
+ " --source_lang en"
+ " --target_lang ro"
+ " --do_train"
+ " --max_train_samples 30"
+ " --output_dir /tmp/t5"
+ " --overwrite_output_dir"
+ " --max_steps 50"
+ " --logging_steps 50"
+ " --per_device_train_batch_size 2"
+ " --fp16 --optim adamw_ort_fused",
+ shell=True,
+ check=True,
+ )
+
+ @pytest.mark.skip(reason="skip for now")
+ def test_summarization(self):
+ subprocess.run(
+ "cp ../examples/onnxruntime/training/summarization/run_summarization.py ./",
+ shell=True,
+ )
+
+ subprocess.run(
+ "torchrun"
+ " --nproc_per_node=1"
+ " run_summarization.py"
+ " --model_name_or_path t5-small"
+ " --do_train"
+ " --do_eval"
+ " --dataset_name cnn_dailymail"
+ ' --dataset_config "3.0.0"'
+ ' --source_prefix "summarize: "'
+ " --predict_with_generate"
+ " --max_train_samples 30"
+ " --output_dir /tmp/t5"
+ " --overwrite_output_dir"
+ " --max_steps 50"
+ " --logging_steps 50"
+ " --per_device_train_batch_size 2"
+ " --per_device_eval_batch_size 2"
+ " --fp16 --optim adamw_ort_fused",
+ shell=True,
+ check=True,
+ )
+
+ # TODO: Update the example and add the test
+ def test_stable_diffusion_txt2img(self):
+ pass
+
+ @pytest.mark.skip(reason="skip for now")
+ def test_question_answering(self):
+ subprocess.run(
+ "cp ../examples/onnxruntime/training/question-answering/run_qa.py ./",
+ shell=True,
+ )
+
+ subprocess.run(
+ "torchrun"
+ " --nproc_per_node=1"
+ " run_qa.py"
+ " --model_name_or_path bert-base-uncased"
+ " --do_train"
+ " --do_eval"
+ " --dataset_name squad"
+ " --max_train_samples 30"
+ " --output_dir /tmp/bert"
+ " --overwrite_output_dir"
+ " --max_steps 50"
+ " --logging_steps 50"
+ " --per_device_train_batch_size 2"
+ " --per_device_eval_batch_size 2"
+ " --fp16 --optim adamw_ort_fused",
+ shell=True,
+ check=True,
+ )
+
+ @pytest.mark.skip(reason="skip for now")
+ def test_language_modeling(self):
+ subprocess.run(
+ "cp ../examples/onnxruntime/training/question-answering/run_qa.py ./",
+ shell=True,
+ )
+
+ subprocess.run(
+ "torchrun"
+ " --nproc_per_node=1"
+ " run_clm.py"
+ " --model_name_or_path gpt2"
+ " --do_train"
+ " --do_eval"
+ " --dataset_name wikitext"
+ " --dataset_config_name wikitext-2-raw-v1"
+ " --max_train_samples 30"
+ " --output_dir /tmp/gpt2"
+ " --overwrite_output_dir"
+ " --max_steps 50"
+ " --logging_steps 50"
+ " --per_device_train_batch_size 2"
+ " --per_device_eval_batch_size 2"
+ " --fp16 --optim adamw_ort_fused",
+ shell=True,
+ check=True,
+ )
+
+ @pytest.mark.skip(reason="skip for now")
+ def test_image_classification(self):
+ subprocess.run(
+ "cp ../examples/onnxruntime/training/image-classification/run_image_classification.py ./",
+ shell=True,
+ )
+
+ subprocess.run(
+ "torchrun"
+ " --nproc_per_node=1"
+ " run_image_classification.py"
+ " --model_name_or_path google/vit-base-patch16-224-in21k"
+ " --do_train"
+ " --do_eval"
+ " --dataset_name beans"
+ " --max_train_samples 30"
+ " --output_dir /tmp/vit"
+ " --overwrite_output_dir"
+ " --max_steps 50"
+ " --logging_steps 50"
+ " --per_device_train_batch_size 2"
+ " --per_device_eval_batch_size 2"
+ " --fp16 --optim adamw_ort_fused",
+ shell=True,
+ check=True,
+ )
diff --git a/tests/onnxruntime/nightly_test_trainer.py b/tests/onnxruntime/training/nightly_test_trainer.py
similarity index 54%
rename from tests/onnxruntime/nightly_test_trainer.py
rename to tests/onnxruntime/training/nightly_test_trainer.py
index 2eb3ca433f7..e24ee306178 100644
--- a/tests/onnxruntime/nightly_test_trainer.py
+++ b/tests/onnxruntime/training/nightly_test_trainer.py
@@ -12,11 +12,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+"""Test ONNX Runtime Training ORTTrainer in Optimum."""
import gc
+import os
import random
-import subprocess
-import sys
import tempfile
import unittest
from itertools import chain
@@ -25,7 +25,6 @@
import nltk
import numpy as np
-import pytest
from datasets import load_dataset
from evaluate import load
from transformers import (
@@ -35,12 +34,16 @@
AutoModelForTokenClassification,
AutoTokenizer,
DataCollatorForSeq2Seq,
- DataCollatorForTokenClassification,
DataCollatorWithPadding,
default_data_collator,
is_torch_available,
)
-from transformers.testing_utils import require_deepspeed, require_torch, slow
+from transformers.testing_utils import (
+ mockenv_context,
+ require_deepspeed,
+ require_torch,
+ slow,
+)
from transformers.training_args import OptimizerNames
@@ -75,11 +78,11 @@
"data_collator": default_data_collator,
"data_collator_class": DataCollatorWithPadding,
},
- "token-classification": {
- "dataset": ["conll2003"],
- "metric": ["seqeval"],
- "data_collator_class": DataCollatorForTokenClassification,
- },
+ # "token-classification": {
+ # "dataset": ["conll2003"],
+ # "metric": ["seqeval"],
+ # "data_collator_class": DataCollatorForTokenClassification,
+ # },
}
_DECODER_TASKS_DATASETS_CONFIGS = {
@@ -88,11 +91,6 @@
"metric": ["accuracy"],
"data_collator": default_data_collator,
},
- "text-generation-with-past": {
- "dataset": ["wikitext", "wikitext-2-raw-v1"],
- "metric": ["accuracy"],
- "data_collator": default_data_collator,
- },
}
_SEQ2SEQ_TASKS_DATASETS_CONFIGS = {
@@ -101,30 +99,37 @@
"metric": ["rouge"],
"data_collator_class": DataCollatorForSeq2Seq,
},
- "text2text-generation-with-past": {
- "dataset": ["xsum"],
- "metric": ["rouge"],
- "data_collator_class": DataCollatorForSeq2Seq,
- },
}
+# List supported ORT optimizers to test
+optim_test_params = []
+if is_torch_available():
+ default_adam_kwargs = {
+ "betas": (ORTTrainingArguments.adam_beta1, ORTTrainingArguments.adam_beta2),
+ "eps": ORTTrainingArguments.adam_epsilon,
+ "lr": ORTTrainingArguments.learning_rate,
+ }
-def _get_models_to_test(model_list, task_list, both_inf_backend=False, excluded: Optional[List[str]] = None):
+ optim_test_params = [
+ (
+ ORTOptimizerNames.ADAMW_ORT_FUSED,
+ onnxruntime.training.optim.FusedAdam,
+ default_adam_kwargs,
+ ),
+ ]
+
+# default torch.distributed port
+DEFAULT_MASTER_PORT = "10999"
+
+
+def _get_models_to_test(model_list, task_list, excluded: Optional[List[str]] = None):
models_to_test = []
for name, model_name in model_list:
- for feature, data_metric_config in task_list.items():
- if excluded and (name in excluded or feature in excluded):
+ for task, data_metric_config in task_list.items():
+ if excluded and (name in excluded or task in excluded):
continue
- if both_inf_backend:
- models_to_test.append(
- (f"{name}_{feature}", model_name, feature, data_metric_config, True)
- ) # inference_with_ort=True
- models_to_test.append(
- (f"{name}_{feature}", model_name, feature, data_metric_config, False)
- ) # inference_with_ort=False
- else:
- models_to_test.append((f"{name}_{feature}", model_name, feature, data_metric_config))
+ models_to_test.append((f"{name}_{task}", model_name, task, data_metric_config))
return sorted(models_to_test)
@@ -151,17 +156,39 @@ def _get_data_collator(data_metric_config, tokenizer=None, model=None, training_
return data_collator
-def get_ort_training_args(feature, **kwargs):
- if feature in _ENCODER_TASKS_DATASETS_CONFIGS or feature in _DECODER_TASKS_DATASETS_CONFIGS:
+def get_ort_training_args(task, **kwargs):
+ if task in _ENCODER_TASKS_DATASETS_CONFIGS or task in _DECODER_TASKS_DATASETS_CONFIGS:
training_args = ORTTrainingArguments(**kwargs)
- elif feature in _SEQ2SEQ_TASKS_DATASETS_CONFIGS:
+ elif task in _SEQ2SEQ_TASKS_DATASETS_CONFIGS:
training_args = ORTSeq2SeqTrainingArguments(**kwargs)
return training_args
+def get_master_port(real_launcher=False):
+ """
+    When emulating a single-GPU launcher (i.e. not deepspeed or python -m torch.distributed), the
+    bound port cannot be reused outside of this process, since torch.distributed does not free it
+    until the process exits. To be able to run both emulated-launcher and real-launcher tests, two
+    distinct ports are therefore needed.
+
+    This function returns the right port for the given context: the base port for a real launcher,
+    and the base port + 1 for an emulated one. In both cases the port is returned as a string.
+
+    Args:
+        `real_launcher`: whether a real launcher is going to be used, or the emulated one.
+    """
+
+ master_port_base = os.environ.get("DS_TEST_PORT", DEFAULT_MASTER_PORT)
+ if not real_launcher:
+ master_port_base = str(int(master_port_base) + 1)
+ return master_port_base
+
+
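
`get_master_port` is paired with `mockenv_context` to fake a one-process distributed environment in the tests below; roughly, each test case does the equivalent of the following (mirroring the `setUp` methods further down):

    master_port = get_master_port(real_launcher=False)  # base port + 1 for the emulated launcher
    dist_env_1_gpu = {
        "MASTER_ADDR": "localhost",
        "MASTER_PORT": master_port,
        "RANK": "0",
        "LOCAL_RANK": "0",
        "WORLD_SIZE": "1",
    }
    with mockenv_context(**dist_env_1_gpu):
        pass  # build ORTTrainingArguments / the ORTTrainer inside the mocked single-process env
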
def get_ort_trainer(
model_name,
- feature,
+ task,
data_metric_config,
training_args,
max_seq_length=None,
@@ -170,7 +197,7 @@ def get_ort_trainer(
max_test_samples=None,
**kwargs,
):
- training_kwargs = load_and_prepare(feature)(
+ training_kwargs = load_and_prepare(task)(
model_name,
data_metric_config,
max_seq_length,
@@ -185,26 +212,25 @@ def get_ort_trainer(
if getattr(training_args, "predict_with_generate", False) is not True:
training_kwargs.pop("compute_metrics", None)
- if feature in _ENCODER_TASKS_DATASETS_CONFIGS or feature in _DECODER_TASKS_DATASETS_CONFIGS:
- trainer = ORTTrainer(feature=feature, args=training_args, **training_kwargs)
- elif feature in _SEQ2SEQ_TASKS_DATASETS_CONFIGS:
- trainer = ORTSeq2SeqTrainer(feature=feature, args=training_args, **training_kwargs)
+ if task in _ENCODER_TASKS_DATASETS_CONFIGS or task in _DECODER_TASKS_DATASETS_CONFIGS:
+ trainer = ORTTrainer(args=training_args, **training_kwargs)
+ elif task in _SEQ2SEQ_TASKS_DATASETS_CONFIGS:
+ trainer = ORTSeq2SeqTrainer(args=training_args, **training_kwargs)
else:
raise
return trainer, test_dataset
-def load_and_prepare(feature):
+def load_and_prepare(task):
preprocess_mapping = {
"text-classification": load_and_prepare_glue,
"token-classification": load_and_prepare_ner,
"text-generation": load_and_prepare_clm,
"text-generation-with-past": load_and_prepare_clm,
"text2text-generation": load_and_prepare_xsum,
- "text2text-generation-with-past": load_and_prepare_xsum,
}
- return preprocess_mapping[feature]
+ return preprocess_mapping[task]
def load_and_prepare_glue(model_name, data_metric_config, max_seq_length, padding="max_length", **kwargs):
@@ -520,212 +546,140 @@ class ORTTrainerIntegrationTest(unittest.TestCase):
def setUp(self):
super().setUp()
args = ORTTrainingArguments("..")
+ master_port = get_master_port(real_launcher=False)
+ self.dist_env_1_gpu = {
+ "MASTER_ADDR": "localhost",
+ "MASTER_PORT": master_port,
+ "RANK": "0",
+ "LOCAL_RANK": "0",
+ "WORLD_SIZE": "1",
+ }
self.n_epochs = min(args.num_train_epochs, 1)
- self.per_device_train_batch_size = args.per_device_train_batch_size
- self.per_device_eval_batch_size = args.per_device_eval_batch_size
+ self.per_device_train_batch_size = min(args.per_device_train_batch_size, 2)
+ self.per_device_eval_batch_size = min(args.per_device_eval_batch_size, 2)
self.max_seq_length = 64
- self.max_train_samples = 50
- self.max_valid_samples = 20
- self.max_test_samples = 10
+ self.max_train_samples = 10
+ self.max_valid_samples = 5
+ self.max_test_samples = 5
self.warmup_steps = 10
self.weight_decay = 0.01
@parameterized.expand(
- _get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS, both_inf_backend=True)
- # + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS, both_inf_backend=True) # Skip test for OOM bug
- + _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS, both_inf_backend=True),
- skip_on_empty=True,
- )
- def test_trainer_fp32(self, test_name, model_name, feature, data_metric_config, inference_with_ort):
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = get_ort_training_args(
- feature=feature,
- output_dir=tmp_dir,
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- )
-
- trainer, test_dataset = get_ort_trainer(
- model_name,
- feature,
- data_metric_config,
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- trainer.save_model()
- trainer.evaluate(inference_with_ort=inference_with_ort)
- trainer.predict(test_dataset, inference_with_ort=inference_with_ort)
- gc.collect()
-
- @parameterized.expand(
- _get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS, both_inf_backend=True)
- # + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS, both_inf_backend=True) # Skip test for OOM bug
- + _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS, both_inf_backend=True),
+ _get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS)
+ + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
+ + _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS),
skip_on_empty=True,
)
- def test_trainer_fp32_with_label_smoothing(
- self, test_name, model_name, feature, data_metric_config, inference_with_ort
- ):
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = get_ort_training_args(
- feature=feature,
- output_dir=tmp_dir,
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- label_smoothing_factor=0.1,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- )
-
- trainer, test_dataset = get_ort_trainer(
- model_name,
- feature,
- data_metric_config,
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- trainer.save_model()
- trainer.evaluate(inference_with_ort=inference_with_ort)
- trainer.predict(test_dataset, inference_with_ort=inference_with_ort)
- gc.collect()
+ def test_trainer_fp32(self, test_name, model_name, task, data_metric_config):
+ with mockenv_context(**self.dist_env_1_gpu):
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ training_args = get_ort_training_args(
+ task=task,
+ output_dir=tmp_dir,
+ num_train_epochs=self.n_epochs,
+ per_device_train_batch_size=self.per_device_train_batch_size,
+ per_device_eval_batch_size=self.per_device_eval_batch_size,
+ warmup_steps=self.warmup_steps,
+ weight_decay=self.weight_decay,
+ logging_dir=tmp_dir,
+ )
+
+ trainer, test_dataset = get_ort_trainer(
+ model_name,
+ task,
+ data_metric_config,
+ training_args,
+ max_seq_length=self.max_seq_length,
+ max_train_samples=self.max_train_samples,
+ max_valid_samples=self.max_valid_samples,
+ max_test_samples=self.max_test_samples,
+ )
+
+ trainer.train()
+ trainer.save_model()
+ trainer.evaluate()
+ trainer.predict(test_dataset)
+ gc.collect()
@slow
@parameterized.expand(
_get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS)
- # + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS) # Skip test for OOM bug
+ + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
+ _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS),
skip_on_empty=True,
)
- def test_trainer_fp16_pt_inference(self, test_name, model_name, feature, data_metric_config):
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = get_ort_training_args(
- feature=feature,
- output_dir=tmp_dir,
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- fp16=True,
- )
-
- trainer, test_dataset = get_ort_trainer(
- model_name,
- feature,
- data_metric_config,
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- trainer.save_model()
- trainer.evaluate()
- trainer.predict(test_dataset)
- gc.collect()
+ def test_trainer_fp32_with_label_smoothing(self, test_name, model_name, task, data_metric_config):
+ with mockenv_context(**self.dist_env_1_gpu):
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ training_args = get_ort_training_args(
+ task=task,
+ output_dir=tmp_dir,
+ num_train_epochs=self.n_epochs,
+ per_device_train_batch_size=self.per_device_train_batch_size,
+ per_device_eval_batch_size=self.per_device_eval_batch_size,
+ label_smoothing_factor=0.1,
+ warmup_steps=self.warmup_steps,
+ weight_decay=self.weight_decay,
+ logging_dir=tmp_dir,
+ )
+
+ trainer, test_dataset = get_ort_trainer(
+ model_name,
+ task,
+ data_metric_config,
+ training_args,
+ max_seq_length=self.max_seq_length,
+ max_train_samples=self.max_train_samples,
+ max_valid_samples=self.max_valid_samples,
+ max_test_samples=self.max_test_samples,
+ )
+
+ trainer.train()
+ trainer.save_model()
+ trainer.evaluate()
+ trainer.predict(test_dataset)
+ gc.collect()
@slow
@parameterized.expand(
_get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS)
- # Exclude "with-past" tests as they fail for ORT inference after the mixed-precision training
- # + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS, excluded=["text-generation-with-past"]) # Skip test for OOM bug
- + _get_models_to_test(
- _SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS, excluded=["text2text-generation-with-past"]
- ),
+ + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
+ + _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS),
skip_on_empty=True,
)
- def test_trainer_fp16_ort_inference(self, test_name, model_name, feature, data_metric_config):
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = get_ort_training_args(
- feature=feature,
- output_dir=tmp_dir,
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- fp16=True,
- )
-
- trainer, test_dataset = get_ort_trainer(
- model_name,
- feature,
- data_metric_config,
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- trainer.save_model()
- trainer.evaluate(inference_with_ort=True)
- trainer.predict(test_dataset, inference_with_ort=True)
- gc.collect()
-
- # Skip this test as a large amount of ops don't support bf16 yet.
- # @unittest.skip("Skip BF16 test.")
- # @slow
- # @require_torch_bf16_gpu
- # @parameterized.expand(
- # _get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS)
- # + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
- # + _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS),
- # skip_on_empty=True,
- # )
- # def test_trainer_bf16(self, test_name, model_name, feature, data_metric_config):
- # with tempfile.TemporaryDirectory() as tmp_dir:
- # training_args = get_ort_training_args(
- # feature=feature,
- # output_dir=tmp_dir,
- # num_train_epochs=self.n_epochs,
- # per_device_train_batch_size=self.per_device_train_batch_size,
- # per_device_eval_batch_size=self.per_device_eval_batch_size,
- # warmup_steps=self.warmup_steps,
- # weight_decay=self.weight_decay,
- # logging_dir=tmp_dir,
- # bf16=True,
- # )
-
- # trainer, test_dataset = get_ort_trainer(
- # model_name,
- # feature,
- # data_metric_config,
- # training_args,
- # max_seq_length=self.max_seq_length,
- # max_train_samples=self.max_train_samples,
- # max_valid_samples=self.max_valid_samples,
- # max_test_samples=self.max_test_samples,
- # )
-
- # trainer.train()
- # trainer.save_model()
- # trainer.evaluate()
- # trainer.predict(test_dataset)
- # gc.collect()
+ def test_trainer_fp16(self, test_name, model_name, task, data_metric_config):
+ with mockenv_context(**self.dist_env_1_gpu):
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ training_args = get_ort_training_args(
+ task=task,
+ output_dir=tmp_dir,
+ num_train_epochs=self.n_epochs,
+ per_device_train_batch_size=self.per_device_train_batch_size,
+ per_device_eval_batch_size=self.per_device_eval_batch_size,
+ warmup_steps=self.warmup_steps,
+ weight_decay=self.weight_decay,
+ logging_dir=tmp_dir,
+ fp16=True,
+ )
+
+ trainer, test_dataset = get_ort_trainer(
+ model_name,
+ task,
+ data_metric_config,
+ training_args,
+ max_seq_length=self.max_seq_length,
+ max_train_samples=self.max_train_samples,
+ max_valid_samples=self.max_valid_samples,
+ max_test_samples=self.max_test_samples,
+ )
+
+ trainer.train()
+ trainer.save_model()
+ trainer.evaluate()
+ trainer.predict(test_dataset)
+ gc.collect()
@slow
@@ -734,14 +688,22 @@ class ORTTrainerIntegrationDeepSpeedTest(unittest.TestCase):
def setUp(self):
super().setUp()
args = ORTTrainingArguments("..")
+ master_port = get_master_port(real_launcher=False)
+ self.dist_env_1_gpu = {
+ "MASTER_ADDR": "localhost",
+ "MASTER_PORT": master_port,
+ "RANK": "0",
+ "LOCAL_RANK": "0",
+ "WORLD_SIZE": "1",
+ }
self.n_epochs = min(args.num_train_epochs, 1)
- self.per_device_train_batch_size = args.per_device_train_batch_size
- self.per_device_eval_batch_size = args.per_device_eval_batch_size
+ self.per_device_train_batch_size = min(args.per_device_train_batch_size, 2)
+ self.per_device_eval_batch_size = min(args.per_device_eval_batch_size, 2)
self.max_seq_length = 64
- self.max_train_samples = 30
- self.max_valid_samples = 10
- self.max_test_samples = 10
+ self.max_train_samples = 10
+ self.max_valid_samples = 5
+ self.max_test_samples = 5
self.warmup_steps = 10
self.weight_decay = 0.01
@@ -749,126 +711,80 @@ def setUp(self):
@parameterized.expand(
random.sample(
_get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS)
- # + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
+ + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
+ _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS),
- 1,
+ 1, # only test one
),
skip_on_empty=True,
)
- def test_trainer_fp16_ds_stage1(self, test_name, model_name, feature, data_metric_config):
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = get_ort_training_args(
- feature=feature,
- output_dir=tmp_dir,
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- fp16=True,
- deepspeed="onnxruntime/ds_configs/ds_config_zero_stage_1.json",
- )
-
- trainer, _ = get_ort_trainer(
- model_name,
- feature,
- data_metric_config,
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- gc.collect()
+ def test_trainer_fp16_ds_stage1(self, test_name, model_name, task, data_metric_config):
+ with mockenv_context(**self.dist_env_1_gpu):
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ training_args = get_ort_training_args(
+ task=task,
+ output_dir=tmp_dir,
+ num_train_epochs=self.n_epochs,
+ per_device_train_batch_size=self.per_device_train_batch_size,
+ per_device_eval_batch_size=self.per_device_eval_batch_size,
+ warmup_steps=self.warmup_steps,
+ weight_decay=self.weight_decay,
+ logging_dir=tmp_dir,
+ fp16=True,
+ deepspeed="onnxruntime/ds_configs/ds_config_zero_stage_1.json",
+ )
+
+ trainer, _ = get_ort_trainer(
+ model_name,
+ task,
+ data_metric_config,
+ training_args,
+ max_seq_length=self.max_seq_length,
+ max_train_samples=self.max_train_samples,
+ max_valid_samples=self.max_valid_samples,
+ max_test_samples=self.max_test_samples,
+ )
+
+ trainer.train()
+ gc.collect()
@parameterized.expand(
random.sample(
_get_models_to_test(_ENCODERS_TO_TEST, _ENCODER_TASKS_DATASETS_CONFIGS)
- # + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
+ + _get_models_to_test(_DECODERS_TO_TEST, _DECODER_TASKS_DATASETS_CONFIGS)
+ _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS),
1,
),
skip_on_empty=True,
)
- def test_trainer_fp16_ds_stage2(self, test_name, model_name, feature, data_metric_config):
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = get_ort_training_args(
- feature=feature,
- output_dir=tmp_dir,
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- fp16=True,
- deepspeed="onnxruntime/ds_configs/ds_config_zero_stage_2.json",
- )
-
- trainer, _ = get_ort_trainer(
- model_name,
- feature,
- data_metric_config,
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- gc.collect()
-
-
-@slow
-@pytest.mark.skip(reason="skip for now, server socket error")
-class ORTTrainerIntegrationDDPTest(unittest.TestCase):
- def test_trainer_ddp_glue(self):
- subprocess.run(
- "cp ../examples/onnxruntime/training/text-classification/run_glue.py ./",
- shell=True,
- )
-
- subprocess.run(
- f"{sys.executable} -m torch.distributed.launch"
- " --nproc_per_node=1"
- " run_glue.py"
- " --model_name_or_path distilbert-base-uncased"
- " --task_name mnli"
- " --max_seq_length 128"
- " --learning_rate 3e-6"
- " --do_train"
- " --output_dir /tmp/distilbert"
- " --overwrite_output_dir"
- " --max_steps 200"
- " --logging_steps 20"
- " --per_device_train_batch_size 32"
- " --fp16 --optim adamw_ort_fused"
- " --max_train_samples 500",
- shell=True,
- check=True,
- )
-
-
-# List supported ORT optimizers to test
-optim_test_params = []
-if is_torch_available():
- default_adam_kwargs = {
- "betas": (ORTTrainingArguments.adam_beta1, ORTTrainingArguments.adam_beta2),
- "eps": ORTTrainingArguments.adam_epsilon,
- "lr": ORTTrainingArguments.learning_rate,
- }
-
- optim_test_params = [
- (
- ORTOptimizerNames.ADAMW_ORT_FUSED,
- onnxruntime.training.optim.FusedAdam,
- default_adam_kwargs,
- ),
- ]
+ def test_trainer_fp16_ds_stage2(self, test_name, model_name, task, data_metric_config):
+ with mockenv_context(**self.dist_env_1_gpu):
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ training_args = get_ort_training_args(
+ task=task,
+ output_dir=tmp_dir,
+ num_train_epochs=self.n_epochs,
+ per_device_train_batch_size=self.per_device_train_batch_size,
+ per_device_eval_batch_size=self.per_device_eval_batch_size,
+ warmup_steps=self.warmup_steps,
+ weight_decay=self.weight_decay,
+ logging_dir=tmp_dir,
+ fp16=True,
+ deepspeed="onnxruntime/ds_configs/ds_config_zero_stage_2.json",
+ )
+
+ trainer, _ = get_ort_trainer(
+ model_name,
+ task,
+ data_metric_config,
+ training_args,
+ max_seq_length=self.max_seq_length,
+ max_train_samples=self.max_train_samples,
+ max_valid_samples=self.max_valid_samples,
+ max_test_samples=self.max_test_samples,
+ )
+
+ trainer.train()
+ gc.collect()
@slow
@@ -876,21 +792,6 @@ def test_trainer_ddp_glue(self):
class ORTTrainerOptimizerChoiceTest(unittest.TestCase):
def setUp(self):
super().setUp()
- args = ORTTrainingArguments("..")
- self.n_epochs = min(args.num_train_epochs, 1)
- self.per_device_train_batch_size = args.per_device_train_batch_size
- self.per_device_eval_batch_size = args.per_device_eval_batch_size
-
- self.max_seq_length = 64
- self.max_train_samples = 50
- self.max_valid_samples = 20
- self.max_test_samples = 10
-
- self.warmup_steps = 10
- self.weight_decay = 0.01
-
- self.model_name = "bert-base-cased"
- self.feature = "text-classification"
def check_optim_and_kwargs(self, optim: OptimizerNames, mandatory_kwargs, expected_cls):
args = ORTTrainingArguments(optim=optim, output_dir="None")
@@ -903,37 +804,6 @@ def check_optim_and_kwargs(self, optim: OptimizerNames, mandatory_kwargs, expect
actual_v = optim_kwargs[p]
self.assertTrue(actual_v == v, f"Failed check for {p}. Expected {v}, but got {actual_v}.")
- @parameterized.expand(optim_test_params, skip_on_empty=True)
- def test_optim_supported(self, name: str, expected_cls, mandatory_kwargs):
- # exercises all the valid --optim options
- self.check_optim_and_kwargs(name, mandatory_kwargs, expected_cls)
-
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = ORTTrainingArguments(
- optim=name,
- output_dir=tmp_dir,
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- )
-
- trainer, _ = get_ort_trainer(
- self.model_name,
- self.feature,
- _ENCODER_TASKS_DATASETS_CONFIGS[self.feature],
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- gc.collect()
-
def test_ort_fused_adam(self):
# Pretend that onnxruntime-training is installed and mock onnxruntime.training.optim.FusedAdam exists.
# Trainer.get_optimizer_cls_and_kwargs does not use FusedAdam. It only has to return the
@@ -951,56 +821,3 @@ def test_ort_fused_adam(self):
default_adam_kwargs,
mock.optimizers.FusedAdam,
)
-
-
-class ORTSeq2SeqTrainerSpecificIntegrationTest(unittest.TestCase):
- def setUp(self):
- super().setUp()
- args = ORTTrainingArguments("..")
- self.n_epochs = min(args.num_train_epochs, 1)
- self.per_device_train_batch_size = args.per_device_train_batch_size
- self.per_device_eval_batch_size = args.per_device_eval_batch_size
-
- self.max_seq_length = 32
- self.max_train_samples = 10
- self.max_valid_samples = 10
- self.max_test_samples = 10
-
- self.warmup_steps = 10
- self.weight_decay = 0.01
-
- @parameterized.expand(
- _get_models_to_test(_SEQ2SEQ_MODELS_TO_TEST, _SEQ2SEQ_TASKS_DATASETS_CONFIGS),
- skip_on_empty=True,
- )
- def test_predict_with_generate_ort(self, test_name, model_name, feature, data_metric_config):
- with tempfile.TemporaryDirectory() as tmp_dir:
- training_args = get_ort_training_args(
- feature=feature,
- output_dir=tmp_dir,
- evaluation_strategy="epoch",
- num_train_epochs=self.n_epochs,
- per_device_train_batch_size=self.per_device_train_batch_size,
- per_device_eval_batch_size=self.per_device_eval_batch_size,
- warmup_steps=self.warmup_steps,
- weight_decay=self.weight_decay,
- logging_dir=tmp_dir,
- label_smoothing_factor=0.1,
- predict_with_generate=True,
- )
-
- trainer, test_dataset = get_ort_trainer(
- model_name,
- feature,
- data_metric_config,
- training_args,
- max_seq_length=self.max_seq_length,
- max_train_samples=self.max_train_samples,
- max_valid_samples=self.max_valid_samples,
- max_test_samples=self.max_test_samples,
- )
-
- trainer.train()
- trainer.evaluate(inference_with_ort=True)
- trainer.predict(test_dataset, inference_with_ort=True)
- gc.collect()