fix from_numpy caused error #1765

Merged · 3 commits · Oct 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
mindnlp/core/nn/modules/module.py (3 additions, 0 deletions)

@@ -572,6 +572,9 @@ def remove_from(*dicts_or_sets):
                d.discard(name)

        params = self.__dict__.get('_parameters')

+       if isinstance(value, StubTensor):
+           value = value.stub_sync()
        if isinstance(value, Parameter):
            if params is None:
                raise AttributeError(
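The guard added above matters because MindSpore's asynchronous executor can hand `__setattr__` a `StubTensor` placeholder rather than a concrete value; without `stub_sync()`, the `isinstance(value, Parameter)` check below it would misclassify the assignment. A minimal standalone sketch of the same guard (the `mindspore.common._stub_tensor` import path is an internal MindSpore detail and an assumption here, hence the defensive import):

```python
def materialize(value):
    """Resolve MindSpore's lazy StubTensor placeholders before type checks.

    Sketch of the guard in Module.__setattr__: stub_sync() blocks until the
    concrete Tensor behind the asynchronous stub is available.
    """
    try:
        # Internal MindSpore module; the exact path may differ between releases.
        from mindspore.common._stub_tensor import StubTensor
    except ImportError:
        return value
    if isinstance(value, StubTensor):
        value = value.stub_sync()  # wait for the async kernel, get the real Tensor
    return value
```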
mindnlp/core/serialization.py (21 additions, 11 deletions)

@@ -805,7 +805,7 @@ def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, bac
    else:
        order = "C"
    array = array.reshape(size, order=order)
-   param = Tensor.from_numpy(array)
+   param = Tensor(array)
    return param

def _rebuild_from_type_v2(func, new_type, args, state):
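The `Tensor.from_numpy(...)` to `Tensor(...)` swaps that recur through this file are the fix the PR title refers to: `from_numpy` is zero-copy and aliases the NumPy buffer, but the arrays built here come from `np.frombuffer` over checkpoint bytes and are read-only views, which the zero-copy path cannot safely take over. `Tensor(array)` copies instead. A small sketch of the difference (the exact error raised by the zero-copy path depends on the MindSpore build):

```python
import numpy as np
from mindspore import Tensor

buf = bytes(16)                             # stands in for bytes read from a checkpoint
arr = np.frombuffer(buf, dtype=np.float32)  # zero-copy view; arr.flags.writeable is False

# t = Tensor.from_numpy(arr)  # zero-copy: would alias the read-only buffer (the failing path)
t = Tensor(arr)               # copies into Tensor-owned memory; safe once `buf` goes away
```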
@@ -1134,7 +1134,7 @@ def persistent_load(saved_id):
    if array.dtype == bfloat16 and not SUPPORT_BF16:
        logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
        array = array.astype(np.float16)
-   new_result[k] = Tensor.from_numpy(array)
+   new_result[k] = Tensor(array)

    return new_result
@@ -1380,9 +1380,9 @@ def legacy_safe_load_file(filename):
    arr = np.frombuffer(v["data"], dtype=dtype).reshape(v["shape"])

    if (not SUPPORT_BF16 and dtype != bfloat16) or SUPPORT_BF16:
-       result[k] = Tensor.from_numpy(arr)
+       result[k] = Tensor(arr)
    else:
-       result[k] = Tensor.from_numpy(arr.astype(np.float16))
+       result[k] = Tensor(arr.astype(np.float16))
    return result
@@ -1402,17 +1402,27 @@ def safe_load_file(filename):
    """
    def convert(info: dict[str, Any]):
        numpy_dtype = _NP_TYPES[info['dtype']]
+       ms_dtype = _MS_TYPES[info['dtype']]
        shape: list[int] = info['shape']
        begin, end = info['data_offsets']
        assert 0 <= begin <= end <= len(byte_buf)
        assert end - begin == math.prod(shape) * np.dtype(numpy_dtype).itemsize
        buf = byte_buf[begin:end]
-       array = np.frombuffer(buf, dtype=numpy_dtype).reshape(shape)
-       if array.dtype == bfloat16 and not SUPPORT_BF16:
-           logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
-           array = array.astype(np.float16)
-
-       return Tensor.from_numpy(array)
+       try:
+           if info['dtype'] == 'BF16' and not SUPPORT_BF16:
+               logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
+               ms_dtype = mindspore.float16
+           out = Tensor.convert_bytes_to_tensor(buf, tuple(shape), ms_dtype)
+       except:
+           array = np.frombuffer(buf, dtype=numpy_dtype).reshape(shape)
+
+           if array.dtype == bfloat16 and not SUPPORT_BF16:
+               logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
+               array = array.astype(np.float16)
+           array = array.astype(array.dtype)
+           out = Tensor(array)
+       return out

    with open(filename, "rb") as fp:
        header_size, = struct.unpack('<Q', fp.read(8))
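For context, `safe_load_file` leans on the safetensors on-disk layout: an 8-byte little-endian header size, a JSON header mapping tensor names to `dtype`/`shape`/`data_offsets`, then one flat byte buffer. A self-contained sketch of that framing (the reader name is ours, not mindnlp's):

```python
import json
import struct

def read_safetensors_header(filename):
    """Parse the safetensors framing that convert() above consumes:
    <8-byte LE header size><JSON header><flat tensor byte buffer>."""
    with open(filename, "rb") as fp:
        header_size, = struct.unpack('<Q', fp.read(8))
        header = json.loads(fp.read(header_size))  # name -> {dtype, shape, data_offsets}
        byte_buf = fp.read()                       # all tensor data, addressed by data_offsets
    return header, byte_buf
```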
@@ -1506,7 +1516,7 @@ def load_checkpoint(ckpt_file_name):
    dims = element.tensor.dims
    param_data = np.frombuffer(data, np_type)
    param_data = param_data.reshape(list(dims))
-   parameter = Tensor(param_data, ms_type)
+   parameter = Tensor(param_data)
    parameter_dict[element.tag] = parameter
    continue
    element_data = np.frombuffer(data, np_type)

@@ -1526,7 +1536,7 @@
    param_data = int(param_data[0])
    if dims not in ([0], [1]):
        param_data = param_data.reshape(list(dims))
-   parameter = Tensor(param_data, ms_type)
+   parameter = Tensor(param_data)
    parameter_dict[element.tag] = parameter

except BaseException as e:
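Dropping the explicit `ms_type` argument in the `load_checkpoint` hunks means the Tensor dtype is now inferred from the NumPy array built with `np_type`. A quick illustration:

```python
import numpy as np
from mindspore import Tensor

param_data = np.frombuffer(bytes(8), dtype=np.float32).reshape([2])
parameter = Tensor(param_data)  # dtype inferred from the array: mindspore.float32
print(parameter.dtype)
```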
mindnlp/transformers/cache_utils.py (1 addition, 1 deletion)

@@ -1146,7 +1146,7 @@ def _sliding_update(self, cache_position, layer_idx, key_states, value_states, k
    # into consideration when building kv cache instead of just throwing away tokens outside of the window
    return key_states, value_states

-   slicing = ops.ones(max_cache_len, dtype=mindspore.int64).cumsum(0)
+   slicing = ops.ones(max_cache_len, dtype=mindspore.int32).cumsum(0)
    cache_position = cache_position.clamp(0, max_cache_len - 1)
    to_shift = cache_position >= max_cache_len - 1
    indices = (slicing + to_shift[-1].int() - 1) % max_cache_len
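This change only narrows the index vector's dtype from int64 to int32; the surrounding ring-buffer arithmetic is unchanged. A NumPy sketch of that index math (function name ours):

```python
import numpy as np

def sliding_indices(max_cache_len, newest_position):
    """NumPy rendering of the _sliding_update index math: identity while the
    window is filling, rotate-left-by-one once the newest position reaches
    the end of the window (evicting the oldest slot)."""
    slicing = np.ones(max_cache_len, dtype=np.int32).cumsum(0)  # [1, 2, ..., max_cache_len]
    to_shift = int(newest_position >= max_cache_len - 1)
    return (slicing + to_shift - 1) % max_cache_len

print(sliding_indices(4, newest_position=1))  # [0 1 2 3] -> window not yet full
print(sliding_indices(4, newest_position=3))  # [1 2 3 0] -> rotated, oldest slot reused
```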
mindnlp/transformers/configuration_utils.py (0 additions, 7 deletions)

@@ -335,13 +335,6 @@ def __init__(self, **kwargs):
            "but only 'regression', 'single_label_classification' and 'multi_label_classification' are valid."
        )

-       # TPU arguments
-       if kwargs.pop("xla_device", None) is not None:
-           logger.warning(
-               "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can "
-               "safely remove it from your `config.json` file."
-           )
-
        # Name or path to the pretrained checkpoint
        self._name_or_path = str(kwargs.pop("name_or_path", ""))
        # Config hash
mindnlp/transformers/generation/utils.py (2 additions, 3 deletions)

@@ -1869,7 +1869,6 @@ def generate(
    # - `model_kwargs` may be updated in place with a cache as defined by the parameters in `generation_config`.
    # - different models have a different cache name expected by the model (default = "past_key_values")
    # - `max_length`, prepared above, is used to determine the maximum cache length
-   # TODO (joao): remove `user_defined_cache` after v4.47 (remove default conversion to legacy format)
    cache_name = "past_key_values" if "mamba" not in self.__class__.__name__.lower() else "cache_params"
    user_defined_cache = model_kwargs.get(cache_name)
    max_cache_length = generation_config.max_length

@@ -2174,7 +2173,7 @@ def typeerror():

    # Convert to legacy cache format if requested
    if (
-       generation_config.return_legacy_cache is not False  # Should check for `True` after v4.47
+       generation_config.return_legacy_cache is not False
        and hasattr(result, "past_key_values")
        and hasattr(result.past_key_values, "to_legacy_cache")
        and result.past_key_values.to_legacy_cache is not None

@@ -2192,7 +2191,7 @@
        )
        if not is_user_defined_cache and is_default_cache_type:
            logger.warning_once(
-               "From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` "
+               "When a model cache is to be returned, `generate` will return a `Cache` "
                "instance instead by default (as opposed to the legacy tuple of tuples format). If you want to "
                "keep returning the legacy format, please set `return_legacy_cache=True`."
            )
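The `return_legacy_cache` path being edited converts a `Cache` object back to the tuple-of-tuples format older code expects. A hedged sketch of that round trip, assuming mindnlp's `cache_utils` mirrors the `transformers` API it is ported from:

```python
# Assumes mindnlp.transformers.cache_utils mirrors the upstream transformers Cache API.
from mindnlp.transformers.cache_utils import DynamicCache

cache = DynamicCache()                             # normally filled by the model while decoding
legacy = cache.to_legacy_cache()                   # tuple of per-layer (key, value) tuples
restored = DynamicCache.from_legacy_cache(legacy)  # and back to a Cache instance
```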
mindnlp/transformers/modeling_rope_utils.py (6 additions, 6 deletions)

@@ -40,7 +40,7 @@ def _compute_default_rope_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

@@ -78,7 +78,7 @@ def _compute_linear_scaling_rope_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

@@ -116,7 +116,7 @@ def _compute_dynamic_ntk_parameters(
    seq_len (`int`, *optional*):
        The current sequence length, used to update the dynamic RoPE at inference time.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

@@ -162,7 +162,7 @@ def _compute_yarn_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.

@@ -238,7 +238,7 @@ def _compute_longrope_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.

@@ -300,7 +300,7 @@ def _compute_llama3_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
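All six hunks above make the same docstring edit; the functions still return the inverse-frequency vector the docstrings describe. For reference, a NumPy sketch of the default computation (function name ours; the real helper returns a `mindspore.Tensor`):

```python
import numpy as np

def default_rope_parameters(dim, base=10000.0):
    """Default RoPE: inverse frequencies 1 / base^(2i/dim) over even indices,
    plus the post-processing scaling factor (1.0, i.e. unused, for this type)."""
    inv_freq = 1.0 / (base ** (np.arange(0, dim, 2, dtype=np.float32) / dim))
    attention_scaling = 1.0
    return inv_freq, attention_scaling

inv_freq, _ = default_rope_parameters(dim=8)
print(inv_freq)  # [1.0, 0.1, 0.01, 0.001] for dim=8, base=10000
```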
mindnlp/transformers/modeling_utils.py (2 additions, 2 deletions)

@@ -1233,7 +1233,7 @@ def can_generate(cls) -> bool:
            continue
        if "PreTrainedModel" not in str(base) and base.can_generate():
            return True
-   # BC: Detects whether `prepare_inputs_for_generation` has been overwritten in the model. Prior to v4.45, this
+   # BC: Detects whether `prepare_inputs_for_generation` has been overwritten in the model. this
    # was how we detected whether a model could generate.
    if "GenerationMixin" not in str(cls.prepare_inputs_for_generation):
        logger.warning_once(

@@ -2022,7 +2022,7 @@ def save_pretrained(
            "To avoid this behavior and this warning, we recommend you to overwrite the generation "
            "config model attribute before calling the model's `save_pretrained`, preferably also "
            "removing any generation kwargs from the model config. This warning will be raised to an "
-           "exception in v4.41."
+           "exception."
        )
        model_to_save.generation_config.save_pretrained(save_directory)
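The BC branch in `can_generate` detects an override by checking whether the method's repr still points at `GenerationMixin`, since an overriding class changes the function's qualified name. A minimal demonstration of the trick:

```python
class GenerationMixin:
    def prepare_inputs_for_generation(self, *args, **kwargs):
        pass

class MyModel(GenerationMixin):
    def prepare_inputs_for_generation(self, *args, **kwargs):  # override
        pass

# The repr includes the defining class's qualname, e.g.
# "<function GenerationMixin.prepare_inputs_for_generation at 0x...>"
print("GenerationMixin" in str(GenerationMixin.prepare_inputs_for_generation))  # True
print("GenerationMixin" in str(MyModel.prepare_inputs_for_generation))          # False
```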
mindnlp/transformers/models/__init__.py (3 additions, 0 deletions)

@@ -203,6 +203,7 @@
    sew_d,
    speech_encoder_decoder,
    speech_to_text,
+   speech_to_text_2,
    speecht5,
    stablelm,
    splinter,

@@ -444,6 +445,7 @@
from .sew_d import *
from .speech_encoder_decoder import *
from .speech_to_text import *
+from .speech_to_text_2 import *
from .speecht5 import *
from .stablelm import *
from .splinter import *

@@ -685,6 +687,7 @@
__all__.extend(sew_d.__all__)
__all__.extend(speech_encoder_decoder.__all__)
__all__.extend(speech_to_text.__all__)
+__all__.extend(speech_to_text_2.__all__)
__all__.extend(speecht5.__all__)
__all__.extend(stablelm.__all__)
__all__.extend(splinter.__all__)
mindnlp/transformers/models/auto/__init__.py (2 additions, 0 deletions)

@@ -34,6 +34,7 @@
from .processing_auto import PROCESSOR_MAPPING, AutoProcessor

from .modeling_auto import (
+   MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
    MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING,
    MODEL_FOR_AUDIO_XVECTOR_MAPPING,
    MODEL_FOR_BACKBONE_MAPPING,

@@ -116,6 +117,7 @@
    "FEATURE_EXTRACTOR_MAPPING", "AutoFeatureExtractor",
    "IMAGE_PROCESSOR_MAPPING", "AutoImageProcessor",
    "PROCESSOR_MAPPING", "AutoProcessor",
+   "MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
    'MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING',
    'MODEL_FOR_AUDIO_XVECTOR_MAPPING',
    'MODEL_FOR_BACKBONE_MAPPING',
mindnlp/transformers/models/auto/configuration_auto.py (4 additions, 0 deletions)

@@ -185,9 +185,12 @@
    ("roc_bert", "RoCBertConfig"),
    ("rwkv", "RwkvConfig"),
    ("sam", "SamConfig"),
+   ("seamless_m4t", "SeamlessM4TConfig"),
+   ("seamless_m4t_v2", "SeamlessM4Tv2Config"),
    ("segformer", "SegformerConfig"),
    ("speech-encoder-decoder", "SpeechEncoderDecoderConfig"),
    ("speech_to_text", "Speech2TextConfig"),
+   ("speech_to_text_2", "Speech2Text2Config"),
    ("speecht5", "SpeechT5Config"),
    ("stablelm", "StableLmConfig"),
    ("splinter", "SplinterConfig"),

@@ -674,6 +677,7 @@
    ("rwkv", "RWKV"),
    ("sam", "SAM"),
    ("seamless_m4t", "SeamlessM4T"),
+   ("seamless_m4t_v2", "SeamlessM4Tv2"),
    ("segformer", "SegFormer"),
    ("sew", "SEW"),
    ("sew-d", "SEW-D"),
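With the new mapping entries, the auto classes can resolve these model types by name. A hedged usage sketch, assuming mindnlp keeps the `AutoConfig.for_model` entry point it inherits from `transformers`:

```python
from mindnlp.transformers import AutoConfig

# Model types registered by this PR now resolve to their config classes.
cfg = AutoConfig.for_model("speech_to_text_2")    # -> Speech2Text2Config
cfg_v2 = AutoConfig.for_model("seamless_m4t_v2")  # -> SeamlessM4Tv2Config
print(type(cfg).__name__, type(cfg_v2).__name__)
```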
mindnlp/transformers/models/auto/image_processing_auto.py (2 additions, 2 deletions)

@@ -394,7 +394,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
            "Could not find image processor class in the image processor config or the model config. Loading "
            "based on pattern matching with the model's feature extractor configuration. Please open a "
            "PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
-           "`feature_extractor_type`. This warning will be removed in v4.40."
+           "`feature_extractor_type`. This warning will be removed."
        )
        image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
        if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):

@@ -404,7 +404,7 @@
            "Could not find image processor auto map in the image processor config or the model config. "
            "Loading based on pattern matching with the model's feature extractor configuration. Please open a "
            "PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
-           "`AutoFeatureExtractor`. This warning will be removed in v4.40."
+           "`AutoFeatureExtractor`. This warning will be removed."
        )

        # If we don't find the image processor class in the image processor config, let's try the model config.