fix from_numpy caused error #1765

Merged · 3 commits · Oct 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
mindnlp/core/nn/modules/module.py (3 additions, 0 deletions)

@@ -572,6 +572,9 @@ def remove_from(*dicts_or_sets):
                d.discard(name)

        params = self.__dict__.get('_parameters')

+       if isinstance(value, StubTensor):
+           value = value.stub_sync()
        if isinstance(value, Parameter):
            if params is None:
                raise AttributeError(
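The guard added above matters because MindSpore's asynchronous executor can hand `__setattr__` a `StubTensor` placeholder rather than a concrete value; without `stub_sync()`, the `isinstance(value, Parameter)` check below it would misclassify the assignment. A minimal standalone sketch of the same guard (the `mindspore.common._stub_tensor` import path is an internal MindSpore detail and an assumption here, hence the defensive import):

```python
def materialize(value):
    """Resolve MindSpore's lazy StubTensor placeholders before type checks.

    Sketch of the guard in Module.__setattr__: stub_sync() blocks until the
    concrete Tensor behind the asynchronous stub is available.
    """
    try:
        # Internal MindSpore module; the exact path may differ between releases.
        from mindspore.common._stub_tensor import StubTensor
    except ImportError:
        return value
    if isinstance(value, StubTensor):
        value = value.stub_sync()  # wait for the async kernel, get the real Tensor
    return value
```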
mindnlp/core/serialization.py (21 additions, 11 deletions)

@@ -805,7 +805,7 @@ def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, bac
    else:
        order = "C"
    array = array.reshape(size, order=order)
-   param = Tensor.from_numpy(array)
+   param = Tensor(array)
    return param

def _rebuild_from_type_v2(func, new_type, args, state):
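The `Tensor.from_numpy(...)` to `Tensor(...)` swaps that recur through this file are the fix the PR title refers to: `from_numpy` is zero-copy and aliases the NumPy buffer, but the arrays built here come from `np.frombuffer` over checkpoint bytes and are read-only views, which the zero-copy path cannot safely take over. `Tensor(array)` copies instead. A small sketch of the difference (the exact error raised by the zero-copy path depends on the MindSpore build):

```python
import numpy as np
from mindspore import Tensor

buf = bytes(16)                             # stands in for bytes read from a checkpoint
arr = np.frombuffer(buf, dtype=np.float32)  # zero-copy view; arr.flags.writeable is False

# t = Tensor.from_numpy(arr)  # zero-copy: would alias the read-only buffer (the failing path)
t = Tensor(arr)               # copies into Tensor-owned memory; safe once `buf` goes away
```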
@@ -1134,7 +1134,7 @@ def persistent_load(saved_id):
    if array.dtype == bfloat16 and not SUPPORT_BF16:
        logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
        array = array.astype(np.float16)
-   new_result[k] = Tensor.from_numpy(array)
+   new_result[k] = Tensor(array)

    return new_result
@@ -1380,9 +1380,9 @@ def legacy_safe_load_file(filename):
    arr = np.frombuffer(v["data"], dtype=dtype).reshape(v["shape"])

    if (not SUPPORT_BF16 and dtype != bfloat16) or SUPPORT_BF16:
-       result[k] = Tensor.from_numpy(arr)
+       result[k] = Tensor(arr)
    else:
-       result[k] = Tensor.from_numpy(arr.astype(np.float16))
+       result[k] = Tensor(arr.astype(np.float16))
    return result
@@ -1402,17 +1402,27 @@ def safe_load_file(filename):
    """
    def convert(info: dict[str, Any]):
        numpy_dtype = _NP_TYPES[info['dtype']]
+       ms_dtype = _MS_TYPES[info['dtype']]
        shape: list[int] = info['shape']
        begin, end = info['data_offsets']
        assert 0 <= begin <= end <= len(byte_buf)
        assert end - begin == math.prod(shape) * np.dtype(numpy_dtype).itemsize
        buf = byte_buf[begin:end]
-       array = np.frombuffer(buf, dtype=numpy_dtype).reshape(shape)
-       if array.dtype == bfloat16 and not SUPPORT_BF16:
-           logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
-           array = array.astype(np.float16)
-
-       return Tensor.from_numpy(array)
+       try:
+           if info['dtype'] == 'BF16' and not SUPPORT_BF16:
+               logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
+               ms_dtype = mindspore.float16
+           out = Tensor.convert_bytes_to_tensor(buf, tuple(shape), ms_dtype)
+       except:
+           array = np.frombuffer(buf, dtype=numpy_dtype).reshape(shape)
+
+           if array.dtype == bfloat16 and not SUPPORT_BF16:
+               logger.warning_once("MindSpore do not support bfloat16 dtype, we will automaticlly convert to float16")
+               array = array.astype(np.float16)
+           array = array.astype(array.dtype)
+           out = Tensor(array)
+       return out

    with open(filename, "rb") as fp:
        header_size, = struct.unpack('<Q', fp.read(8))
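For context, `safe_load_file` leans on the safetensors on-disk layout: an 8-byte little-endian header size, a JSON header mapping tensor names to `dtype`/`shape`/`data_offsets`, then one flat byte buffer. A self-contained sketch of that framing (the reader name is ours, not mindnlp's):

```python
import json
import struct

def read_safetensors_header(filename):
    """Parse the safetensors framing that convert() above consumes:
    <8-byte LE header size><JSON header><flat tensor byte buffer>."""
    with open(filename, "rb") as fp:
        header_size, = struct.unpack('<Q', fp.read(8))
        header = json.loads(fp.read(header_size))  # name -> {dtype, shape, data_offsets}
        byte_buf = fp.read()                       # all tensor data, addressed by data_offsets
    return header, byte_buf
```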
@@ -1506,7 +1516,7 @@ def load_checkpoint(ckpt_file_name):
    dims = element.tensor.dims
    param_data = np.frombuffer(data, np_type)
    param_data = param_data.reshape(list(dims))
-   parameter = Tensor(param_data, ms_type)
+   parameter = Tensor(param_data)
    parameter_dict[element.tag] = parameter
    continue
    element_data = np.frombuffer(data, np_type)

@@ -1526,7 +1536,7 @@
    param_data = int(param_data[0])
    if dims not in ([0], [1]):
        param_data = param_data.reshape(list(dims))
-   parameter = Tensor(param_data, ms_type)
+   parameter = Tensor(param_data)
    parameter_dict[element.tag] = parameter

except BaseException as e:
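Dropping the explicit `ms_type` argument in the `load_checkpoint` hunks means the Tensor dtype is now inferred from the NumPy array built with `np_type`. A quick illustration:

```python
import numpy as np
from mindspore import Tensor

param_data = np.frombuffer(bytes(8), dtype=np.float32).reshape([2])
parameter = Tensor(param_data)  # dtype inferred from the array: mindspore.float32
print(parameter.dtype)
```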
mindnlp/transformers/cache_utils.py (1 addition, 1 deletion)

@@ -1146,7 +1146,7 @@ def _sliding_update(self, cache_position, layer_idx, key_states, value_states, k
    # into consideration when building kv cache instead of just throwing away tokens outside of the window
    return key_states, value_states

-   slicing = ops.ones(max_cache_len, dtype=mindspore.int64).cumsum(0)
+   slicing = ops.ones(max_cache_len, dtype=mindspore.int32).cumsum(0)
    cache_position = cache_position.clamp(0, max_cache_len - 1)
    to_shift = cache_position >= max_cache_len - 1
    indices = (slicing + to_shift[-1].int() - 1) % max_cache_len
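This change only narrows the index vector's dtype from int64 to int32; the surrounding ring-buffer arithmetic is unchanged. A NumPy sketch of that index math (function name ours):

```python
import numpy as np

def sliding_indices(max_cache_len, newest_position):
    """NumPy rendering of the _sliding_update index math: identity while the
    window is filling, rotate-left-by-one once the newest position reaches
    the end of the window (evicting the oldest slot)."""
    slicing = np.ones(max_cache_len, dtype=np.int32).cumsum(0)  # [1, 2, ..., max_cache_len]
    to_shift = int(newest_position >= max_cache_len - 1)
    return (slicing + to_shift - 1) % max_cache_len

print(sliding_indices(4, newest_position=1))  # [0 1 2 3] -> window not yet full
print(sliding_indices(4, newest_position=3))  # [1 2 3 0] -> rotated, oldest slot reused
```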
mindnlp/transformers/configuration_utils.py (0 additions, 7 deletions)

@@ -335,13 +335,6 @@ def __init__(self, **kwargs):
            "but only 'regression', 'single_label_classification' and 'multi_label_classification' are valid."
        )

-       # TPU arguments
-       if kwargs.pop("xla_device", None) is not None:
-           logger.warning(
-               "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can "
-               "safely remove it from your `config.json` file."
-           )
-
        # Name or path to the pretrained checkpoint
        self._name_or_path = str(kwargs.pop("name_or_path", ""))
        # Config hash
mindnlp/transformers/generation/utils.py (2 additions, 3 deletions)

@@ -1869,7 +1869,6 @@ def generate(
    # - `model_kwargs` may be updated in place with a cache as defined by the parameters in `generation_config`.
    # - different models have a different cache name expected by the model (default = "past_key_values")
    # - `max_length`, prepared above, is used to determine the maximum cache length
-   # TODO (joao): remove `user_defined_cache` after v4.47 (remove default conversion to legacy format)
    cache_name = "past_key_values" if "mamba" not in self.__class__.__name__.lower() else "cache_params"
    user_defined_cache = model_kwargs.get(cache_name)
    max_cache_length = generation_config.max_length

@@ -2174,7 +2173,7 @@ def typeerror():

    # Convert to legacy cache format if requested
    if (
-       generation_config.return_legacy_cache is not False  # Should check for `True` after v4.47
+       generation_config.return_legacy_cache is not False
        and hasattr(result, "past_key_values")
        and hasattr(result.past_key_values, "to_legacy_cache")
        and result.past_key_values.to_legacy_cache is not None

@@ -2192,7 +2191,7 @@
        )
        if not is_user_defined_cache and is_default_cache_type:
            logger.warning_once(
-               "From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` "
+               "When a model cache is to be returned, `generate` will return a `Cache` "
                "instance instead by default (as opposed to the legacy tuple of tuples format). If you want to "
                "keep returning the legacy format, please set `return_legacy_cache=True`."
            )
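The `return_legacy_cache` path being edited converts a `Cache` object back to the tuple-of-tuples format older code expects. A hedged sketch of that round trip, assuming mindnlp's `cache_utils` mirrors the `transformers` API it is ported from:

```python
# Assumes mindnlp.transformers.cache_utils mirrors the upstream transformers Cache API.
from mindnlp.transformers.cache_utils import DynamicCache

cache = DynamicCache()                             # normally filled by the model while decoding
legacy = cache.to_legacy_cache()                   # tuple of per-layer (key, value) tuples
restored = DynamicCache.from_legacy_cache(legacy)  # and back to a Cache instance
```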
mindnlp/transformers/modeling_rope_utils.py (6 additions, 6 deletions)

@@ -40,7 +40,7 @@ def _compute_default_rope_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

@@ -78,7 +78,7 @@ def _compute_linear_scaling_rope_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

@@ -116,7 +116,7 @@ def _compute_dynamic_ntk_parameters(
    seq_len (`int`, *optional*):
        The current sequence length, used to update the dynamic RoPE at inference time.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin (unused in this type of RoPE).

@@ -162,7 +162,7 @@ def _compute_yarn_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.

@@ -238,7 +238,7 @@ def _compute_longrope_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.

@@ -300,7 +300,7 @@ def _compute_llama3_parameters(
    seq_len (`int`, *optional*):
        The current sequence length. Unused for this type of RoPE.
    rope_kwargs (`Dict`, *optional*):
-       BC compatibility with the previous RoPE class instantiation, will be removed in v4.45.
+       BC compatibility with the previous RoPE class instantiation, will be removed.
    Returns:
        Tuple of (`mindspore.Tensor`, `float`), containing the inverse frequencies for the RoPE embeddings and the
        post-processing scaling factor applied to the computed cos/sin.
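All six hunks above make the same docstring edit; the functions still return the inverse-frequency vector the docstrings describe. For reference, a NumPy sketch of the default computation (function name ours; the real helper returns a `mindspore.Tensor`):

```python
import numpy as np

def default_rope_parameters(dim, base=10000.0):
    """Default RoPE: inverse frequencies 1 / base^(2i/dim) over even indices,
    plus the post-processing scaling factor (1.0, i.e. unused, for this type)."""
    inv_freq = 1.0 / (base ** (np.arange(0, dim, 2, dtype=np.float32) / dim))
    attention_scaling = 1.0
    return inv_freq, attention_scaling

inv_freq, _ = default_rope_parameters(dim=8)
print(inv_freq)  # [1.0, 0.1, 0.01, 0.001] for dim=8, base=10000
```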
mindnlp/transformers/modeling_utils.py (2 additions, 2 deletions)

@@ -1233,7 +1233,7 @@ def can_generate(cls) -> bool:
            continue
        if "PreTrainedModel" not in str(base) and base.can_generate():
            return True
-   # BC: Detects whether `prepare_inputs_for_generation` has been overwritten in the model. Prior to v4.45, this
+   # BC: Detects whether `prepare_inputs_for_generation` has been overwritten in the model. this
    # was how we detected whether a model could generate.
    if "GenerationMixin" not in str(cls.prepare_inputs_for_generation):
        logger.warning_once(

@@ -2022,7 +2022,7 @@ def save_pretrained(
            "To avoid this behavior and this warning, we recommend you to overwrite the generation "
            "config model attribute before calling the model's `save_pretrained`, preferably also "
            "removing any generation kwargs from the model config. This warning will be raised to an "
-           "exception in v4.41."
+           "exception."
        )
        model_to_save.generation_config.save_pretrained(save_directory)
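The BC branch in `can_generate` detects an override by checking whether the method's repr still points at `GenerationMixin`, since an overriding class changes the function's qualified name. A minimal demonstration of the trick:

```python
class GenerationMixin:
    def prepare_inputs_for_generation(self, *args, **kwargs):
        pass

class MyModel(GenerationMixin):
    def prepare_inputs_for_generation(self, *args, **kwargs):  # override
        pass

# The repr includes the defining class's qualname, e.g.
# "<function GenerationMixin.prepare_inputs_for_generation at 0x...>"
print("GenerationMixin" in str(GenerationMixin.prepare_inputs_for_generation))  # True
print("GenerationMixin" in str(MyModel.prepare_inputs_for_generation))          # False
```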
mindnlp/transformers/models/__init__.py (3 additions, 0 deletions)

@@ -203,6 +203,7 @@
    sew_d,
    speech_encoder_decoder,
    speech_to_text,
+   speech_to_text_2,
    speecht5,
    stablelm,
    splinter,

@@ -444,6 +445,7 @@
from .sew_d import *
from .speech_encoder_decoder import *
from .speech_to_text import *
+from .speech_to_text_2 import *
from .speecht5 import *
from .stablelm import *
from .splinter import *

@@ -685,6 +687,7 @@
__all__.extend(sew_d.__all__)
__all__.extend(speech_encoder_decoder.__all__)
__all__.extend(speech_to_text.__all__)
+__all__.extend(speech_to_text_2.__all__)
__all__.extend(speecht5.__all__)
__all__.extend(stablelm.__all__)
__all__.extend(splinter.__all__)
mindnlp/transformers/models/auto/__init__.py (2 additions, 0 deletions)

@@ -34,6 +34,7 @@
from .processing_auto import PROCESSOR_MAPPING, AutoProcessor

from .modeling_auto import (
+   MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
    MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING,
    MODEL_FOR_AUDIO_XVECTOR_MAPPING,
    MODEL_FOR_BACKBONE_MAPPING,

@@ -116,6 +117,7 @@
    "FEATURE_EXTRACTOR_MAPPING", "AutoFeatureExtractor",
    "IMAGE_PROCESSOR_MAPPING", "AutoImageProcessor",
    "PROCESSOR_MAPPING", "AutoProcessor",
+   "MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
    'MODEL_FOR_AUDIO_FRAME_CLASSIFICATION_MAPPING',
    'MODEL_FOR_AUDIO_XVECTOR_MAPPING',
    'MODEL_FOR_BACKBONE_MAPPING',
mindnlp/transformers/models/auto/configuration_auto.py (4 additions, 0 deletions)

@@ -185,9 +185,12 @@
    ("roc_bert", "RoCBertConfig"),
    ("rwkv", "RwkvConfig"),
    ("sam", "SamConfig"),
+   ("seamless_m4t", "SeamlessM4TConfig"),
+   ("seamless_m4t_v2", "SeamlessM4Tv2Config"),
    ("segformer", "SegformerConfig"),
    ("speech-encoder-decoder", "SpeechEncoderDecoderConfig"),
    ("speech_to_text", "Speech2TextConfig"),
+   ("speech_to_text_2", "Speech2Text2Config"),
    ("speecht5", "SpeechT5Config"),
    ("stablelm", "StableLmConfig"),
    ("splinter", "SplinterConfig"),

@@ -674,6 +677,7 @@
    ("rwkv", "RWKV"),
    ("sam", "SAM"),
    ("seamless_m4t", "SeamlessM4T"),
+   ("seamless_m4t_v2", "SeamlessM4Tv2"),
    ("segformer", "SegFormer"),
    ("sew", "SEW"),
    ("sew-d", "SEW-D"),
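With the new mapping entries, the auto classes can resolve these model types by name. A hedged usage sketch, assuming mindnlp keeps the `AutoConfig.for_model` entry point it inherits from `transformers`:

```python
from mindnlp.transformers import AutoConfig

# Model types registered by this PR now resolve to their config classes.
cfg = AutoConfig.for_model("speech_to_text_2")    # -> Speech2Text2Config
cfg_v2 = AutoConfig.for_model("seamless_m4t_v2")  # -> SeamlessM4Tv2Config
print(type(cfg).__name__, type(cfg_v2).__name__)
```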
mindnlp/transformers/models/auto/image_processing_auto.py (2 additions, 2 deletions)

@@ -394,7 +394,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
            "Could not find image processor class in the image processor config or the model config. Loading "
            "based on pattern matching with the model's feature extractor configuration. Please open a "
            "PR/issue to update `preprocessor_config.json` to use `image_processor_type` instead of "
-           "`feature_extractor_type`. This warning will be removed in v4.40."
+           "`feature_extractor_type`. This warning will be removed."
        )
        image_processor_class = feature_extractor_class.replace("FeatureExtractor", "ImageProcessor")
        if "AutoFeatureExtractor" in config_dict.get("auto_map", {}):

@@ -404,7 +404,7 @@
            "Could not find image processor auto map in the image processor config or the model config. "
            "Loading based on pattern matching with the model's feature extractor configuration. Please open a "
            "PR/issue to update `preprocessor_config.json` to use `AutoImageProcessor` instead of "
-           "`AutoFeatureExtractor`. This warning will be removed in v4.40."
+           "`AutoFeatureExtractor`. This warning will be removed."
        )

        # If we don't find the image processor class in the image processor config, let's try the model config.