
Open-source internship: LayoutLM model application development (Jin Yi) #1957


Closed · wants to merge 5 commits
5,631 changes: 5,631 additions & 0 deletions applications/LayoutLM/layoutlm.ipynb

Large diffs are not rendered by default.
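The notebook holds the actual LayoutLM application but is not rendered on this page. For orientation, the sketch below shows what a minimal LayoutLM token-classification setup in mindnlp typically looks like; the class names, the `"ms"` tensor type, the checkpoint id, and the FUNSD label count are assumptions based on mindnlp mirroring the Hugging Face transformers API, not content taken from the notebook.

```python
# Hypothetical sketch, not the notebook's code: LayoutLM token classification
# via mindnlp, assuming it mirrors the transformers API.
import mindspore
from mindnlp.transformers import LayoutLMTokenizer, LayoutLMForTokenClassification

tokenizer = LayoutLMTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
model = LayoutLMForTokenClassification.from_pretrained(
    "microsoft/layoutlm-base-uncased",
    num_labels=7,  # e.g. the 7 BIO labels of the FUNSD form-understanding task
)

encoding = tokenizer("Invoice Total $42.00", return_tensors="ms")
seq_len = encoding["input_ids"].shape[1]

# LayoutLM takes one bounding box per token on a 0-1000 normalized page grid.
# A real pipeline aligns OCR word boxes to wordpieces; reusing a single box
# here only demonstrates the required shape (batch, seq_len, 4).
bbox = mindspore.Tensor([[[48, 84, 156, 98]] * seq_len], mindspore.int64)

outputs = model(input_ids=encoding["input_ids"], bbox=bbox)
print(outputs.logits.shape)  # (1, seq_len, num_labels)
```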

2 changes: 2 additions & 0 deletions mindnlp/peft/__init__.py
@@ -53,6 +53,8 @@
PolyModel,
LNTuningConfig,
LNTuningModel,
VeraConfig,
VeraModel
)

from .utils import (
167 changes: 167 additions & 0 deletions mindnlp/peft/import_utils.py
@@ -0,0 +1,167 @@
# # Copyright 2023-present the HuggingFace Inc. team.
# #
# # Licensed under the Apache License, Version 2.0 (the "License");
# # you may not use this file except in compliance with the License.
# # You may obtain a copy of the License at
# #
# #     http://www.apache.org/licenses/LICENSE-2.0
# #
# # Unless required by applicable law or agreed to in writing, software
# # distributed under the License is distributed on an "AS IS" BASIS,
# # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# # See the License for the specific language governing permissions and
# # limitations under the License.
# import importlib
# import importlib.metadata as importlib_metadata
# import platform
# from functools import lru_cache

# import packaging.version
# import torch


# @lru_cache
# def is_bnb_available() -> bool:
#     return importlib.util.find_spec("bitsandbytes") is not None


# @lru_cache
# def is_bnb_4bit_available() -> bool:
#     if not is_bnb_available():
#         return False

#     import bitsandbytes as bnb

#     return hasattr(bnb.nn, "Linear4bit")


# @lru_cache
# def is_auto_gptq_available():
#     if importlib.util.find_spec("auto_gptq") is not None:
#         AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.5.0")
#         version_autogptq = packaging.version.parse(importlib_metadata.version("auto_gptq"))
#         if AUTOGPTQ_MINIMUM_VERSION <= version_autogptq:
#             return True
#         else:
#             raise ImportError(
#                 f"Found an incompatible version of auto-gptq. Found version {version_autogptq}, "
#                 f"but only versions above {AUTOGPTQ_MINIMUM_VERSION} are supported"
#             )


# @lru_cache
# def is_gptqmodel_available():
#     if importlib.util.find_spec("gptqmodel") is not None:
#         GPTQMODEL_MINIMUM_VERSION = packaging.version.parse("1.7.0")
#         OPTIMUM_MINIMUM_VERSION = packaging.version.parse("1.23.99")
#         version_gptqmodel = packaging.version.parse(importlib_metadata.version("gptqmodel"))
#         if GPTQMODEL_MINIMUM_VERSION <= version_gptqmodel:
#             if is_optimum_available():
#                 version_optimum = packaging.version.parse(importlib_metadata.version("optimum"))
#                 if OPTIMUM_MINIMUM_VERSION <= version_optimum:
#                     return True
#                 else:
#                     raise ImportError(
#                         f"gptqmodel requires optimum version {OPTIMUM_MINIMUM_VERSION} or higher. Found version {version_optimum}, "
#                         f"but only versions above {OPTIMUM_MINIMUM_VERSION} are supported"
#                     )
#             else:
#                 raise ImportError(
#                     f"gptqmodel requires optimum version {OPTIMUM_MINIMUM_VERSION} or higher to be installed."
#                 )
#         else:
#             raise ImportError(
#                 f"Found an incompatible version of gptqmodel. Found version {version_gptqmodel}, "
#                 f"but only versions above {GPTQMODEL_MINIMUM_VERSION} are supported"
#             )


# @lru_cache
# def is_optimum_available() -> bool:
#     return importlib.util.find_spec("optimum") is not None


# @lru_cache
# def is_torch_tpu_available(check_device=True):
#     "Checks if `torch_xla` is installed and potentially if a TPU is in the environment"
#     if importlib.util.find_spec("torch_xla") is not None:
#         if check_device:
#             # We need to check if `xla_device` can be found, will raise a RuntimeError if not
#             try:
#                 import torch_xla.core.xla_model as xm

#                 _ = xm.xla_device()
#                 return True
#             except RuntimeError:
#                 return False
#         return True
#     return False


# @lru_cache
# def is_aqlm_available():
#     return importlib.util.find_spec("aqlm") is not None


# @lru_cache
# def is_auto_awq_available():
#     return importlib.util.find_spec("awq") is not None


# @lru_cache
# def is_eetq_available():
#     return importlib.util.find_spec("eetq") is not None


# @lru_cache
# def is_hqq_available():
#     return importlib.util.find_spec("hqq") is not None


# @lru_cache
# def is_torchao_available():
#     if importlib.util.find_spec("torchao") is None:
#         return False

#     TORCHAO_MINIMUM_VERSION = packaging.version.parse("0.4.0")
#     try:
#         torchao_version = packaging.version.parse(importlib_metadata.version("torchao"))
#     except importlib_metadata.PackageNotFoundError:
#         # Same idea as in diffusers:
#         # https://github.com/huggingface/diffusers/blob/9f06a0d1a4a998ac6a463c5be728c892f95320a8/src/diffusers/utils/import_utils.py#L351-L357
#         # It's not clear under what circumstances `importlib_metadata.version("torchao")` can raise an error even
#         # though `importlib.util.find_spec("torchao") is not None` but it has been observed, so adding this for
#         # precaution.
#         return False

#     if torchao_version < TORCHAO_MINIMUM_VERSION:
#         raise ImportError(
#             f"Found an incompatible version of torchao. Found version {torchao_version}, "
#             f"but only versions above {TORCHAO_MINIMUM_VERSION} are supported"
#         )
#     return True


# @lru_cache
# def is_xpu_available(check_device=False):
#     """
#     Checks if XPU acceleration is available and potentially if an XPU is in the environment
#     """
#     system = platform.system()
#     if system == "Darwin":
#         return False
#     else:
#         if check_device:
#             try:
#                 # Will raise a RuntimeError if no XPU is found
#                 _ = torch.xpu.device_count()
#                 return torch.xpu.is_available()
#             except RuntimeError:
#                 return False
#         return hasattr(torch, "xpu") and torch.xpu.is_available()


# @lru_cache
# def is_diffusers_available():
#     return importlib.util.find_spec("diffusers") is not None
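Every probe above follows the same pattern: an `lru_cache`-memoized function that checks `importlib.util.find_spec` for an optional dependency and, where it matters, gates on a minimum version via `importlib.metadata`. Since the file is committed fully commented out (its `torch` import has no counterpart in mindnlp), here is the same pattern restated for a MindSpore context; the function name and version floor are illustrative assumptions, not part of this PR.

```python
# The availability-probe pattern from the file above, adapted as an
# illustration for MindSpore. Name and version floor are assumptions.
import importlib
import importlib.metadata as importlib_metadata
from functools import lru_cache

import packaging.version


@lru_cache
def is_mindspore_2_available() -> bool:
    """True if mindspore is installed at version 2.0.0 or newer."""
    if importlib.util.find_spec("mindspore") is None:
        return False
    installed = packaging.version.parse(importlib_metadata.version("mindspore"))
    return installed >= packaging.version.parse("2.0.0")
```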
4 changes: 4 additions & 0 deletions mindnlp/peft/mapping.py
@@ -46,6 +46,8 @@
LNTuningModel,
PrefixTuningConfig,
PromptEncoderConfig,
VeraConfig,
VeraModel
)

MODEL_TYPE_TO_PEFT_MODEL_MAPPING = {
@@ -70,6 +72,7 @@
"LOHA": LoHaConfig,
"POLY": PolyConfig,
"LN_TUNING": LNTuningConfig,
"VERA": VeraConfig
}

PEFT_TYPE_TO_TUNER_MAPPING = {
@@ -80,6 +83,7 @@
"LOHA": LoHaModel,
"POLY": PolyModel,
"LN_TUNING": LNTuningModel,
"VERA": VeraConfig
}


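These two registries are what the PEFT entry points consult: the config mapping turns a stored `"VERA"` peft type back into a `VeraConfig`, and `PEFT_TYPE_TO_TUNER_MAPPING` selects the tuner class that injects the adapter into the base model. A usage sketch follows; the exact `VeraConfig` fields mirror the upstream peft library and are assumptions here.

```python
# Sketch of how the registries are consumed end to end. VeraConfig's fields
# follow the upstream HuggingFace peft API and are assumptions for mindnlp.
from mindnlp.peft import VeraConfig, get_peft_model
from mindnlp.transformers import AutoModelForSequenceClassification

base_model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased")

# config.peft_type is "VERA"; saving/loading resolves it through the config
# registry, and adapter injection resolves it through PEFT_TYPE_TO_TUNER_MAPPING.
config = VeraConfig(r=256, target_modules=["query", "value"])
peft_model = get_peft_model(base_model, config)
peft_model.print_trainable_parameters()
```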
2 changes: 2 additions & 0 deletions mindnlp/peft/peft_model.py
@@ -43,6 +43,7 @@
LoHaModel,
PolyModel,
LNTuningModel,
VeraModel
)
from .utils import (
# SAFETENSORS_WEIGHTS_NAME,
@@ -71,6 +72,7 @@
PeftType.LOHA: LoHaModel,
PeftType.POLY: PolyModel,
PeftType.LN_TUNING: LNTuningModel,
PeftType.VERA: VeraModel
}

class PeftModel(nn.Module):
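`PEFT_TYPE_TO_MODEL_MAPPING` is the dispatch table consulted when a `PeftModel` attaches an adapter: the `PeftType` enum on the config selects the tuner class. Simplified, the lookup works like the sketch below (not the literal mindnlp source):

```python
# Simplified dispatch sketch, not the literal mindnlp source. Uses the
# PEFT_TYPE_TO_MODEL_MAPPING dict defined above.
def inject_adapter(base_model, peft_config, adapter_name="default"):
    tuner_cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type]  # e.g. VeraModel
    # Tuners in the peft design receive (model, {name: config}, adapter_name).
    return tuner_cls(base_model, {adapter_name: peft_config}, adapter_name)
```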
1 change: 1 addition & 0 deletions mindnlp/peft/tuners/__init__.py
@@ -26,3 +26,4 @@
from .multitask_prompt_tuning import MultitaskPromptEmbedding, MultitaskPromptTuningConfig, MultitaskPromptTuningInit
from .poly import PolyConfig, PolyModel
from .ln_tuning import LNTuningConfig, LNTuningModel
from .vera import VeraConfig, VeraModel
167 changes: 167 additions & 0 deletions mindnlp/peft/tuners/_buffer_dict.py
@@ -0,0 +1,167 @@
import collections
from collections import OrderedDict

import mindspore
from mindspore import Tensor

from mindnlp.core import nn


class BufferDict(nn.Module):
r"""
Holds buffers in a dictionary.

BufferDict can be indexed like a regular Python dictionary, but buffers it contains are properly registered, and
will be visible by all Cell methods. `mindspore.nn.BufferDict` is an **ordered** dictionary that respects

* the order of insertion, and
* in `mindspore.nn.BufferDict.update`, the order of the merged `OrderedDict`
or another `mindspore.nn.BufferDict` (the argument to
:meth:`~mindspore.nn.BufferDict.update`).

Note that :meth:`~mindspore.nn.BufferDict.update` with other unordered mapping
types (e.g., Python's plain `dict`) does not preserve the order of the
merged mapping.

Args:
buffers (iterable, optional):
a mapping (dictionary) of (string : :class:`~mindspore.Tensor`) or an iterable of key-value pairs
of type (string, :class:`~mindspore.Tensor`)

Example::

class MyCell(Cell):
def __init__(self):
super(MyCell, self).__init__()
self.buffers = BufferDict({
'left': Tensor(shape=(5, 10), dtype=mindspore.float32),
'right': Tensor(shape=(5, 10), dtype=mindspore.float32)
})

def construct(self, x, choice):
x = self.buffers[choice].matmul(x)
return x
"""

def __init__(self, buffers=None, persistent: bool = False):
r"""
Args:
buffers (`dict`):
A mapping (dictionary) from string to :class:`~mindspore.Tensor`, or an iterable of key-value pairs
of type (string, :class:`~mindspore.Tensor`).
"""
super(BufferDict, self).__init__()
if buffers is not None:
self.update(buffers)

self.persistent = persistent

    def __getitem__(self, key):
        return self._buffers[key]

    def __setitem__(self, key, buffer):
        self._buffers[key] = buffer

    def __delitem__(self, key):
        del self._buffers[key]

    def __len__(self):
        return len(self._buffers)

    def __iter__(self):
        return iter(self._buffers.keys())

    def __contains__(self, key):
        return key in self._buffers

    def clear(self):
        """Remove all items from the BufferDict."""
        self._buffers.clear()

    def pop(self, key):
        r"""Remove key from the BufferDict and return its buffer.

        Args:
            key (`str`):
                Key to pop from the BufferDict
        """
        v = self[key]
        del self[key]
        return v

    def keys(self):
        r"""Return an iterable of the BufferDict keys."""
        return self._buffers.keys()

    def items(self):
        r"""Return an iterable of the BufferDict key/value pairs."""
        return self._buffers.items()

    def values(self):
        r"""Return an iterable of the BufferDict values."""
        return self._buffers.values()

    def update(self, buffers):
        r"""
        Update the BufferDict with the key-value pairs from a mapping or an
        iterable, overwriting existing keys.

        Note:
            If `buffers` is an `OrderedDict`, a `BufferDict`, or an iterable of
            key-value pairs, the order of new elements in it is preserved.

        Args:
            buffers (iterable):
                a mapping (dictionary) from string to :class:`~mindspore.Tensor`,
                or an iterable of key-value pairs of type (string, :class:`~mindspore.Tensor`)
        """
        if not isinstance(buffers, collections.abc.Iterable):
            raise TypeError(
                "BufferDict.update should be called with an "
                "iterable of key/value pairs, but got " + type(buffers).__name__
            )

        if isinstance(buffers, collections.abc.Mapping):
            if isinstance(buffers, (OrderedDict, BufferDict)):
                for key, buffer in buffers.items():
                    self[key] = buffer
            else:
                for key, buffer in sorted(buffers.items()):
                    self[key] = buffer
        else:
            for j, p in enumerate(buffers):
                if not isinstance(p, collections.abc.Iterable):
                    raise TypeError(
                        "BufferDict update sequence element " + str(j)
                        + " should be Iterable; is " + type(p).__name__
                    )
                if not len(p) == 2:
                    raise ValueError(
                        "BufferDict update sequence element " + str(j)
                        + " has length " + str(len(p)) + "; 2 is required"
                    )
                self[p[0]] = p[1]

    def extra_repr(self):
        child_lines = []
        for k, p in self._buffers.items():
            size_str = "x".join(str(size) for size in p.shape)
            parastr = f"Buffer containing: [{type(p)} of size {size_str}]"
            child_lines.append(" (" + k + "): " + parastr)
        tmpstr = "\n".join(child_lines)
        return tmpstr

    def __call__(self, input):
        raise RuntimeError("BufferDict should not be called.")


class MyCell(nn.Module):
    def __init__(self):
        super().__init__()
        self.buffers = BufferDict({
            'left': Tensor([[2.0, 3.0], [1.0, 2.0]]),
            'right': Tensor([[2.0, 3.0, 4.0], [1.0, 2.0, 3.0]])
        })

    def construct(self, x, choice):
        x = self.buffers[choice].matmul(x)
        return x


if __name__ == "__main__":
    # Quick smoke test; guarded so importing this module has no side effects.
    mindspore.set_device(device_target="CPU")
    cell = MyCell()
    print(cell(Tensor([[2.0, 3.0], [1.0, 2.0]]), 'left'))
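One consequence of `update`'s branching that the demo does not show: an `OrderedDict` (or another `BufferDict`) is merged in insertion order, while a plain `dict` is inserted in sorted-key order. A small check, assuming the module path this PR gives the file:

```python
# Ordering semantics of BufferDict.update, as implemented above:
# OrderedDict keeps insertion order; a plain dict is sorted by key first.
from collections import OrderedDict
from mindspore import Tensor
from mindnlp.peft.tuners._buffer_dict import BufferDict  # path per this PR

bd = BufferDict()
bd.update(OrderedDict([("b", Tensor([1.0])), ("a", Tensor([2.0]))]))
print(list(bd.keys()))  # ['b', 'a'] -- insertion order preserved

bd2 = BufferDict()
bd2.update({"b": Tensor([1.0]), "a": Tensor([2.0])})
print(list(bd2.keys()))  # ['a', 'b'] -- plain dict entries sorted by key
```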