add LoRa types Layer

yunss-ML committed Aug 26, 2023
1 parent 11496f7 commit 99d9a37
Showing 7 changed files with 178 additions and 137 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -9,6 +9,7 @@ __pycache__/
# Distribution / packaging
.Python
build/
run.py
.vscode/
develop-eggs/
dist/
4 changes: 2 additions & 2 deletions README.md
@@ -63,9 +63,9 @@ where $x\in\mathbb{R}^{k\times n}$ is the input matrix, $W_0\in\mathbb{R}^{m\tim

1. Install ``AdapterLoRa``.

```bash
pip install git+https://github.com/Baijiong-Lin/LoRA-Torch
```

```python
pip install AdapterLoRa
38 changes: 38 additions & 0 deletions core/Adapter.py
@@ -0,0 +1,38 @@
import bitsandbytes as bnb
import loratorch as LoraT
import loralib as lora
import torch.nn as nn
from typing import Callable, List, Union


class Adapters(object):

    def __init__(self, layerType: List[str], method: Callable = None) -> None:
        # Layer type names (e.g. "nn.Linear") that should be adapted.
        self.layer = layerType

    def LayerType(self, layer: List[str]) -> Union[List[str], str]:
        """Return the supported layer names, or an error message for the first unsupported one."""
        layers = ["nn.Linear", "nn.Embedding", "nn.Conv1d", "nn.Conv2d"]
        AdaptedLayer = []
        for name in layer:
            if name not in layers:
                return f"{name} is not supported. Please visit the docs for the list of supported layers."
            AdaptedLayer.append(name)
        return AdaptedLayer

    def __call__(self, fn: Callable) -> Callable:
        """Decorator: validate the requested layers, then run the wrapped function."""
        def __fn(*args, **kwargs):
            checked = self.LayerType(self.layer)
            if isinstance(checked, str):
                raise ValueError(checked)
            print(f"Layers to be adjusted with AdapterLoRa: {checked}")
            print("Adapter applied:", fn.__name__)
            return fn(*args, **kwargs)
        return __fn


class Optimizer:
    def __init__(self, optimizer: nn.Module):
        pass
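
Below is a minimal usage sketch for the `Adapters` decorator defined above, assuming the corrected version of `core/Adapter.py` shown here and that its top-level dependencies are installed; the function name `build_model` is hypothetical and used only for illustration.

```python
import torch.nn as nn
from core.Adapter import Adapters

# Decorate a model-building function with the layer types we intend to adapt.
@Adapters(["nn.Linear", "nn.Embedding"])
def build_model():
    return nn.Linear(128, 64)

# Prints the validated layer list and the decorated function name, then builds the layer.
layer = build_model()
```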
90 changes: 90 additions & 0 deletions core/LayersAdaptes.py
@@ -0,0 +1,90 @@
import loralib as LoRa
import loratorch as LoRaT
import torch.nn as nn
from typing import Optional
import bitsandbytes as bnb


def Layer(model, new_layer):
    """Copy the pretrained weights (and bias, if any) from ``model`` into ``new_layer``."""
    new_layer.weight = nn.Parameter(model.weight.detach().clone())

    if model.bias is not None:
        new_layer.bias = nn.Parameter(model.bias.detach().clone())

    return new_layer

def LoRaLinear(method: str, model: nn.Module, Rank: Optional[int], threshold: Optional[int]):
    Adapters = ["LoRa", "SandBytes", "LoRaTorch"]
    if method in Adapters:
        if method == "LoRa":
            new_layer = LoRa.Linear(
                in_features=model.in_features,
                out_features=model.out_features,
                bias=model.bias is not None,
                r=Rank
            )
            return Layer(model, new_layer)

        if method == "SandBytes":
            new_layer = bnb.nn.Linear8bitLt(
                model.in_features,
                model.out_features,
                bias=model.bias is not None,
                has_fp16_weights=False,
                threshold=threshold if threshold is not None else 6.0
            )
            return Layer(model, new_layer)

        if method == "LoRaTorch":
            new_layer = LoRaT.Linear(
                in_features=model.in_features,
                out_features=model.out_features,
                bias=model.bias is not None,
                r=Rank
            )
            return Layer(model, new_layer)

    else:
        raise ValueError(f"Unsupported method '{method}'; expected one of {Adapters}")


def LoRaEmbedding(
    method: str,
    model: nn.Module,
    Rank: Optional[int],
    lora_alpha: Optional[int],
    scale_grad_by_freq: Optional[int],
    padding_idx: Optional[int],
    max_norm: Optional[int]
):
    Adapters = ["LoRa", "SandBytes", "LoRaTorch"]
    if method in Adapters:
        if method == "LoRa":
            new_layer = LoRa.Embedding(
                model.num_embeddings,
                model.embedding_dim,
                r=Rank,
                lora_alpha=lora_alpha,
                max_norm=model.max_norm,
                scale_grad_by_freq=model.scale_grad_by_freq,
                padding_idx=model.padding_idx
            )
            return new_layer

        if method == "SandBytes":
            new_layer = bnb.nn.StableEmbedding(
                model.num_embeddings,
                model.embedding_dim
            )
            return new_layer

        if method == "LoRaTorch":
            new_layer = LoRaT.Embedding(
                model.num_embeddings,
                model.embedding_dim,
                r=Rank,
                max_norm=model.max_norm,
                scale_grad_by_freq=model.scale_grad_by_freq,
                padding_idx=model.padding_idx
            )
            return new_layer
    else:
        raise ValueError(f"Unsupported method '{method}'; expected one of {Adapters}")
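
As a hedged sketch (not part of this commit), `LoRaLinear` above can swap a plain `nn.Linear` for its LoRA counterpart while keeping the pretrained weights; this assumes `loralib` is installed and the corrected module is importable as `core.LayersAdaptes`.

```python
import torch
import torch.nn as nn
from core.LayersAdaptes import LoRaLinear

base = nn.Linear(768, 768)
# Rank controls the LoRA bottleneck size; threshold is only used by the 8-bit path.
lora_layer = LoRaLinear(method="LoRa", model=base, Rank=4, threshold=None)

x = torch.randn(2, 768)
out = lora_layer(x)          # same shape as the original layer's output
assert out.shape == (2, 768)
```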

41 changes: 9 additions & 32 deletions core/Quantized.py
@@ -1,14 +1,14 @@
import loratorch as LoraT
import torch.nn as nn
import loralib as lora
from .LayersAdaptes import *
from .Adapter import Adapters
from .utils import make_lora_replace

class CastOutputToFloat(nn.Module):
def forward(self, x):
return x.to(torch.float32)

class AdapterLoRa(nn.Module):
    def __init__(self, model: nn.Module, method: str, Rank: int, LoRa=None, BitSand=None):
"""
AdapterLoRa constructor.
@@ -18,15 +18,17 @@ def __init__(self, model: nn.Module, method: str, Rank: int):
Rank (int): The rank parameter for LoRA adaptation.
"""
super(AdapterLoRa, self).__init__()


        self.Adapters = ["LoRa", "SandBytes", "LoRaTorch"]
        self.Rank = Rank
        self.LORA = LoRa
        self.BITSAND = BitSand
        self.model = model
        self.layer = []

        if method in self.Adapters:
            self.method = method
else:
raise ValueError("Invalid method provided")

@@ -43,31 +45,6 @@ def add_layer(self, layer: str):
self.layer.append(layer)
return self.layer

def lora_layer(self, layer, Rank):
"""
Create a LoRA adapted layer.
Args:
layer (nn.Module): The layer to adapt.
Rank (int): The rank parameter for LoRA adaptation.
Returns:
nn.Module: The adapted layer.
"""
new_layer = self.LoRa.Linear(
in_features=layer.in_features,
out_features=layer.out_features,
bias=layer.bias is not None,
r=Rank
)

new_layer.weight = nn.Parameter(layer.weight.detach().clone())

if layer.bias is not None:
new_layer.bias = nn.Parameter(layer.bias.detach().clone())

return new_layer

def freeze_weights(self, weight_freeze=False):
"""
Freeze model weights.
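
Given the updated constructor, a hedged construction sketch (argument values are illustrative; it mirrors the example in `exmpales/transfomerDecoderr.py` below):

```python
import torch.nn as nn
from core.Quantized import AdapterLoRa

model = nn.TransformerDecoderLayer(d_model=512, nhead=8)

# LoRa/BitSand are optional flags in the reordered signature above.
adapter = AdapterLoRa(model, method="LoRa", Rank=4, LoRa=True)
adapter.add_layer("self_attn")
```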
117 changes: 14 additions & 103 deletions exmpales/transfomerDecoderr.py
@@ -1,111 +1,22 @@
import torch.nn as nn
import torch
import math
import os
import sys

current_dir = os.path.dirname(__file__)
target_dir = os.path.abspath(os.path.join(current_dir, ".././"))
sys.path.insert(0, target_dir)

from core.Quantized import AdapterLoRa

model = nn.TransformerDecoderLayer(d_model=512, nhead=8)

Adpate_model = AdapterLoRa(model, method="LoRa", Rank=4)
Adpate_model.add_layer("self_attn")
Adpate_model.add_layer("linear1")
Adpate_model.add_layer("linear2")
Adpate_model.reconstruct_model()
model = Adpate_model.implement_lora(verbose=True)


class ScaleDotProductAttention(nn.Module):
    """
    compute scale dot product attention

    Query : given sentence that we focused on (decoder)
    Key : every sentence to check relationship with Query (encoder)
    Value : every sentence same with Key (encoder)
    """

def __init__(self,config):
super(ScaleDotProductAttention, self).__init__()
self.softmax = nn.Softmax(dim=-1)
        self.attention_dropout = nn.Dropout(config["attention_dropout"])

def forward(self, q, k, v,output_attentions=False):
# input is 4 dimension tensor
# [batch_size, head, length, d_tensor]
batch_size, head, length, d_tensor = k.size()

# 1. dot product Query with Key^T to compute similarity
k_t = k.transpose(2, 3) # transpose
score = (q @ k_t) / math.sqrt(d_tensor) # scaled dot product

# 3. pass them softmax to make [0, 1] range
score = self.softmax(score)
score = self.attention_dropout(score)

# 4. multiply with Value
v = score @ v
if not output_attentions:
return (v, None)

return v, score


class MultiHeadAttention(nn.Module):

def __init__(self,config):
super(MultiHeadAttention, self).__init__()
        self.n_head = config["num_heads"]
self.attention = ScaleDotProductAttention(config)
self.w_q = nn.Linear(config["embedding_size"], config["embedding_size"],bias = config["qkv_bias"])
self.w_k = nn.Linear(config["embedding_size"], config["embedding_size"],bias = config["qkv_bias"])
self.w_v = nn.Linear(config["embedding_size"], config["embedding_size"],bias = config["qkv_bias"])
self.w_concat = nn.Linear(config["embedding_size"], config["embedding_size"])

def forward(self, q, k, v,output_attentions=False):
# 1. dot product with weight matrices
q, k, v = self.w_q(q), self.w_k(k), self.w_v(v)

# 2. split tensor by number of heads
q, k, v = self.split(q), self.split(k), self.split(v)

# 3. do scale dot product to compute similarity
out, attention = self.attention(q, k, v,output_attentions=output_attentions)

# 4. concat and pass to linear layer
out = self.concat(out)
out = self.w_concat(out)

# 5. visualize attention map
# TODO : we should implement visualization
if not output_attentions:
return (out, None)

return out , attention

def split(self, tensor):
"""
split tensor by number of head
:param tensor: [batch_size, length, d_model]
:return: [batch_size, head, length, d_tensor]
"""
batch_size, length, d_model = tensor.size()

d_tensor = d_model // self.n_head
tensor = tensor.view(batch_size, length, self.n_head, d_tensor).transpose(1, 2)
# it is similar with group convolution (split by number of heads)

return tensor

def concat(self, tensor):
"""
inverse function of self.split(tensor : torch.Tensor)
:param tensor: [batch_size, head, length, d_tensor]
:return: [batch_size, length, d_model]
"""
batch_size, head, length, d_tensor = tensor.size()
d_model = head * d_tensor

tensor = tensor.transpose(1, 2).contiguous().view(batch_size, length, d_model)
return tensor
"""
inverse function of self.split(tensor : torch.Tensor)
:param tensor: [batch_size, head, length, d_tensor]
:return: [batch_size, length, d_model]
"""
batch_size, head, length, d_tensor = tensor.size()
d_model = head * d_tensor

tensor = tensor.transpose(1, 2).contiguous().view(batch_size, length, d_model)
return tensor
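
After adapting a model this way, a common follow-up step (a sketch, not part of this commit) is to train only the LoRA parameters; `loralib` provides `mark_only_lora_as_trainable` for exactly this:

```python
import loralib as lora
import torch

# `model` is assumed to be the adapted module returned by implement_lora above.
lora.mark_only_lora_as_trainable(model)
optimizer = torch.optim.AdamW(
    (p for p in model.parameters() if p.requires_grad), lr=1e-4
)
```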
24 changes: 24 additions & 0 deletions test/LayerExist.py
@@ -0,0 +1,24 @@
import unittest

def LayerType(layer):
    layers = ["nn.Linear", "nn.Embedding", "nn.Conv1d", "nn.Conv2d"]
    if layer not in layers:
        return f"{layer} is not supported. Please visit the docs for the list of supported layers."
    return True

class TestCaseExistLayer(unittest.TestCase):

    def test_layer_exists(self):
        layer = "nn.Linear"
        ExpectOut = True
        Result = LayerType(layer)
        self.assertEqual(Result, ExpectOut)

if __name__ == "__main__":
unittest.main()
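
A hedged extension of the test above, covering the unsupported-layer path (it would live in the same file, before the `__main__` guard; the class name is illustrative):

```python
class TestCaseUnsupportedLayer(unittest.TestCase):

    def test_unsupported_layer(self):
        # LayerType returns an error string rather than True for unknown layers.
        result = LayerType("nn.LSTM")
        self.assertIsInstance(result, str)
        self.assertIn("not supported", result)
```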





