Commit 99d9a37
yunss-ML committed Aug 26, 2023
1 parent 11496f7
Showing 7 changed files with 178 additions and 137 deletions.
@@ -9,6 +9,7 @@ __pycache__/
# Distribution / packaging
.Python
build/
run.py
.vscode/
develop-eggs/
dist/
@@ -0,0 +1,38 @@
import bitsandbytes as bnb
import loratorch as LoraT
import loralib as lora
import torch.nn as nn
from typing import Callable, Dict, List, Optional, Union


class Adapters(object):
    """Decorator that validates the layer types a function wants to adapt."""

    SUPPORTED_LAYERS = ["nn.Linear", "nn.Embedding", "nn.Conv1d", "nn.Conv2d"]

    def __init__(self, layer_type: List[str], method: Optional[Callable] = None):
        self.layer = layer_type
        self.method = method

    def LayerType(self, layer: List[str]):
        # Collect the requested layers that AdapterLoRa supports; return an
        # explanatory message as soon as an unsupported layer name is found.
        adapted_layers = []
        for name in layer:
            if name not in self.SUPPORTED_LAYERS:
                return f"{name} is not supported. Please see the docs for the list of supported layers."
            adapted_layers.append(name)
        return adapted_layers

    def __call__(self, fn):
        # Wrap the decorated function and log which layers will be adapted.
        checked = self.LayerType(self.layer)
        if isinstance(checked, list):
            def __fn():
                print(f"Layers to be adjusted with AdapterLoRa: {checked}")
                print("Adapter applied:", fn.__name__)
                return fn()
            return __fn
        raise ValueError(checked)


class Optimizer:
    def __init__(self, optimizer: nn.Module):
        pass
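For reference, a minimal usage sketch of this decorator; the `prepare_model` function below is an illustrative placeholder, not part of the commit:

@Adapters(["nn.Linear", "nn.Embedding"])
def prepare_model():
    # Build or adjust the model here; the decorator only validates and logs the layer list.
    print("building the adapted model")

prepare_model()
# Expected output, roughly:
#   Layers to be adjusted with AdapterLoRa: ['nn.Linear', 'nn.Embedding']
#   Adapter applied: prepare_model
#   building the adapted model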
@@ -0,0 +1,90 @@
import loralib as LoRa
import loratorch as LoRaT
import torch.nn as nn
from typing import Optional
import bitsandbytes as bnb


def Layer(model, new_layer):
    # Copy the pretrained weights (and bias, if any) into the adapted layer.
    new_layer.weight = nn.Parameter(model.weight.detach().clone())

    if model.bias is not None:
        new_layer.bias = nn.Parameter(model.bias.detach().clone())

    return new_layer


def LoRaLinear(method: str, model: nn.Module, Rank: Optional[int], threshold: Optional[int]):
    Adapters = ["LoRa", "SandBytes", "LoRaTorch"]
    if method in Adapters:
        if method == "LoRa":
            new_layer = LoRa.Linear(
                in_features=model.in_features,
                out_features=model.out_features,
                bias=model.bias is not None,
                r=Rank
            )
            return Layer(model, new_layer)

        if method == "SandBytes":
            new_layer = bnb.nn.Linear8bitLt(
                model.in_features,
                model.out_features,
                bias=model.bias is not None,
                has_fp16_weights=False,
                threshold=6.0
            )
            return Layer(model, new_layer)

        if method == "LoRaTorch":
            new_layer = LoRaT.Linear(
                in_features=model.in_features,
                out_features=model.out_features,
                bias=model.bias is not None,
                r=Rank
            )
            return Layer(model, new_layer)

    else:
        raise ValueError(f"Unsupported method '{method}'; supported methods are {Adapters}")


def LoRaEmbedding(method: str,
                  model: nn.Module,
                  Rank: Optional[int],
                  lora_alpha: Optional[int],
                  scale_grad_by_freq: Optional[int],
                  padding_idx: Optional[int],
                  max_norm: Optional[int]):

    Adapters = ["LoRa", "SandBytes", "LoRaTorch"]
    if method in Adapters:
        if method == "LoRa":
            new_layer = LoRa.Embedding(model.num_embeddings,
                                       model.embedding_dim,
                                       r=Rank,
                                       lora_alpha=lora_alpha,
                                       max_norm=model.max_norm,
                                       scale_grad_by_freq=model.scale_grad_by_freq,
                                       padding_idx=model.padding_idx
                                       )
            return new_layer

        if method == "SandBytes":
            new_layer = bnb.nn.StableEmbedding(model.num_embeddings,
                                               model.embedding_dim)
            return new_layer

        if method == "LoRaTorch":
            new_layer = LoRaT.Embedding(model.num_embeddings,
                                        model.embedding_dim,
                                        r=Rank,
                                        max_norm=model.max_norm,
                                        scale_grad_by_freq=model.scale_grad_by_freq,
                                        padding_idx=model.padding_idx
                                        )
            return new_layer
    else:
        raise ValueError(f"Unsupported method '{method}'; supported methods are {Adapters}")
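A usage sketch (assumed, not part of the commit): LoRaLinear can swap a plain nn.Linear for its LoRA counterpart, after which loralib's mark_only_lora_as_trainable helper freezes everything except the injected LoRA matrices. The toy model and layer sizes below are illustrative assumptions.

import torch.nn as nn
import loralib as lora

# Toy model; the layer sizes are arbitrary for illustration.
model = nn.Sequential(nn.Linear(128, 256), nn.ReLU(), nn.Linear(256, 10))

# Replace the first Linear with a rank-4 LoRA layer; Layer() copies the old weights over.
model[0] = LoRaLinear(method="LoRa", model=model[0], Rank=4, threshold=None)

# Freeze the base weights so only the LoRA parameters (lora_A / lora_B) are trained.
lora.mark_only_lora_as_trainable(model)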
@@ -1,111 +1,22 @@
 import torch.nn as nn
 import torch
 import math
-
-
-class ScaleDotProductAttention(nn.Module):
-
-    """
-    compute scale dot product attention
-
-    Query : given sentence that we focused on (decoder)
-    Key   : every sentence to check relationship with Query (encoder)
-    Value : every sentence same with Key (encoder)
-    """
-
-    def __init__(self, config):
-        super(ScaleDotProductAttention, self).__init__()
-        self.softmax = nn.Softmax(dim=-1)
-        self.attention_dropout = nn.Dropout(config["attention_droput"])
-
-    def forward(self, q, k, v, output_attentions=False):
-        # input is a 4-dimensional tensor
-        # [batch_size, head, length, d_tensor]
-        batch_size, head, length, d_tensor = k.size()
-
-        # 1. dot product Query with Key^T to compute similarity
-        k_t = k.transpose(2, 3)  # transpose
-        score = (q @ k_t) / math.sqrt(d_tensor)  # scaled dot product
-
-        # 2. pass through softmax to map scores to the [0, 1] range
-        score = self.softmax(score)
-        score = self.attention_dropout(score)
-
-        # 3. multiply with Value
-        v = score @ v
-        if not output_attentions:
-            return (v, None)
-
-        return v, score
-
-
-class MultiHeadAttention(nn.Module):
-
-    def __init__(self, config):
-        super(MultiHeadAttention, self).__init__()
-        self.n_head = config["num_heads"]
-        self.attention = ScaleDotProductAttention(config)
-        self.w_q = nn.Linear(config["embedding_size"], config["embedding_size"], bias=config["qkv_bias"])
-        self.w_k = nn.Linear(config["embedding_size"], config["embedding_size"], bias=config["qkv_bias"])
-        self.w_v = nn.Linear(config["embedding_size"], config["embedding_size"], bias=config["qkv_bias"])
-        self.w_concat = nn.Linear(config["embedding_size"], config["embedding_size"])
-
-    def forward(self, q, k, v, output_attentions=False):
-        # 1. dot product with weight matrices
-        q, k, v = self.w_q(q), self.w_k(k), self.w_v(v)
-
-        # 2. split tensor by number of heads
-        q, k, v = self.split(q), self.split(k), self.split(v)
-
-        # 3. scaled dot product attention to compute similarity
-        out, attention = self.attention(q, k, v, output_attentions=output_attentions)
-
-        # 4. concat and pass to linear layer
-        out = self.concat(out)
-        out = self.w_concat(out)
-
-        # 5. visualize attention map
-        # TODO : we should implement visualization
-        if not output_attentions:
-            return (out, None)
-
-        return out, attention
-
-    def split(self, tensor):
-        """
-        split tensor by number of heads
-        :param tensor: [batch_size, length, d_model]
-        :return: [batch_size, head, length, d_tensor]
-        """
-        batch_size, length, d_model = tensor.size()
-
-        d_tensor = d_model // self.n_head
-        tensor = tensor.view(batch_size, length, self.n_head, d_tensor).transpose(1, 2)
-        # it is similar to group convolution (split by number of heads)
-
-        return tensor
-
-    def concat(self, tensor):
-        """
-        inverse function of self.split(tensor : torch.Tensor)
-        :param tensor: [batch_size, head, length, d_tensor]
-        :return: [batch_size, length, d_model]
-        """
-        batch_size, head, length, d_tensor = tensor.size()
-        d_model = head * d_tensor
-
-        tensor = tensor.transpose(1, 2).contiguous().view(batch_size, length, d_model)
-        return tensor
+import os
+import sys
+
+current_dir = os.path.dirname(__file__)
+target_dir = os.path.abspath(os.path.join(current_dir, ".././"))
+sys.path.insert(0, target_dir)
+
+from core.Quantized import AdapterLoRa
+
+model = nn.TransformerDecoderLayer(d_model=512, nhead=8)
+
+Adpate_model = AdapterLoRa(model, method="LoRa", Rank=4)
+Adpate_model.add_layer("self_attn")
+Adpate_model.add_layer("linear1")
+Adpate_model.add_layer("linear2")
+Adpate_model.reconstruct_model()
+model = Adpate_model.implement_lora(verbose=True)
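A quick sanity check that is often useful after implement_lora (illustrative, not part of the commit): count trainable versus total parameters to confirm the adapter injection had the intended effect. The exact numbers depend on how AdapterLoRa freezes the base weights.

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable params: {trainable:,} / {total:,} ({100 * trainable / total:.2f}%)")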
@@ -0,0 +1,24 @@
import unittest


def LayerType(layer):
    # Return True when the layer name is supported, otherwise an explanatory string.
    layers = ["nn.Linear", "nn.Embedding", "nn.Conv1d", "nn.Conv2d"]
    if layer not in layers:
        return f"{layer} is not supported. Please see the docs for the list of supported layers."
    return True


class TestCaseExitLayer(unittest.TestCase):

    def test_exit_layer(self):
        # Test method names must start with "test_" so unittest discovers them.
        layer = "nn.Linear"
        expected = True
        result = LayerType(layer)
        self.assertEqual(result, expected)


if __name__ == "__main__":
    unittest.main()
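An optional extension of the test above (illustrative only, assuming the same LayerType helper is in scope in this module): cover every supported layer name plus one unsupported name using unittest's subTest.

class TestAllLayerNames(unittest.TestCase):

    def test_supported_layers(self):
        for layer in ["nn.Linear", "nn.Embedding", "nn.Conv1d", "nn.Conv2d"]:
            with self.subTest(layer=layer):
                self.assertIs(LayerType(layer), True)

    def test_unsupported_layer(self):
        # Unsupported names come back as an explanatory string, not True.
        self.assertIsInstance(LayerType("nn.LSTM"), str)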