
Commit

Merge branch 'dev'
hbaghramyan committed Aug 25, 2024
2 parents 508748e + 1fff7e6 commit 3498f06
Showing 4 changed files with 59 additions and 6 deletions.
10 changes: 5 additions & 5 deletions README.md
@@ -30,6 +30,8 @@ To download a copy of this repository, click on the [Download ZIP](https://githu
git clone --depth 1 https://github.com/rasbt/LLMs-from-scratch.git
```

<br>

(If you downloaded the code bundle from the Manning website, please consider visiting the official code repository on GitHub at [https://github.com/rasbt/LLMs-from-scratch](https://github.com/rasbt/LLMs-from-scratch) for the latest updates.)

<br>
@@ -59,7 +61,7 @@ You can alternatively view this and other files on GitHub at [https://github.com

<br>

| Chapter Title | Main Code (for quick access) | All Code + Supplementary |
| Chapter Title | Main Code (for Quick Access) | All Code + Supplementary |
|------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------|-------------------------------|
| [Setup recommendations](setup) | - | - |
| Ch 1: Understanding Large Language Models | No code | - |
@@ -129,11 +131,9 @@ Several folders contain optional materials as a bonus for interested readers:
## Questions, Feedback, and Contributing to This Repository


I welcome all sorts of feedback, best shared via the [Discussions](https://github.com/rasbt/LLMs-from-scratch/discussions) forum. Likewise, if you have any questions or just want to bounce ideas off others, please don't hesitate to post these in the forum as well.

If you notice any problems or issues, please do not hesitate to file an [Issue](https://github.com/rasbt/LLMs-from-scratch/issues).
I welcome all sorts of feedback, best shared via the [Manning Forum](https://livebook.manning.com/forum?product=raschka&page=1) or [GitHub Discussions](https://github.com/rasbt/LLMs-from-scratch/discussions). Likewise, if you have any questions or just want to bounce ideas off others, please don't hesitate to post these in the forum as well.

However, since this repository contains the code corresponding to a print book, I currently cannot accept contributions that would extend the contents of the main chapter code, as it would introduce deviations from the physical book. Keeping it consistent helps ensure a smooth experience for everyone.
Please note that since this repository contains the code corresponding to a print book, I currently cannot accept contributions that would extend the contents of the main chapter code, as it would introduce deviations from the physical book. Keeping it consistent helps ensure a smooth experience for everyone.


&nbsp;
8 changes: 7 additions & 1 deletion ch03/01_main-chapter-code/ch03.py
@@ -1,6 +1,12 @@
import os
import sys

sys.path.insert(0, os.getcwd())


import torch

from utils_ch03 import (
from utils.utils_ch03 import (
CasualAttention,
MultiHeadAttention,
MultiHeadAttentionWrapper,
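The import change in this hunk relies on `sys.path.insert(0, os.getcwd())`: prepending the current working directory lets Python resolve a top-level `utils` package, so the script has to be launched from the repository root rather than from inside `ch03/01_main-chapter-code/`. A minimal sketch of the pattern, assuming a `utils/utils_ch03.py` module at the repository root (the layout is inferred from the import statement, not shown in this diff):

```python
import os
import sys

# Prepend the directory the script is launched from (expected: the repository root)
# so the top-level `utils` package is found by the import machinery.
sys.path.insert(0, os.getcwd())

# Resolves to <repo-root>/utils/utils_ch03.py only when the script is started
# from the repository root, e.g.:  python ch03/01_main-chapter-code/ch03.py
from utils.utils_ch03 import MultiHeadAttention
```
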
46 changes: 46 additions & 0 deletions ch04/01_main-chapter-code/ch04.py
@@ -2,7 +2,12 @@
import torch.nn as nn
import tiktoken
import matplotlib.pyplot as plt
import os
import sys

sys.path.insert(0, os.getcwd())

from utils.utils_ch03 import MultiHeadAttention

GPT_CONFIG_124M = {
"vocab_size": 50257, # Vocabulary size
@@ -134,6 +139,47 @@ def forward(self, x):
return x


class TransformerBlock(nn.Module):
def __init__(self, cfg):
super().__init__()
self.att = MultiHeadAttention(
d_in=cfg["emb_dim"],
d_out=cfg["emb_dim"],
context_length=cfg["context_length"],
num_heads=cfg["n_heads"],
dropout=cfg["drop_rate"],
qkv_bias=cfg["qkv_bias"],
)
self.ff = FeedForward(cfg)
self.norm1 = LayerNorm(emb_dim=cfg["emb_dim"])
self.norm2 = LayerNorm(emb_dim=cfg["emb_dim"])
self.drop_shortcut = nn.Dropout(p=cfg["drop_rate"])

def forward(self, x):
# Shortcut connection for attention block
shortcut = x

x = self.norm1(x)
x = self.att(x) # Shape [batch_size, num_tokens, emb_size]
x = self.drop_shortcut(x)
x = shortcut + x # Add the original input back

shortcut = x
x = self.norm2(x)
x = self.ff(x)
x = self.drop_shortcut(x)
x = shortcut + x # Add the original input back
return x


torch.manual_seed(123)
x = torch.rand(2, 4, 768)
block = TransformerBlock(GPT_CONFIG_124M)
output = block(x)

print("Here")


def print_gradients(model, x):
# Forward pass
output = model(x)
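The `TransformerBlock` added above combines pre-LayerNorm, multi-head attention, dropout, and a feed-forward sublayer with two shortcut connections, and it preserves the `[batch_size, num_tokens, emb_dim]` shape of its input, which is what allows identical blocks to be stacked. A quick sanity-check sketch (not part of the commit), assuming `TransformerBlock` and `GPT_CONFIG_124M` from the ch04.py hunk above are in scope:

```python
import torch

torch.manual_seed(123)

x = torch.rand(2, 4, 768)                  # [batch_size=2, num_tokens=4, emb_dim=768]
block = TransformerBlock(GPT_CONFIG_124M)  # class and config as defined in ch04.py above
out = block(x)

# Both the attention and feed-forward sublayers map emb_dim -> emb_dim, and the
# shortcut additions require matching shapes, so the output shape equals the input shape.
print("Input shape: ", x.shape)   # torch.Size([2, 4, 768])
print("Output shape:", out.shape) # torch.Size([2, 4, 768])
```
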
1 change: 1 addition & 0 deletions
@@ -78,6 +78,7 @@ def __init__(self, d_in, d_out, context_length, dropout, num_heads, qkv_bias=Fal
super().__init__(d_in, d_out, qkv_bias)
assert d_out % num_heads == 0, "d_out must be divisible by num_heads"

self.d_in = d_in
self.d_out = d_out
self.num_heads = num_heads
self.head_dim = (
