Commit: Merge branch 'dev'

hbaghramyan committed Dec 21, 2024
2 parents b62d98d + 05b706c commit c833a8b
Showing 6 changed files with 101 additions and 19 deletions.
52 changes: 35 additions & 17 deletions ch05/01_main-chapter-code/ch05.py
@@ -7,23 +7,28 @@

sys.path.insert(0, os.getcwd())
# from previous_chapters import GPTModel, generate_text_simple, create_dataloader_v1
from utils.utils_prev import GPTModel, generate_text_simple, create_dataloader_v1

# GPT_CONFIG_124M = {
# "vocab_size": 50257, # Vocabulary size
# "context_length": 256, # Shortened context length (orig: 1024)
# "emb_dim": 768, # Embedding dimension
# "n_heads": 12, # Number of attention heads
# "n_layers": 12, # Number of layers
# "drop_rate": 0.1, # Dropout rate
# "qkv_bias": False, # Query-key-value bias
# }

# device = torch.device(
# "cpu"
# if torch.backends.mps.is_available()
# else "cuda" if torch.cuda.is_available() else "cpu"
# )
from utils.utils_prev import (
    GPTModel,
    generate,
    generate_text_simple,
    create_dataloader_v1,
)

GPT_CONFIG_124M = {
    "vocab_size": 50257,    # Vocabulary size
    "context_length": 256,  # Shortened context length (orig: 1024)
    "emb_dim": 768,         # Embedding dimension
    "n_heads": 12,          # Number of attention heads
    "n_layers": 12,         # Number of layers
    "drop_rate": 0.1,       # Dropout rate
    "qkv_bias": False,      # Query-key-value bias
}

device = torch.device(
    "cpu"
    if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available() else "cpu"
)

# torch.manual_seed(123)
# model = GPTModel(GPT_CONFIG_124M)
@@ -425,3 +430,16 @@ def softmax_with_temperature(logits, temperature):

topk_probas = torch.softmax(new_logits, dim=0)
print(topk_probas)

torch.manual_seed(123)

token_ids = generate(
    model=model,
    idx=text_to_token_ids("Every effort moves you", tokenizer),
    max_new_tokens=15,
    context_size=GPT_CONFIG_124M["context_length"],
    top_k=25,
    temperature=1.4,
)

print("Output text: \n", token_ids_to_text(token_ids, tokenizer))
File renamed without changes.
File renamed without changes.
File renamed without changes.
68 changes: 66 additions & 2 deletions utils/utils_prev.py
@@ -2,6 +2,7 @@
import torch.nn as nn
import tiktoken
from torch.utils.data import Dataset, DataLoader


class SelfAttention_v1(nn.Module):
@@ -249,7 +250,7 @@ def generate_text_simple(
    # idx is (batch, n_tokens) array of indices in the current context
    """Return the indices of the generated text
    Args:
        model (GPTModel): the model used for the inference
        model (nn.Module): the model used for the inference
        idx (torch.Tensor): tokenized input
        max_new_tokens (int): maximum number of tokens to generate
        context_size (int): the maximum number of tokens the model can consider as input
@@ -260,7 +261,7 @@
    for _ in range(max_new_tokens):
        # Crop current context if it exceeds the supported context size
        # E.g., if LLM supports only 5 tokens, and the context size is 10
        # the only the last 5 tokens are used as context
        # only the last 5 tokens are used as context
        idx_cond = idx[:, -context_size:]

        # Get the predictions
@@ -283,6 +284,69 @@ def generate_text_simple(
    return idx


def generate(
    model: nn.Module,
    idx: torch.Tensor,
    max_new_tokens: int,
    context_size: int,
    temperature=0.0,
    top_k=None,
    eos_id=None,
) -> torch.Tensor:

    # idx is (batch, n_tokens) array of indices in the current context
    """Return the indices of the generated text
    Args:
        model (nn.Module): the model used for the inference
        idx (torch.Tensor): tokenized input
        max_new_tokens (int): maximum number of tokens to generate
        context_size (int): the maximum number of tokens the model can consider as input
        temperature (float): controls how peaked (low values) or uniform (high values) the sampling distribution is
        top_k (int, optional): number of highest-scoring logits to retain before sampling
        eos_id (int, optional): token id that ends generation early when sampled
    Returns:
        idx (torch.Tensor): tokenized input + output
    """

    for _ in range(max_new_tokens):
        idx_cond = idx[:, -context_size:]
        with torch.no_grad():
            logits = model(idx_cond)
        logits = logits[:, -1, :]

        # New: Filter logits with top_k sampling
        if top_k is not None:
            # Keep only top_k values; mask everything else with -inf
            top_logits, _ = torch.topk(logits, top_k)
            min_val = top_logits[:, -1]
            logits = torch.where(
                logits < min_val, torch.tensor(float("-inf")).to(logits.device), logits
            )

        # New: Apply temperature scaling
        if temperature > 0.0:
            logits = logits / temperature

            # Apply softmax to get probabilities
            probs = torch.softmax(logits, dim=-1)

            # Sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)  # (batch_size, 1)

        # Otherwise same as before: get idx of the vocab entry
        # with the highest logits value
        else:
            idx_next = torch.argmax(logits, dim=-1, keepdim=True)  # (batch_size, 1)

        if idx_next == eos_id:
            break

        # Same as before: append the sampled index to the running sequence
        idx = torch.cat((idx, idx_next), dim=1)  # (batch_size, num_tokens+1)

    return idx


class GPTDatasetV1(Dataset):
    def __init__(
        self, txt: str, tokenizer: tiktoken.Encoding, max_length: int, stride: int
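As an aside, the new eos_id parameter lets generate() stop before max_new_tokens is reached. A hedged usage sketch (assuming the GPT-2 tiktoken encoding, in which <|endoftext|> maps to token id 50256, and the model and helper functions set up in ch05.py) might look like:

import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")
eos_token_id = tokenizer.encode("<|endoftext|>", allowed_special={"<|endoftext|>"})[0]  # 50256

token_ids = generate(
    model=model,
    idx=text_to_token_ids("Every effort moves you", tokenizer),
    max_new_tokens=50,
    context_size=GPT_CONFIG_124M["context_length"],
    top_k=25,
    temperature=1.4,
    eos_id=eos_token_id,
)

Note that the `if idx_next == eos_id` comparison assumes a batch size of 1; larger batches would need a per-sequence stopping check.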
