From 05b706c1c88bd4c74cee0017ca223561931c94be Mon Sep 17 00:00:00 2001
From: hbaghramyan
Date: Sat, 21 Dec 2024 14:58:17 +0100
Subject: [PATCH] last commit

---
 ch05/01_main-chapter-code/ch05.py             | 52 +++++++++-----
 {ch06 => ch07}/04_user_interface/README.md    |  0
 {ch06 => ch07}/04_user_interface/app.py       |  0
 .../04_user_interface/previous_chapters.py    |  0
 .../04_user_interface/requirements-extra.txt  |  0
 utils/utils_prev.py                           | 67 ++++++++++++++++++-
 6 files changed, 100 insertions(+), 19 deletions(-)
 rename {ch06 => ch07}/04_user_interface/README.md (100%)
 rename {ch06 => ch07}/04_user_interface/app.py (100%)
 rename {ch06 => ch07}/04_user_interface/previous_chapters.py (100%)
 rename {ch06 => ch07}/04_user_interface/requirements-extra.txt (100%)

diff --git a/ch05/01_main-chapter-code/ch05.py b/ch05/01_main-chapter-code/ch05.py
index 18c9f8f9..def34908 100644
--- a/ch05/01_main-chapter-code/ch05.py
+++ b/ch05/01_main-chapter-code/ch05.py
@@ -7,23 +7,28 @@
 sys.path.insert(0, os.getcwd())
 
 # from previous_chapters import GPTModel, generate_text_simple, create_dataloader_v1
-from utils.utils_prev import GPTModel, generate_text_simple, create_dataloader_v1
-
-# GPT_CONFIG_124M = {
-#     "vocab_size": 50257,  # Vocabulary size
-#     "context_length": 256,  # Shortened context length (orig: 1024)
-#     "emb_dim": 768,  # Embedding dimension
-#     "n_heads": 12,  # Number of attention heads
-#     "n_layers": 12,  # Number of layers
-#     "drop_rate": 0.1,  # Dropout rate
-#     "qkv_bias": False,  # Query-key-value bias
-# }
-
-# device = torch.device(
-#     "cpu"
-#     if torch.backends.mps.is_available()
-#     else "cuda" if torch.cuda.is_available() else "cpu"
-# )
+from utils.utils_prev import (
+    GPTModel,
+    generate,
+    generate_text_simple,
+    create_dataloader_v1,
+)
+
+GPT_CONFIG_124M = {
+    "vocab_size": 50257,  # Vocabulary size
+    "context_length": 256,  # Shortened context length (orig: 1024)
+    "emb_dim": 768,  # Embedding dimension
+    "n_heads": 12,  # Number of attention heads
+    "n_layers": 12,  # Number of layers
+    "drop_rate": 0.1,  # Dropout rate
+    "qkv_bias": False,  # Query-key-value bias
+}
+
+device = torch.device(
+    "cpu"
+    if torch.backends.mps.is_available()
+    else "cuda" if torch.cuda.is_available() else "cpu"
+)
 
 # torch.manual_seed(123)
 # model = GPTModel(GPT_CONFIG_124M)
@@ -425,3 +430,16 @@ def softmax_with_temperature(logits, temperature):
 
 topk_probas = torch.softmax(new_logits, dim=0)
 print(topk_probas)
+
+torch.manual_seed(123)
+
+token_ids = generate(
+    model=model,
+    idx=text_to_token_ids("Every effort moves you", tokenizer),
+    max_new_tokens=15,
+    context_size=GPT_CONFIG_124M["context_length"],
+    top_k=25,
+    temperature=1.4,
+)
+
+print("Output text: \n", token_ids_to_text(token_ids, tokenizer))
diff --git a/ch06/04_user_interface/README.md b/ch07/04_user_interface/README.md
similarity index 100%
rename from ch06/04_user_interface/README.md
rename to ch07/04_user_interface/README.md
diff --git a/ch06/04_user_interface/app.py b/ch07/04_user_interface/app.py
similarity index 100%
rename from ch06/04_user_interface/app.py
rename to ch07/04_user_interface/app.py
diff --git a/ch06/04_user_interface/previous_chapters.py b/ch07/04_user_interface/previous_chapters.py
similarity index 100%
rename from ch06/04_user_interface/previous_chapters.py
rename to ch07/04_user_interface/previous_chapters.py
diff --git a/ch06/04_user_interface/requirements-extra.txt b/ch07/04_user_interface/requirements-extra.txt
similarity index 100%
rename from ch06/04_user_interface/requirements-extra.txt
rename to ch07/04_user_interface/requirements-extra.txt
diff --git a/utils/utils_prev.py b/utils/utils_prev.py
index 18bd6369..a4d75587 100644
--- a/utils/utils_prev.py
+++ b/utils/utils_prev.py
@@ -249,7 +249,7 @@ def generate_text_simple(
     # idx is (batch, n_tokens) array of indices in the current context
     """Return the indices of the generated text
     Args:
-        model (GPTModel): the model used for the inference
+        model (nn.Module): the model used for the inference
         idx (torch.Tensor): tokenized input
         max_new_tokens (int): maximum number of tokens to generate
         context_size (int): the maximum number of tokens the model can consider as input
@@ -260,7 +260,7 @@ def generate_text_simple(
     for _ in range(max_new_tokens):
         # Crop current context if it exceeds the supported context size
         # E.g., if LLM supports only 5 tokens, and the context size is 10
-        # the only the last 5 tokens are used as context
+        # only the last 5 tokens are used as context
         idx_cond = idx[:, -context_size:]
 
         # Get the predictions
@@ -283,6 +283,69 @@ def generate_text_simple(
     return idx
 
 
+def generate(
+    model: nn.Module,
+    idx: torch.Tensor,
+    max_new_tokens: int,
+    context_size: int,
+    temperature=0.0,
+    top_k=None,
+    eos_id=None,
+) -> torch.Tensor:
+
+    # idx is (batch, n_tokens) array of indices in the current context
+    """Return the indices of the generated text
+    Args:
+        model (nn.Module): the model used for the inference
+        idx (torch.Tensor): tokenized input
+        max_new_tokens (int): maximum number of tokens to generate
+        context_size (int): the maximum number of tokens the model can consider as input
+        temperature (float): controls the sharpness of the sampling distribution (0.0 uses greedy argmax decoding)
+        top_k (int, optional): if set, only the k largest logits are kept for sampling
+
+    Returns:
+        idx (torch.Tensor): tokenized input + output
+    """
+
+    for _ in range(max_new_tokens):
+        idx_cond = idx[:, -context_size:]
+        with torch.no_grad():
+            logits = model(idx_cond)
+        logits = logits[:, -1, :]
+
+        # New: Filter logits with top_k sampling
+        if top_k is not None:
+            # Keep only top_k values
+            top_logits, _ = torch.topk(logits, top_k)
+            min_val = top_logits[:, -1]
+            logits = torch.where(
+                logits < min_val, torch.tensor(float("-inf")).to(logits.device), logits
+            )
+
+        # New: Apply temperature scaling
+        if temperature > 0.0:
+            logits = logits / temperature
+
+            # Apply softmax to get probabilities
+            probs = torch.softmax(logits, dim=-1)
+
+            # Sample from the distribution
+            idx_next = torch.multinomial(probs, num_samples=1)  # (batch_size, 1)
+
+        # Otherwise same as before: get idx of the vocab entry
+        # with the highest logits value
+        else:
+            idx_next = torch.argmax(logits, dim=-1, keepdim=True)  # (batch_size, 1)
+
+        if idx_next == eos_id:
+            break
+
+        # Same as before: append sampled index to the running sequence
+        idx = torch.cat((idx, idx_next), dim=1)  # (batch_size, num_tokens+1)
+
+    return idx
+
+
 class GPTDatasetV1(Dataset):
     def __init__(
         self, txt: str, tokenizer: tiktoken.Encoding, max_length: int, stride: int
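
For reference, below is a minimal standalone sketch of the top-k filtering plus temperature scaling step that the new generate() function performs before sampling. It is not part of the patch: the logits values, vocabulary size, and variable names are made up for illustration, and only PyTorch is assumed.

import torch

torch.manual_seed(123)

# Hypothetical next-token logits for a tiny 5-token vocabulary (batch_size=1).
logits = torch.tensor([[4.0, 1.0, 3.5, 0.5, 2.0]])
top_k = 3
temperature = 1.4

# Top-k filtering: everything below the k-th largest logit becomes -inf,
# so softmax assigns it zero probability.
top_logits, _ = torch.topk(logits, top_k)
min_val = top_logits[:, -1]
logits = torch.where(logits < min_val, torch.tensor(float("-inf")), logits)

# Temperature scaling: values > 1 flatten the distribution, values < 1 sharpen it.
probs = torch.softmax(logits / temperature, dim=-1)
print(probs)  # masked entries are 0.0; the remaining three sum to 1

# Sample one token id from the filtered distribution, as generate() does.
idx_next = torch.multinomial(probs, num_samples=1)
print(idx_next)  # shape (1, 1)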