Commit: Merge branch 'dev'

hbaghramyan committed Dec 21, 2024
2 parents b62d98d + 05b706c commit c833a8b
Showing 6 changed files with 101 additions and 19 deletions.
52 changes: 35 additions & 17 deletions ch05/01_main-chapter-code/ch05.py
@@ -7,23 +7,28 @@

sys.path.insert(0, os.getcwd())
# from previous_chapters import GPTModel, generate_text_simple, create_dataloader_v1
from utils.utils_prev import GPTModel, generate_text_simple, create_dataloader_v1

# GPT_CONFIG_124M = {
# "vocab_size": 50257, # Vocabulary size
# "context_length": 256, # Shortened context length (orig: 1024)
# "emb_dim": 768, # Embedding dimension
# "n_heads": 12, # Number of attention heads
# "n_layers": 12, # Number of layers
# "drop_rate": 0.1, # Dropout rate
# "qkv_bias": False, # Query-key-value bias
# }

# device = torch.device(
# "cpu"
# if torch.backends.mps.is_available()
# else "cuda" if torch.cuda.is_available() else "cpu"
# )
from utils.utils_prev import (
    GPTModel,
    generate,
    generate_text_simple,
    create_dataloader_v1,
)

GPT_CONFIG_124M = {
    "vocab_size": 50257,    # Vocabulary size
    "context_length": 256,  # Shortened context length (orig: 1024)
    "emb_dim": 768,         # Embedding dimension
    "n_heads": 12,          # Number of attention heads
    "n_layers": 12,         # Number of layers
    "drop_rate": 0.1,       # Dropout rate
    "qkv_bias": False,      # Query-key-value bias
}

device = torch.device(
    "cpu"
    if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available() else "cpu"
)

# torch.manual_seed(123)
# model = GPTModel(GPT_CONFIG_124M)
@@ -425,3 +430,16 @@ def softmax_with_temperature(logits, temperature):

topk_probas = torch.softmax(new_logits, dim=0)
print(topk_probas)

torch.manual_seed(123)

token_ids = generate(
    model=model,
    idx=text_to_token_ids("Every effort moves you", tokenizer),
    max_new_tokens=15,
    context_size=GPT_CONFIG_124M["context_length"],
    top_k=25,
    temperature=1.4,
)

print("Output text: \n", token_ids_to_text(token_ids, tokenizer))
File renamed without changes.
File renamed without changes.
File renamed without changes.
68 changes: 66 additions & 2 deletions utils/utils_prev.py
@@ -2,6 +2,7 @@
import torch.nn as nn
import tiktoken
from torch.utils.data import Dataset, DataLoader


class SelfAttention_v1(nn.Module):
@@ -249,7 +250,7 @@ def generate_text_simple(
    # idx is (batch, n_tokens) array of indices in the current context
    """Return the indices of the generated text
    Args:
        model (GPTModel): the model used for the inference
        model (nn.Module): the model used for the inference
        idx (torch.Tensor): tokenized input
        max_new_tokens (int): maximum number of tokens to generate
        context_size (int): the maximum number of tokens the model can consider as input
@@ -260,7 +261,7 @@
    for _ in range(max_new_tokens):
        # Crop current context if it exceeds the supported context size
        # E.g., if LLM supports only 5 tokens, and the context size is 10
        # the only the last 5 tokens are used as context
        # only the last 5 tokens are used as context
        idx_cond = idx[:, -context_size:]

        # Get the predictions
@@ -283,6 +284,69 @@ def generate_text_simple(
    return idx


def generate(
    model: nn.Module,
    idx: torch.Tensor,
    max_new_tokens: int,
    context_size: int,
    temperature=0.0,
    top_k=None,
    eos_id=None,
) -> torch.Tensor:

    # idx is (batch, n_tokens) array of indices in the current context
    """Return the indices of the generated text
    Args:
        model (nn.Module): the model used for the inference
        idx (torch.Tensor): tokenized input
        max_new_tokens (int): maximum number of tokens to generate
        context_size (int): the maximum number of tokens the model can consider as input
        temperature (float): controls how peaked (low values) or uniform (high values) the sampling distribution is
        top_k (int, optional): number of highest-scoring logits to retain before sampling
        eos_id (int, optional): token id that ends generation early when sampled
    Returns:
        idx (torch.Tensor): tokenized input + output
    """

    for _ in range(max_new_tokens):
        idx_cond = idx[:, -context_size:]
        with torch.no_grad():
            logits = model(idx_cond)
        logits = logits[:, -1, :]

        # New: Filter logits with top_k sampling
        if top_k is not None:
            # Keep only top_k values; mask everything else with -inf
            top_logits, _ = torch.topk(logits, top_k)
            min_val = top_logits[:, -1]
            logits = torch.where(
                logits < min_val, torch.tensor(float("-inf")).to(logits.device), logits
            )

        # New: Apply temperature scaling
        if temperature > 0.0:
            logits = logits / temperature

            # Apply softmax to get probabilities
            probs = torch.softmax(logits, dim=-1)

            # Sample from the distribution
            idx_next = torch.multinomial(probs, num_samples=1)  # (batch_size, 1)

        # Otherwise same as before: get idx of the vocab entry
        # with the highest logits value
        else:
            idx_next = torch.argmax(logits, dim=-1, keepdim=True)  # (batch_size, 1)

        if idx_next == eos_id:
            break

        # Same as before: append the sampled index to the running sequence
        idx = torch.cat((idx, idx_next), dim=1)  # (batch_size, num_tokens+1)

    return idx


class GPTDatasetV1(Dataset):
    def __init__(
        self, txt: str, tokenizer: tiktoken.Encoding, max_length: int, stride: int
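As an aside, the new eos_id parameter lets generate() stop before max_new_tokens is reached. A hedged usage sketch (assuming the GPT-2 tiktoken encoding, in which <|endoftext|> maps to token id 50256, and the model and helper functions set up in ch05.py) might look like:

import tiktoken

tokenizer = tiktoken.get_encoding("gpt2")
eos_token_id = tokenizer.encode("<|endoftext|>", allowed_special={"<|endoftext|>"})[0]  # 50256

token_ids = generate(
    model=model,
    idx=text_to_token_ids("Every effort moves you", tokenizer),
    max_new_tokens=50,
    context_size=GPT_CONFIG_124M["context_length"],
    top_k=25,
    temperature=1.4,
    eos_id=eos_token_id,
)

Note that the `if idx_next == eos_id` comparison assumes a batch size of 1; larger batches would need a per-sequence stopping check.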
