From 8d02cb1ceee0c8767226b38183d6e8bff9d965f5 Mon Sep 17 00:00:00 2001 From: Sebastian Raschka Date: Fri, 19 Jul 2024 06:29:29 -0700 Subject: [PATCH 1/2] Add download help message (#274) --- .../01_main-chapter-code/gpt_download.py | 45 +++++++++++- ch05/01_main-chapter-code/gpt_download.py | 69 +++++++++++-------- ch06/01_main-chapter-code/gpt_download.py | 45 +++++++++++- .../gpt_download.py | 45 +++++++++++- .../gpt_download.py | 45 +++++++++++- ch07/01_main-chapter-code/gpt_download.py | 45 +++++++++++- 6 files changed, 259 insertions(+), 35 deletions(-) diff --git a/appendix-E/01_main-chapter-code/gpt_download.py b/appendix-E/01_main-chapter-code/gpt_download.py index 0d695d2d..aa0ea1e3 100644 --- a/appendix-E/01_main-chapter-code/gpt_download.py +++ b/appendix-E/01_main-chapter-code/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch05/01_main-chapter-code/gpt_download.py b/ch05/01_main-chapter-code/gpt_download.py index 3ad67781..aa0ea1e3 100644 --- a/ch05/01_main-chapter-code/gpt_download.py +++ b/ch05/01_main-chapter-code/gpt_download.py @@ -44,6 +44,45 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` """ def download_file(url, destination): # Send a GET request to download the file in streaming mode @@ -74,36 +113,6 @@ def download_file(url, destination): """ -def download_file(url, destination): - # Send a GET request to download the file - with urllib.request.urlopen(url) as response: - # Get the total file size from headers, defaulting to 0 if not present - file_size = int(response.headers.get("Content-Length", 0)) - - # Check if file exists and has the same size - if os.path.exists(destination): - file_size_local = os.path.getsize(destination) - if file_size == file_size_local: - print(f"File already exists and is up-to-date: {destination}") - return - - # Define the block size for reading the file - block_size = 1024 # 1 Kilobyte - - # Initialize the progress bar with total file size - progress_bar_description = os.path.basename(url) # Extract filename from URL - with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: - # Open the destination file in binary write mode - with open(destination, "wb") as file: - # Read the file in chunks and write to destination - while True: - chunk = response.read(block_size) - if not chunk: - break - file.write(chunk) - progress_bar.update(len(chunk)) # Update progress bar - - def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): # Initialize parameters dictionary with empty blocks for each layer params = {"blocks": [{} for _ in range(settings["n_layer"])]} diff --git a/ch06/01_main-chapter-code/gpt_download.py b/ch06/01_main-chapter-code/gpt_download.py index 0d695d2d..aa0ea1e3 100644 --- a/ch06/01_main-chapter-code/gpt_download.py +++ b/ch06/01_main-chapter-code/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch06/02_bonus_additional-experiments/gpt_download.py b/ch06/02_bonus_additional-experiments/gpt_download.py index 0d695d2d..aa0ea1e3 100644 --- a/ch06/02_bonus_additional-experiments/gpt_download.py +++ b/ch06/02_bonus_additional-experiments/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch06/03_bonus_imdb-classification/gpt_download.py b/ch06/03_bonus_imdb-classification/gpt_download.py index 0d695d2d..aa0ea1e3 100644 --- a/ch06/03_bonus_imdb-classification/gpt_download.py +++ b/ch06/03_bonus_imdb-classification/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): diff --git a/ch07/01_main-chapter-code/gpt_download.py b/ch07/01_main-chapter-code/gpt_download.py index 0d695d2d..aa0ea1e3 100644 --- a/ch07/01_main-chapter-code/gpt_download.py +++ b/ch07/01_main-chapter-code/gpt_download.py @@ -5,7 +5,9 @@ import os -import requests +import urllib.request + +# import requests import json import numpy as np import tensorflow as tf @@ -42,6 +44,46 @@ def download_and_load_gpt2(model_size, models_dir): return settings, params +def download_file(url, destination): + # Send a GET request to download the file + + try: + with urllib.request.urlopen(url) as response: + # Get the total file size from headers, defaulting to 0 if not present + file_size = int(response.headers.get("Content-Length", 0)) + + # Check if file exists and has the same size + if os.path.exists(destination): + file_size_local = os.path.getsize(destination) + if file_size == file_size_local: + print(f"File already exists and is up-to-date: {destination}") + return + + # Define the block size for reading the file + block_size = 1024 # 1 Kilobyte + + # Initialize the progress bar with total file size + progress_bar_description = os.path.basename(url) # Extract filename from URL + with tqdm(total=file_size, unit="iB", unit_scale=True, desc=progress_bar_description) as progress_bar: + # Open the destination file in binary write mode + with open(destination, "wb") as file: + # Read the file in chunks and write to destination + while True: + chunk = response.read(block_size) + if not chunk: + break + file.write(chunk) + progress_bar.update(len(chunk)) # Update progress bar + except urllib.error.HTTPError: + s = ( + f"The specified URL ({url}) is incorrect, the internet connection cannot be established," + "\nor the requested file is temporarily unavailable.\nPlease visit the following website" + " for help: https://github.com/rasbt/LLMs-from-scratch/discussions/273") + print(s) + + +# Alternative way using `requests` +""" def download_file(url, destination): # Send a GET request to download the file in streaming mode response = requests.get(url, stream=True) @@ -68,6 +110,7 @@ def download_file(url, destination): for chunk in response.iter_content(block_size): progress_bar.update(len(chunk)) # Update progress bar file.write(chunk) # Write the chunk to the file +""" def load_gpt2_params_from_tf_ckpt(ckpt_path, settings): From d9f2b015279699ee058c222c7f047ca60770c4b0 Mon Sep 17 00:00:00 2001 From: hbaghramyan Date: Sun, 21 Jul 2024 11:29:56 +0200 Subject: [PATCH 2/2] modified_todo --- todo.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/todo.md b/todo.md index da7e8ea0..85c1da73 100644 --- a/todo.md +++ b/todo.md @@ -256,4 +256,8 @@ to discuss to discuss -* attn_scores.masked_fill_(self.mask.bool()[:num_tokens, :num_tokens], -torch.inf) \ No newline at end of file +* attn_scores.masked_fill_(self.mask.bool()[:num_tokens, :num_tokens], -torch.inf) + +**Causal attention** is crucial for autoregressive tasks, where the model generates text one token at a time, predicting the next token based on the previous ones. This is essential for maintaining the chronological order of text generation, ensuring that the model doesn’t use future information that hasn’t been generated yet. + +**Conventional attention** is useful for tasks like text classification, where understanding the entire context (both past and future tokens) is important. Models like BERT (Bidirectional Encoder Representations from Transformers) use this type of attention. \ No newline at end of file