Merge branch 'dev'

hbaghramyan · Sep 14, 2024 · 3b82a7b · 3b82a7b
2 parents 2a0cba5 + 9fd07a7
commit 3b82a7b
Show file tree

Hide file tree

Showing 7 changed files with 110 additions and 26 deletions.
diff --git a/ch04/01_main-chapter-code/ch04.py b/ch04/01_main-chapter-code/ch04.py
@@ -17,6 +17,40 @@
     "n_layers": 12,  # Number of layers
     "drop_rate": 0.1,  # Dropout rate
     "qkv_bias": False,  # Query-Key-Value bias
+    "model_name": "gpt_small",
+}
+
+GPT_MEDIUM = {
+    "vocab_size": 50257,  # Vocabulary size
+    "context_length": 1024,  # Context length
+    "emb_dim": 1024,  # Embedding dimension
+    "n_heads": 16,  # Number of attention heads
+    "n_layers": 24,  # Number of layers
+    "drop_rate": 0.1,  # Dropout rate
+    "qkv_bias": False,  # Query-Key-Value bias
+    "model_name": "gpt_medium",
+}
+
+GPT_LARGE = {
+    "vocab_size": 50257,  # Vocabulary size
+    "context_length": 1024,  # Context length
+    "emb_dim": 1280,  # Embedding dimension
+    "n_heads": 20,  # Number of attention heads
+    "n_layers": 36,  # Number of layers
+    "drop_rate": 0.1,  # Dropout rate
+    "qkv_bias": False,  # Query-Key-Value bias
+    "model_name": "gpt_large",
+}
+
+GPT_XL = {
+    "vocab_size": 50257,  # Vocabulary size
+    "context_length": 1024,  # Context length
+    "emb_dim": 1600,  # Embedding dimension
+    "n_heads": 25,  # Number of attention heads
+    "n_layers": 48,  # Number of layers
+    "drop_rate": 0.1,  # Dropout rate
+    "qkv_bias": False,  # Query-Key-Value bias
+    "model_name": "gpt_xl",
 }
 
 
@@ -264,7 +298,7 @@ def generate_text_simple(model, idx, max_new_tokens, context_size):
 )
 
 print("Output:", out)
-print("Output length:", len[out[0]])
+print("Output length:", len(out[0]))
 # exercise 01 solution
 
 num_att = 0
@@ -276,6 +310,14 @@ def generate_text_simple(model, idx, max_new_tokens, context_size):
 
 # end of exercise 01 solution
 
+# exercise 02 solution begin
+for config in [GPT_MEDIUM, GPT_LARGE, GPT_XL]:
+    model = GPTModel(config)
+    total = sum(p.numel() for p in model.parameters())
+    print(f"Number of parameters in {config['model_name']} is", total)
+    total_size = total * 4 / (1024 * 1024)
+    print(f"Size of moodel {config['model_name']} in MBs is {total_size:.2f}")
+# exercise 02 solution end
 print(f"Total elements in attention layers is: {num_att}")
 print(f"Total elements in attention layers is: {num_ff}")
 

diff --git a/ch06/01_main-chapter-code/load-finetuned-model.ipynb b/ch06/01_main-chapter-code/load-finetuned-model.ipynb
@@ -3,7 +3,9 @@
   {
    "cell_type": "markdown",
    "id": "1545a16b-bc8d-4e49-b9a6-db6631e7483d",
-   "metadata": {},
+   "metadata": {
+    "id": "1545a16b-bc8d-4e49-b9a6-db6631e7483d"
+   },
    "source": [
     "<table style=\"width:100%\">\n",
     "<tr>\n",
@@ -23,15 +25,19 @@
   {
    "cell_type": "markdown",
    "id": "f3f83194-82b9-4478-9550-5ad793467bd0",
-   "metadata": {},
+   "metadata": {
+    "id": "f3f83194-82b9-4478-9550-5ad793467bd0"
+   },
    "source": [
     "# Load And Use Finetuned Model"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "466b564e-4fd5-4d76-a3a1-63f9f0993b7e",
-   "metadata": {},
+   "metadata": {
+    "id": "466b564e-4fd5-4d76-a3a1-63f9f0993b7e"
+   },
    "source": [
     "This notebook contains minimal code to load the finetuned model that was created and saved in chapter 6 via [ch06.ipynb](ch06.ipynb)."
    ]
@@ -40,7 +46,13 @@
    "cell_type": "code",
    "execution_count": 1,
    "id": "fd80e5f5-0f79-4a6c-bf31-2026e7d30e52",
-   "metadata": {},
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "fd80e5f5-0f79-4a6c-bf31-2026e7d30e52",
+    "outputId": "9eeefb8e-a7eb-4d62-cf78-c797b3ed4e2e"
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -66,7 +78,9 @@
    "cell_type": "code",
    "execution_count": 2,
    "id": "ed86d6b7-f32d-4601-b585-a2ea3dbf7201",
-   "metadata": {},
+   "metadata": {
+    "id": "ed86d6b7-f32d-4601-b585-a2ea3dbf7201"
+   },
    "outputs": [],
    "source": [
     "from pathlib import Path\n",
@@ -83,7 +97,9 @@
    "cell_type": "code",
    "execution_count": 3,
    "id": "fb02584a-5e31-45d5-8377-794876907bc6",
-   "metadata": {},
+   "metadata": {
+    "id": "fb02584a-5e31-45d5-8377-794876907bc6"
+   },
    "outputs": [],
    "source": [
     "from previous_chapters import GPTModel\n",
@@ -116,7 +132,9 @@
    "cell_type": "code",
    "execution_count": 4,
    "id": "f1ccf2b7-176e-4cfd-af7a-53fb76010b94",
-   "metadata": {},
+   "metadata": {
+    "id": "f1ccf2b7-176e-4cfd-af7a-53fb76010b94"
+   },
    "outputs": [],
    "source": [
     "import torch\n",
@@ -128,14 +146,17 @@
     "# Then load pretrained weights\n",
     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
     "model.load_state_dict(torch.load(\"review_classifier.pth\", map_location=device, weights_only=True))\n",
+    "model.to(device)\n",
     "model.eval();"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 5,
    "id": "a1fd174e-9555-46c5-8780-19b0aa4f26e5",
-   "metadata": {},
+   "metadata": {
+    "id": "a1fd174e-9555-46c5-8780-19b0aa4f26e5"
+   },
    "outputs": [],
    "source": [
     "import tiktoken\n",
@@ -147,7 +168,9 @@
    "cell_type": "code",
    "execution_count": 6,
    "id": "2a4c0129-efe5-46e9-bb90-ba08d407c1a2",
-   "metadata": {},
+   "metadata": {
+    "id": "2a4c0129-efe5-46e9-bb90-ba08d407c1a2"
+   },
    "outputs": [],
    "source": [
     "# This function was implemented in ch06.ipynb\n",
@@ -167,7 +190,7 @@
     "\n",
     "    # Model inference\n",
     "    with torch.no_grad():\n",
-    "        logits = model(input_tensor)[:, -1, :]  # Logits of the last output token\n",
+    "        logits = model(input_tensor.to(device))[:, -1, :]  # Logits of the last output token\n",
     "    predicted_label = torch.argmax(logits, dim=-1).item()\n",
     "\n",
     "    # Return the classified result\n",
@@ -178,7 +201,13 @@
    "cell_type": "code",
    "execution_count": 7,
    "id": "1e26862c-10b5-4a0f-9dd6-b6ddbad2fc3f",
-   "metadata": {},
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "1e26862c-10b5-4a0f-9dd6-b6ddbad2fc3f",
+    "outputId": "28eb2c02-0e38-4356-b2a3-2bf6accb5316"
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -203,7 +232,13 @@
    "cell_type": "code",
    "execution_count": 8,
    "id": "78472e05-cb4e-4ec4-82e8-23777aa90cf8",
-   "metadata": {},
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
+    },
+    "id": "78472e05-cb4e-4ec4-82e8-23777aa90cf8",
+    "outputId": "0cd3cd62-f407-45f3-fa4f-51ff665355eb"
+   },
    "outputs": [
     {
      "name": "stdout",
@@ -226,6 +261,11 @@
   }
  ],
  "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "L4",
+   "provenance": []
+  },
   "kernelspec": {
    "display_name": "Python 3 (ipykernel)",
    "language": "python",

diff --git a/ch07/05_dataset-generation/reflection-gpt4.ipynb b/ch07/05_dataset-generation/reflection-gpt4.ipynb
@@ -110,7 +110,7 @@
     "- First, let's test if the OpenAI API is correctly set up\n",
     "- If you don't have an account yet, you need to create one at https://platform.openai.com/\n",
     "- Note that you will also have to transfer some funds to your account as the GPT-4 API is not free (see https://platform.openai.com/settings/organization/billing/overview)\n",
-    "- Running the examples in this notebook costs about \\$0.3 (3 cents) with GPT-4o-mini as of this writing\n",
+    "- Running the code exactly as it appears in this notebook costs about \\$0.03 (3 cents) with GPT-4o-mini as of this writing\n",
     "- Applying the two methodologies above to all 1100 entries in the chapter 7 instruction dataset costs about \\$0.60 (60 cents)"
    ]
   },

diff --git a/setup/01_optional-python-setup-preferences/README.md b/setup/01_optional-python-setup-preferences/README.md
@@ -32,13 +32,6 @@ where `Desktop/` is the folder where the Miniforge installer was downloaded to.
 Next, step through the download instructions, confirming with "Enter".
 
 
-
-If you work with many packages, Conda can be slow because of its thorough but complex dependency resolution process and the handling of large package indexes and metadata. To speed up Conda, you can use the following setting, which switches to a more efficient Rust reimplementation for solving dependencies:
-
-```
-conda config --set solver libmamba
-```
-
 <br>
 <br>
 
@@ -99,7 +92,7 @@ You can also still use `pip` to install libraries. By default, `pip` should be l
 PyTorch can be installed just like any other Python library or package using pip. For example:
 
 ```bash
-pip install torch==2.0.1
+pip install torch
 ```
 
 However, since PyTorch is a comprehensive library featuring CPU- and GPU-compatible code, the installation may require additional settings and explanation (see *A.1.3 Installing PyTorch* in the book for more information).
@@ -109,10 +102,15 @@ It's also highly recommended to consult the installation guide menu on the offic
 <img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/setup/01_optional-python-setup-preferences/pytorch-installer.jpg" width="600px">
 
 
+## 5. Installing Python packages and libraries used in this book
+
+Please refer to the [Installing Python packages and libraries used in this book](../02_installing-python-libraries/README.md) document for instructions on how to install the required libraries.
+
+<br>
 
 ---
 
 
 
 
-Any questions? Please feel free to reach out in the [Discussion Forum](https://github.com/rasbt/LLMs-from-scratch/discussions).
+Any questions? Please feel free to reach out in the [Discussion Forum](https://github.com/rasbt/LLMs-from-scratch/discussions).
diff --git a/setup/02_installing-python-libraries/README.md b/setup/02_installing-python-libraries/README.md
@@ -47,7 +47,7 @@ In this case, you may want to use `watermark` to check if you opened the Jupyter
 PyTorch can be installed just like any other Python library or package using pip. For example:
 
 ```bash
-pip install torch==2.0.1
+pip install torch
 ```
 
 However, since PyTorch is a comprehensive library featuring CPU- and GPU-compatible code, the installation may require additional settings and explanation (see *A.1.3 Installing PyTorch* in the book for more information).
@@ -56,7 +56,7 @@ It's also highly recommended to consult the installation guide menu on the offic
 
 <img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/setup/02_installing-python-libraries/pytorch-installer.jpg" width="600px">
 
-
+<br>
 
 ---
 

diff --git a/setup/README.md b/setup/README.md
@@ -47,7 +47,9 @@ There are many good options for code editors. My preferred choice is the popular
 
 ## VSCode Extensions
 
-If you are using Visual Studio Code (VSCode) as your primary code editor, you can find recommended extensions in the `.vscode` subfolder. To install these, open the `extensions.json` file in VSCode and click the "Install" button in the pop-up menu on the lower right.
+If you are using Visual Studio Code (VSCode) as your primary code editor, you can find recommended extensions in the `.vscode` subfolder. To install these, open this "setup" folder in VSCode (File -> Open Folder...) and then click the "Install" button in the pop-up menu on the lower right.
+
+<img src="https://sebastianraschka.com/images/LLMs-from-scratch-images/setup/README/vs-code-extensions.webp?1" alt="1" width="700">
 
 &nbsp;
 

diff --git a/todo.md b/todo.md
@@ -397,6 +397,8 @@ sum([p.numel() for p in block.att.parameters()])
 
 ### 16/09/2024 -
 
+0. see the todo.md in dl fundamentals
+
 1. օնլայն գրքից - 4.6 - exercises, ch04/02_performance-analysis/flops-analysis.ipynb, 4.7
 
 2. [stratified vs cluster vs random vs convenience vs systematic sampling](https://www.youtube.com/watch?v=9PaR1TsvnJs)