-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Expand PersHelper to support disk/memory approaches to communicate pe…
…rsistence, add unit tests.
- Loading branch information
user
committed
Apr 28, 2024
1 parent
1a1c406
commit ad7c34f
Showing
14 changed files
with
1,323 additions
and
562 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,180 +1,184 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<h1>Character Level GPT on Text Data</h1>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Edit the filename below to train the GPT model on the corpus. Select \"Run\" -> \"Run All\"." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%scorep_env\n", | ||
"SCOREP_ENABLE_TRACING=1\n", | ||
"SCOREP_ENABLE_PROFILING=0\n", | ||
"SCOREP_TOTAL_MEMORY=3g" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#%env SCOREP_KERNEL_PERSISTENCE_MODE MEMORY" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%env" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%switch_serializer\n", | ||
"cloudpickle" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%scorep_python_binding_arguments\n", | ||
"--noinstrumenter" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"filename = \"fairy_test.txt\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%execute_with_scorep\n", | ||
"import scorep\n", | ||
"import logging\n", | ||
"\n", | ||
"logging.basicConfig(\n", | ||
" format=\"%(asctime)s - %(levelname)s - %(name)s - %(message)s\",\n", | ||
" datefmt=\"%d/%m/%Y %H:%M:%S\",\n", | ||
" level=logging.INFO)\n", | ||
"\n", | ||
"from utils import set_seed\n", | ||
"set_seed(42)\n", | ||
"\n", | ||
"import numpy as numpy\n", | ||
"import torch\n", | ||
"import torch.nn as nn\n", | ||
"from torch.nn import functional as F\n", | ||
"\n", | ||
"import math\n", | ||
"from torch.utils.data import Dataset\n", | ||
"\n", | ||
"class CharDataset(Dataset):\n", | ||
" def __init__(self, data, block_size):\n", | ||
" chars = sorted(list(set(data)))\n", | ||
" data_size, vocab_size = len(data), len(chars)\n", | ||
" print(\"data has %d characters, %d unique.\" % (data_size, vocab_size))\n", | ||
"\n", | ||
" self.stoi = {ch:i for i, ch in enumerate(chars)}\n", | ||
" self.itos = {i:ch for i, ch in enumerate(chars)}\n", | ||
" self.block_size = block_size\n", | ||
" self.vocab_size = vocab_size\n", | ||
" self.data = data\n", | ||
"\n", | ||
" def __len__(self):\n", | ||
" return len(self.data) - self.block_size\n", | ||
"\n", | ||
" def __getitem__(self, idx):\n", | ||
" chunk = self.data[idx : idx+self.block_size+1]\n", | ||
" dix = [self.stoi[s] for s in chunk]\n", | ||
"\n", | ||
" x = torch.tensor(dix[:-1], dtype = torch.long)\n", | ||
" y = torch.tensor(dix[1:], dtype = torch.long)\n", | ||
" return x, y\n", | ||
"\n", | ||
"with scorep.instrumenter.enable():\n", | ||
" block_size = 32\n", | ||
"\n", | ||
" text = open(\"./{}\".format(filename), \"r\").read()\n", | ||
" train_dataset = CharDataset(text, block_size)\n", | ||
"\n", | ||
" from model import GPT, GPTconfig\n", | ||
" mconf = GPTconfig(train_dataset.vocab_size, train_dataset.block_size,\n", | ||
" n_layer=1, n_head=8, n_embd=512)\n", | ||
" model = GPT(mconf)\n", | ||
"\n", | ||
" from trainer import Trainer, TrainerConfig\n", | ||
"\n", | ||
" tconf = TrainerConfig(max_epochs=1, batch_size=512, learning_rate=6e-4,\n", | ||
" lr_decay=True, warmup_tokens=512*20, final_tokens=2*len(train_dataset)*block_size,\n", | ||
" num_workers=4)\n", | ||
" trainer = Trainer(model, train_dataset, None, tconf)\n", | ||
" trainer.train()\n", | ||
"\n", | ||
" torch.save(model.state_dict(), \"./saved_models/trained_gpt_model\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"@webio": { | ||
"lastCommId": null, | ||
"lastKernelId": null | ||
}, | ||
"kernelspec": { | ||
"display_name": "scorep-python", | ||
"language": "python", | ||
"name": "scorep-python" | ||
}, | ||
"language_info": { | ||
"file_extension": ".py", | ||
"mimetype": "text/plain", | ||
"name": "python" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"<h1>Character Level GPT on Text Data</h1>" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### Edit the filename below to train the GPT model on the corpus. Select \"Run\" -> \"Run All\"." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"set user environment sucessfully: {'SCOREP_ENABLE_TRACING': '1', 'SCOREP_ENABLE_PROFILING': '0', 'SCOREP_TOTAL_MEMORY': '3g'}" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%%scorep_env\n", | ||
"SCOREP_ENABLE_TRACING=1\n", | ||
"SCOREP_ENABLE_PROFILING=0\n", | ||
"SCOREP_TOTAL_MEMORY=3g" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"use the following scorep python binding arguments: --noinstrumenter" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%%scorep_python_binding_arguments\n", | ||
"--noinstrumenter" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"filename = \"fairytales.txt\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"data has 49496 characters, 79 unique.\n" | ||
] | ||
}, | ||
{ | ||
"name": "stderr", | ||
"output_type": "stream", | ||
"text": [ | ||
"15/09/2021 15:35:08 - INFO - model - Number of parameters : 2.531738e+07\n", | ||
"/home/h9/s4122485/virtualenv_jupyterkernel_scorep_python/lib/python3.8/site-packages/torch/utils/data/dataloader.py:478: UserWarning: This DataLoader will create 4 worker processes in total. Our suggested max number of worker in current system is 1, which is smaller than what this DataLoader is going to create. Please be aware that excessive worker creation might get DataLoader running slow or even freeze, lower the worker number to avoid potential slowness/freeze if necessary.\n", | ||
" warnings.warn(_create_warning_msg(\n", | ||
"epoch 1 iter 96: train loss 2.31473. lr 0.00030152924503397155: 100%|██████████| 97/97 [02:27<00:00, 1.52s/it]\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"epoch 2 iter 96: train loss 2.05380. lr 5.9999999999999995e-05: 100%|██████████| 97/97 [02:27<00:00, 1.52s/it]\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"[W pthreadpool-cpp.cc:90] Warning: Leaking Caffe2 thread-pool after fork. (function pthreadpool)\n", | ||
"epoch 3 iter 96: train loss 1.88871. lr 0.0003015292450339715: 100%|██████████| 97/97 [02:27<00:00, 1.52s/it] \n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%%execute_with_scorep\n", | ||
"import scorep\n", | ||
"import logging\n", | ||
"\n", | ||
"logging.basicConfig(\n", | ||
" format=\"%(asctime)s - %(levelname)s - %(name)s - %(message)s\",\n", | ||
" datefmt=\"%d/%m/%Y %H:%M:%S\",\n", | ||
" level=logging.INFO)\n", | ||
"\n", | ||
"from utils import set_seed\n", | ||
"set_seed(42)\n", | ||
"\n", | ||
"import numpy as numpy\n", | ||
"import torch\n", | ||
"import torch.nn as nn\n", | ||
"from torch.nn import functional as F\n", | ||
"\n", | ||
"import math\n", | ||
"from torch.utils.data import Dataset\n", | ||
"\n", | ||
"class CharDataset(Dataset):\n", | ||
" def __init__(self, data, block_size):\n", | ||
" chars = sorted(list(set(data)))\n", | ||
" data_size, vocab_size = len(data), len(chars)\n", | ||
" print(\"data has %d characters, %d unique.\" % (data_size, vocab_size))\n", | ||
"\n", | ||
" self.stoi = {ch:i for i, ch in enumerate(chars)}\n", | ||
" self.itos = {i:ch for i, ch in enumerate(chars)}\n", | ||
" self.block_size = block_size\n", | ||
" self.vocab_size = vocab_size\n", | ||
" self.data = data\n", | ||
"\n", | ||
" def __len__(self):\n", | ||
" return len(self.data) - self.block_size\n", | ||
"\n", | ||
" def __getitem__(self, idx):\n", | ||
" chunk = self.data[idx : idx+self.block_size+1]\n", | ||
" dix = [self.stoi[s] for s in chunk]\n", | ||
"\n", | ||
" x = torch.tensor(dix[:-1], dtype = torch.long)\n", | ||
" y = torch.tensor(dix[1:], dtype = torch.long)\n", | ||
" return x, y\n", | ||
"\n", | ||
"with scorep.instrumenter.enable():\n", | ||
" block_size = 32\n", | ||
"\n", | ||
" text = open(\"./{}\".format(filename), \"r\").read()\n", | ||
" train_dataset = CharDataset(text, block_size)\n", | ||
"\n", | ||
" from model import GPT, GPTconfig\n", | ||
" mconf = GPTconfig(train_dataset.vocab_size, train_dataset.block_size,\n", | ||
" n_layer=8, n_head=8, n_embd=512)\n", | ||
" model = GPT(mconf)\n", | ||
"\n", | ||
" from trainer import Trainer, TrainerConfig\n", | ||
"\n", | ||
" tconf = TrainerConfig(max_epochs=3, batch_size=512, learning_rate=6e-4,\n", | ||
" lr_decay=True, warmup_tokens=512*20, final_tokens=2*len(train_dataset)*block_size,\n", | ||
" num_workers=4)\n", | ||
" trainer = Trainer(model, train_dataset, None, tconf)\n", | ||
"\n", | ||
" torch.cuda.empty_cache()\n", | ||
" trainer.train()\n", | ||
"\n", | ||
" torch.save(model.state_dict(), \"./saved_models/trained_gpt_model\")" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"@webio": { | ||
"lastCommId": null, | ||
"lastKernelId": null | ||
}, | ||
"kernelspec": { | ||
"display_name": "scorep-python3", | ||
"language": "python3", | ||
"name": "scorep-python3" | ||
}, | ||
"language_info": { | ||
"file_extension": ".py", | ||
"mimetype": "text/plain", | ||
"name": "Any text" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.