From e2c9264a7c1a63c83e90e9b0615428bc98b044c6 Mon Sep 17 00:00:00 2001
From: Douglas Orr
Date: Wed, 4 Oct 2023 13:35:21 +0100
Subject: [PATCH] Improve documentation

---
 README.md              |  21 +++++++-
 doc/Usage.ipynb        | 110 +++++++++++++++++++++++++++++------------
 tensor_tracker/core.py |   2 +-
 3 files changed, 99 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index f9e74e9..6d5a9f4 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,33 @@
 # Tensor tracker
 
-Flexibly track outputs and grad-outputs of `torch.nn.Module`. [API documentation](https://graphcore-research.github.io/pytorch-tensor-tracker/).
+[API documentation](https://graphcore-research.github.io/pytorch-tensor-tracker/) | [Example](https://graphcore-research.github.io/pytorch-tensor-tracker/usage.html)
+
+Flexibly track outputs and grad-outputs of `torch.nn.Module`.
+
+**Installation:**
 
 ```bash
 pip install git+https://github.com/graphcore-research/pytorch-tensor-tracker
 ```
+
+**Usage:**
+
+Use `tensor_tracker.track(module)` as a context manager to start capturing tensors from within your module's forward and backward passes:
 
 ```python
 import tensor_tracker
 
 with tensor_tracker.track(module) as tracker:
     module(inputs).backward()
+
+print(tracker) # => Tracker(stashes=8, tracking=0)
+```
+
+Now the `Tracker` is filled with stashes, containing copies of fwd/bwd tensors at (sub)module outputs. (Note: this can consume a lot of memory.)
+
+The tracker behaves like a list of `Stash` objects, each with an attached `value`, usually a tensor or a tuple of tensors. We can also use `to_frame()` to get a Pandas table of summary statistics:
+
+```python
 print(list(tracker))
 # => [Stash(name="0.linear", type=nn.Linear, grad=False, value=tensor(...)),
 #     ...]
@@ -21,7 +37,8 @@ display(tracker.to_frame())
 ```
 
 tensor tracker to_frame output
 
-See our [example of visualising transformer activations & gradients using UMAP](doc/Example.ipynb).
+See the [documentation](https://graphcore-research.github.io/pytorch-tensor-tracker/) for more detail, or our demo of [visualising transformer activations & gradients using UMAP](doc/Example.ipynb) for a more practical example.
+
 
 ## License

diff --git a/doc/Usage.ipynb b/doc/Usage.ipynb
index ae79fbe..3da6c51 100644
--- a/doc/Usage.ipynb
+++ b/doc/Usage.ipynb
@@ -8,7 +8,7 @@
     "\n",
     "# Usage example\n",
     "\n",
-    "General setup:"
+    "Create a toy model to track:"
    ]
   },
   {
@@ -40,7 +40,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Using `tensor_tracker`:"
+    "Use `tensor_tracker` to capture forward-pass activations and backward-pass gradients from our toy model. By default, the tracker saves full tensors as a list of `tensor_tracker.Stash` objects."
    ]
   },
   {
@@ -52,27 +52,84 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[Stash(name='embed', type=<class 'torch.nn.modules.sparse.Embedding'>, grad=False, value=tensor([[ 0.4698,  1.2426,  0.5403, -1.1454],\n",
-      "        [-0.8425, -0.6475, -0.2189, -1.1326],\n",
-      "        [ 0.1268,  1.3564,  0.5632, -0.1039]])), Stash(name='project', type=<class 'torch.nn.modules.linear.Linear'>, grad=False, value=tensor([[-0.6237, -0.1652,  0.3782, -0.8841],\n",
-      "        [-0.9278, -0.2848, -0.8688, -0.4719],\n",
-      "        [-0.3449,  0.3643,  0.3935, -0.6302]])), Stash(name='unembed', type=<class 'torch.nn.modules.linear.Linear'>, grad=False, value=tensor([[-0.2458,  1.0003, -0.8231, -0.1405, -0.2964,  0.5837,  0.2889,  0.2059,\n",
-      "         -0.6114, -0.5916],\n",
-      "        [-0.6345,  1.0882, -0.4304, -0.2196, -0.0426,  0.9428,  0.2051,  0.5897,\n",
-      "         -0.2217, -0.9132],\n",
-      "        [-0.0822,  0.9985, -0.7097, -0.3139, -0.4805,  0.6878,  0.2560,  0.3254,\n",
-      "         -0.4447, -0.3332]])), Stash(name='', type=<class '__main__.Model'>, grad=False, value=tensor(2.5663)), Stash(name='', type=<class '__main__.Model'>, grad=True, value=(tensor(1.),)), Stash(name='unembed', type=<class 'torch.nn.modules.linear.Linear'>, grad=True, value=(tensor([[ 0.0237,  0.0824, -0.3200,  0.0263,  0.0225,  0.0543,  0.0404,  0.0372,\n",
-      "         0.0164,  0.0168],\n",
-      "        [ 0.0139,  0.0779,  0.0171,  0.0211,  0.0251,  0.0673,  0.0322, -0.2860,\n",
-      "         0.0210,  0.0105],\n",
-      "        [-0.3066,  0.0787,  0.0143,  0.0212,  0.0179,  0.0577,  0.0374,  0.0401,\n",
-      "         0.0186,  0.0208]]),)), Stash(name='project', type=<class 'torch.nn.modules.linear.Linear'>, grad=True, value=(tensor([[-0.1755,  0.1306,  0.0443, -0.1823],\n",
-      "        [ 0.1202, -0.0728,  0.0066, -0.0839],\n",
-      "        [-0.1863,  0.0470, -0.1055, -0.0353]]),)), Stash(name='embed', type=<class 'torch.nn.modules.sparse.Embedding'>, grad=True, value=(tensor([[-0.0108,  0.1086, -0.1304, -0.0370],\n",
-      "        [ 0.0534, -0.0029,  0.0078, -0.0074],\n",
-      "        [-0.0829,  0.0152, -0.1170, -0.0625]]),))]\n"
+      "Tracker(stashes=8, tracking=0)\n"
      ]
-     },
+     }
+    ],
+    "source": [
+     "import tensor_tracker\n",
+     "\n",
+     "with tensor_tracker.track(module) as tracker:\n",
+     "    module(inputs).backward()\n",
+     "\n",
+     "print(tracker)"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "Note that calls are only tracked within the `with` context. Afterwards, the tracker behaves like a list of `Stash` objects, each with attached `name`, `value`, etc."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 3,
+    "metadata": {},
+    "outputs": [
+     {
+      "data": {
+       "text/plain": [
+        "[Stash(name='embed', type=<class 'torch.nn.modules.sparse.Embedding'>, grad=False, value=tensor([[ 0.4698,  1.2426,  0.5403, -1.1454],\n",
+        "         [-0.8425, -0.6475, -0.2189, -1.1326],\n",
+        "         [ 0.1268,  1.3564,  0.5632, -0.1039]])),\n",
+        " Stash(name='project', type=<class 'torch.nn.modules.linear.Linear'>, grad=False, value=tensor([[-0.6237, -0.1652,  0.3782, -0.8841],\n",
+        "         [-0.9278, -0.2848, -0.8688, -0.4719],\n",
+        "         [-0.3449,  0.3643,  0.3935, -0.6302]])),\n",
+        " Stash(name='unembed', type=<class 'torch.nn.modules.linear.Linear'>, grad=False, value=tensor([[-0.2458,  1.0003, -0.8231, -0.1405, -0.2964,  0.5837,  0.2889,  0.2059,\n",
+        "          -0.6114, -0.5916],\n",
+        "         [-0.6345,  1.0882, -0.4304, -0.2196, -0.0426,  0.9428,  0.2051,  0.5897,\n",
+        "          -0.2217, -0.9132],\n",
+        "         [-0.0822,  0.9985, -0.7097, -0.3139, -0.4805,  0.6878,  0.2560,  0.3254,\n",
+        "          -0.4447, -0.3332]])),\n",
+        " Stash(name='', type=<class '__main__.Model'>, grad=False, value=tensor(2.5663)),\n",
+        " Stash(name='', type=<class '__main__.Model'>, grad=True, value=(tensor(1.),)),\n",
+        " Stash(name='unembed', type=<class 'torch.nn.modules.linear.Linear'>, grad=True, value=(tensor([[ 0.0237,  0.0824, -0.3200,  0.0263,  0.0225,  0.0543,  0.0404,  0.0372,\n",
+        "          0.0164,  0.0168],\n",
+        "         [ 0.0139,  0.0779,  0.0171,  0.0211,  0.0251,  0.0673,  0.0322, -0.2860,\n",
+        "          0.0210,  0.0105],\n",
+        "         [-0.3066,  0.0787,  0.0143,  0.0212,  0.0179,  0.0577,  0.0374,  0.0401,\n",
+        "          0.0186,  0.0208]]),)),\n",
+        " Stash(name='project', type=<class 'torch.nn.modules.linear.Linear'>, grad=True, value=(tensor([[-0.1755,  0.1306,  0.0443, -0.1823],\n",
+        "         [ 0.1202, -0.0728,  0.0066, -0.0839],\n",
+        "         [-0.1863,  0.0470, -0.1055, -0.0353]]),)),\n",
+        " Stash(name='embed', type=<class 'torch.nn.modules.sparse.Embedding'>, grad=True, value=(tensor([[-0.0108,  0.1086, -0.1304, -0.0370],\n",
+        "         [ 0.0534, -0.0029,  0.0078, -0.0074],\n",
+        "         [-0.0829,  0.0152, -0.1170, -0.0625]]),))]"
+       ]
+      },
+      "metadata": {},
+      "output_type": "display_data"
+     }
+    ],
+    "source": [
+     "display(list(tracker))\n",
+     "# => [Stash(name=\"embed\", type=nn.Embedding, grad=False, value=tensor(...)),\n",
+     "#     ...]"
+    ]
+   },
+   {
+    "cell_type": "markdown",
+    "metadata": {},
+    "source": [
+     "As a higher-level API, `to_frame` computes a summary statistic for each stash, defaulting to `torch.std`."
+    ]
+   },
+   {
+    "cell_type": "code",
+    "execution_count": 4,
+    "metadata": {},
+    "outputs": [
     {
      "data": {
       "text/html": [
@@ -178,15 +235,6 @@
      }
     ],
     "source": [
-     "import tensor_tracker\n",
-     "\n",
-     "with tensor_tracker.track(module) as tracker:\n",
-     "    module(inputs).backward()\n",
-     "\n",
-     "print(list(tracker))\n",
-     "# => [Stash(name=\"embed\", type=nn.Embedding, grad=False, value=tensor(...)),\n",
-     "# ...]\n",
-     "\n",
      "display(tracker.to_frame())"
    ]
   }

diff --git a/tensor_tracker/core.py b/tensor_tracker/core.py
index a8931cc..b07682e 100644
--- a/tensor_tracker/core.py
+++ b/tensor_tracker/core.py
@@ -2,7 +2,7 @@
 
 """Utility for tracking activations and gradients at `nn.Module` outputs.
 
-Use `track` to start tracking a module & submodule. Then use the original module
+Use `track` to start tracking a module & submodules. Then use the original module
 as usual. Your `Tracker` will be filled with a list of `Stash`es, containing
 copies of fwd/bwd tensors at (sub)module outputs. (Beware, this can consume a
 lot of memory.)
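---

The notebook's toy model itself is defined outside the hunks shown above. For readers who want to run the usage example end-to-end, here is a minimal sketch consistent with the stash names and tensor shapes in the notebook output. The `Model` class name, layer sizes, and internal cross-entropy loss are assumptions inferred from that output, not taken from the patch; only the `track`/`to_frame` calls follow the API the patch documents.

```python
# Hypothetical reconstruction of the notebook's toy model: the class name,
# layer sizes, and internal loss are guesses inferred from the tracked output.
import torch
from torch import nn

import tensor_tracker


class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.embed = nn.Embedding(10, 4)  # stash 'embed' shows shape (3, 4)
        self.project = nn.Linear(4, 4)    # stash 'project' shows shape (3, 4)
        self.unembed = nn.Linear(4, 10)   # stash 'unembed' shows shape (3, 10)

    def forward(self, tokens: torch.Tensor) -> torch.Tensor:
        # Predict the input tokens back (purely for demonstration), returning
        # a scalar loss so `.backward()` can be called on the module output.
        logits = self.unembed(self.project(self.embed(tokens)))
        return nn.functional.cross_entropy(logits, tokens)


module = Model()
inputs = torch.randint(0, 10, (3,))  # batch of 3 token ids

with tensor_tracker.track(module) as tracker:
    module(inputs).backward()

# Fwd + bwd stashes for embed, project, unembed, and the root module = 8.
print(tracker)             # => Tracker(stashes=8, tracking=0)
print(tracker.to_frame())  # one summary statistic per stash (default torch.std)
```

The `tracking=0` in the printed tracker reflects the note in the notebook: hooks only capture while the `with` block is active, so nothing further is stashed after the context exits.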