From 377c460f8c71f0510a2fb56805103984b1deecdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?=
Date: Wed, 14 Feb 2024 18:04:24 +0100
Subject: [PATCH] docs: add benchmarks

---
 .pre-commit-config.yaml |   2 +-
 README.md               |  31 +++++-
 docs/benchmark.md       | 175 ++++++++++++++++++++++++++++++
 scripts/benchmark.py    | 228 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 434 insertions(+), 2 deletions(-)
 create mode 100644 docs/benchmark.md
 create mode 100644 scripts/benchmark.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 45fbaae..95bc64c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -31,4 +31,4 @@ repos:
     hooks:
       - id: blacken-docs
         additional_dependencies: [black==20.8b1]
-        exclude: notebooks/
+        exclude: ^(notebooks/|docs/benchmark)
diff --git a/README.md b/README.md
index 360aacf..5559fe2 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,24 @@ pip install foldedtensor
 - C++ optimized code for fast data loading from Python lists and refolding
 - Flexibility in data representation, making it easy to switch between different layouts when needed
 
-## Example
+## Examples
+
+At its simplest, `foldedtensor` can be used to convert nested Python lists into a PyTorch tensor:
+
+```python
+from foldedtensor import as_folded_tensor
+
+ft = as_folded_tensor(
+    [
+        [0, 1, 2],
+        [3],
+    ],
+)
+# FoldedTensor([[0, 1, 2],
+#               [3, 0, 0]])
+```
+
+You can also specify names and flattened/unflattened dimensions at the time of creation:
 
 ```python
 import torch
@@ -54,7 +71,11 @@ ft = as_folded_tensor(
 print(ft)
 # FoldedTensor([[1, 2, 3],
 #               [4, 3, 0]])
+```
+
+Once created, you can change the shape of the tensor by refolding it:
+
+```python
 # Refold on the lines and words dims (flatten the samples dim)
 print(ft.refold(("lines", "words")))
 # FoldedTensor([[1, 0],
@@ -67,7 +88,11 @@ print(ft.refold(("lines", "words")))
 # Refold on the words dim only: flatten everything
 print(ft.refold(("words",)))
 # FoldedTensor([1, 2, 3, 4, 3])
+```
+
+The tensor can be further used with standard PyTorch operations:
+
+```python
 # Working with PyTorch operations
 embedder = torch.nn.Embedding(10, 16)
 embedding = embedder(ft.refold(("words",)))
@@ -79,6 +104,10 @@ print(refolded_embedding.shape)
 # torch.Size([2, 5, 16]) # 2 samples, 5 words max, 16 dims
 ```
 
+## Benchmarks
+
+Benchmarks comparing `foldedtensor` with various alternatives are available here: [docs/benchmark.md](https://github.com/aphp/foldedtensor/blob/main/docs/benchmark.md).
+
 ## Comparison with alternatives
 
 Unlike other ragged or nested tensor implementations, a FoldedTensor does not enforce a specific structure on the nested data, and does not require padding all dimensions. This provides the user with greater flexibility when working with data that can be arranged in multiple ways depending on the data transformation. Moreover, the C++ optimization ensures high performance, making it ideal for handling deeply nested tensors efficiently.
diff --git a/docs/benchmark.md b/docs/benchmark.md
new file mode 100644
index 0000000..5b04335
--- /dev/null
+++ b/docs/benchmark.md
@@ -0,0 +1,175 @@
+`torch.__version__ == '2.0.1'`
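+
+The timings below use two small helpers defined in `scripts/benchmark.py` (added in this patch): `make_nested_list`, which builds a nested list whose size at each level is either fixed or drawn from a `(min, max)` range, and `python_padding`, a pure Python/PyTorch padding baseline. As a rough, hypothetical miniature of the kind of input being padded (the benchmark cases use much larger sizes):
+
+```python
+tiny = [
+    [[1, 1], [1, 1, 1]],  # first sample: two inner lists, of lengths 2 and 3
+    [[1, 1, 1, 1]],       # second sample: one inner list, of length 4
+]
+# Padding every level to its maximum length gives a (2, 2, 4) tensor:
+# [[[1, 1, 0, 0], [1, 1, 1, 0]],
+#  [[1, 1, 1, 1], [0, 0, 0, 0]]]
+```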
+
+
+## Case 1 (pad variable lengths nested list)
+
+The following 3-level nested list has 32 sublists at the first level, between 50 and 100 sublists at the second level, and between 25 and 30 integers at the third level.
+```python
+nested_list = make_nested_list(32, (50, 100), (25, 30), value=1)
+```
+
+Comparisons:
+```python
+%timeit python_padding(nested_list)
+# 100 loops, best of 5: 13.86 ms per loop
+```
+
+```python
+%timeit foldedtensor.as_folded_tensor(nested_list)
+# 100 loops, best of 5: 0.61 ms per loop
+```
+
+
+## Case 2 (same lengths nested lists)
+
+```python
+nested_list = make_nested_list(32, 100, 30, value=1)
+```
+
+Comparisons:
+```python
+%timeit torch.tensor(nested_list)
+# 100 loops, best of 5: 6.77 ms per loop
+```
+
+```python
+%timeit torch.LongTensor(nested_list)
+# 100 loops, best of 5: 2.68 ms per loop
+```
+
+```python
+%timeit python_padding(nested_list)
+# 100 loops, best of 5: 17.22 ms per loop
+```
+
+```python
+%timeit torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)
+# 100 loops, best of 5: 2.91 ms per loop
+```
+
+```python
+%timeit foldedtensor.as_folded_tensor(nested_list)
+# 100 loops, best of 5: 0.90 ms per loop
+```
+
+
+## Case 3 (simple list)
+
+```python
+simple_list = make_nested_list(10000, value=1)
+```
+
+Comparisons:
+```python
+%timeit torch.tensor(simple_list)
+# 100 loops, best of 5: 0.65 ms per loop
+```
+
+```python
+%timeit torch.LongTensor(simple_list)
+# 100 loops, best of 5: 0.27 ms per loop
+```
+
+```python
+%timeit python_padding(simple_list)
+# 100 loops, best of 5: 0.27 ms per loop
+```
+
+```python
+%timeit foldedtensor.as_folded_tensor(simple_list)
+# 100 loops, best of 5: 0.07 ms per loop
+```
+
+
+## Case 4 (same lengths nested lists to flat tensor)
+
+```python
+nested_list = make_nested_list(32, 100, 30, value=1)
+```
+
+Comparisons:
+```python
+%timeit torch.tensor(nested_list).view(-1)
+# 100 loops, best of 5: 6.59 ms per loop
+```
+
+```python
+%timeit torch.LongTensor(nested_list).view(-1)
+# 100 loops, best of 5: 2.73 ms per loop
+```
+
+```python
+%timeit python_padding(nested_list).view(-1)
+# 100 loops, best of 5: 17.14 ms per loop
+```
+
+```python
+%timeit foldedtensor.as_folded_tensor(nested_list).view(-1)
+# 100 loops, best of 5: 0.95 ms per loop
+```
+
+```python
+%timeit foldedtensor.as_folded_tensor(nested_list, data_dims=(2,))
+# 100 loops, best of 5: 0.92 ms per loop
+```
+
+
+## Case 5 (variable lengths nested lists to padded embeddings)
+
+Nested lists with different lengths (second level lists have lengths between 50 and 150)
+```python
+nested_list = make_nested_list(32, (50, 150), 30, value=1)
+```
+
+Comparisons:
+### FT vs NT: padding 0
+
+```python
+%timeit torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)
+# 100 loops, best of 5: 3.25 ms per loop
+```
+
+```python
+%timeit foldedtensor.as_folded_tensor(nested_list).as_tensor()
+# 100 loops, best of 5: 1.05 ms per loop
+```
+
+### FT vs NT: padding 1
+
+```python
+%timeit torch.nested.nested_tensor([torch.FloatTensor(sub) for sub in nested_list]).to_padded_tensor(1)
+# 100 loops, best of 5: 3.81 ms per loop
+```
+
+```python
+%timeit x = foldedtensor.as_folded_tensor(nested_list); x.masked_fill_(x.mask, 1)
+# 100 loops, best of 5: 1.34 ms per loop
+```
+
+
+## Case 6 (2d padding)
+
+```python
+nested_list = make_nested_list(160, (50, 150), value=1)
+```
+
+Comparisons:
+```python
+%timeit python_padding(nested_list)
+# 100 loops, best of 5: 1.24 ms per loop
+```
+
+```python
+%timeit torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)
+# 100 loops, best of 5: 1.06 ms per loop
+```
+
+```python
+%timeit torch.nn.utils.rnn.pad_sequence([torch.LongTensor(sub) for sub in nested_list], batch_first=True, padding_value=0)
+# 100 loops, best of 5: 0.77 ms per loop
+```
+
+```python
+%timeit foldedtensor.as_folded_tensor(nested_list)
+# 100 loops, best of 5: 0.12 ms per loop
+```
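+
+
+## Reproducing these timings
+
+The snippets above run in the namespace set up by `scripts/benchmark.py`, which prints this report to stdout; rerunning the script should regenerate the case sections, with timings that depend on your hardware and PyTorch version. As a quick sanity check (a minimal sketch, not part of the timings), the approaches compared above produce the same padded values:
+
+```python
+nested_list = [[1, 2, 3], [4]]
+ft = foldedtensor.as_folded_tensor(nested_list)  # padded with 0
+nt = torch.nested.nested_tensor(
+    [torch.LongTensor(sub) for sub in nested_list]
+).to_padded_tensor(0)
+assert torch.equal(ft.as_tensor(), nt)
+assert torch.equal(python_padding(nested_list), nt)
+```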
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
new file mode 100644
index 0000000..b2080fd
--- /dev/null
+++ b/scripts/benchmark.py
@@ -0,0 +1,228 @@
+# ruff: noqa: F401, E501
+
+import random
+import warnings
+from timeit import Timer
+
+import foldedtensor  # noqa: F401
+import torch
+import torch.nested
+import torch.nn.utils.rnn
+
+warnings.filterwarnings("ignore")
+
+torch.set_default_device("cpu")
+
+
+def pad_tensors(tensors):
+    """
+    Takes a list of `N` M-dimensional tensors (M < 4) and returns a padded tensor.
+
+    The padded tensor is `M+1`-dimensional with size `N, S1, S2, ..., SM`,
+    where `Si` is the maximum value of dimension `i` amongst all tensors.
+    """
+    rep = tensors[0]
+    padded_dim = []
+    for dim in range(rep.dim()):
+        max_dim = max([tensor.size(dim) for tensor in tensors])
+        padded_dim.append(max_dim)
+    padded_dim = [len(tensors)] + padded_dim
+    padded_tensor = torch.zeros(padded_dim)
+    padded_tensor = padded_tensor.type_as(rep)
+    for i, tensor in enumerate(tensors):
+        size = list(tensor.size())
+        if len(size) == 1:
+            padded_tensor[i, : size[0]] = tensor
+        elif len(size) == 2:
+            padded_tensor[i, : size[0], : size[1]] = tensor
+        elif len(size) == 3:
+            padded_tensor[i, : size[0], : size[1], : size[2]] = tensor
+        else:
+            raise ValueError("Padding is only supported for tensors of up to 3 dimensions.")
+    return padded_tensor
+
+
+def python_padding(ints):
+    """
+    Converts a nested list of integers to a padded tensor.
+    """
+    if isinstance(ints, torch.Tensor):
+        return ints
+    if isinstance(ints, list):
+        if isinstance(ints[0], int):
+            return torch.LongTensor(ints)
+        if isinstance(ints[0], torch.Tensor):
+            return pad_tensors(ints)
+        if isinstance(ints[0], list):
+            return python_padding([python_padding(inti) for inti in ints])
+
+
+def make_nested_list(arg, *rest, value):
+    size = random.randint(*arg) if isinstance(arg, tuple) else arg
+    if not rest:
+        return [value] * size
+    return [make_nested_list(*rest, value=value) for _ in range(size)]
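+
+
+# For illustration, the helpers above behave as follows (tiny hypothetical
+# inputs, not the sizes used in the benchmark cases below):
+#   make_nested_list(2, 3, value=1)      -> [[1, 1, 1], [1, 1, 1]]
+#   make_nested_list(2, (1, 3), value=1) -> two sublists of random length 1 to 3
+#   python_padding([[1, 2], [3]])        -> tensor([[1, 2], [3, 0]])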
+
+
+def exec_and_print(code):
+    print("```python")
+    print(code)
+    print("```")
+    print()
+    exec(code, globals(), globals())
+
+
+def timeit(stmt, number=100, repeat=5):
+    t = Timer(stmt, globals=globals())
+
+    if number == 0:
+        # determine number so that 0.2 <= total time < 2.0
+        callback = None
+
+        try:
+            number, _ = t.autorange(callback)
+        except Exception:
+            t.print_exc()
+            return 1
+
+    try:
+        raw_timings = t.repeat(repeat, number)
+    except Exception:
+        t.print_exc()
+        return 1
+
+    def format_time(dt):
+        return f"{dt * 1000:.2f} ms"
+
+    timings = [dt / number for dt in raw_timings]
+
+    best = min(timings)
+    print("```python")
+    print("%timeit " + stmt)
+    print(
+        "# %d loop%s, best of %d: %s per loop"
+        % (number, "s" if number != 1 else "", repeat, format_time(best))
+    )
+    print("```\n")
+
+
+print(f"`torch.__version__ == {torch.__version__!r}`")
+print()
+
+if __name__ == "__main__":
+    # fmt: off
+    cases = [1, 2, 3, 4, 5, 6]
+    if 1 in cases:
+        print("\n## Case 1 (pad variable lengths nested list)\n")
+
+        print("The following 3-level nested list has 32 sublists at the first level, "
+              "between 50 and 100 sublists at the second level, and between 25 and 30 "
+              "integers at the third level.")
+
+        exec_and_print("nested_list = make_nested_list(32, (50, 100), (25, 30), value=1)")
+
+        print("Comparisons:")
+        timeit("python_padding(nested_list)")
+        timeit("foldedtensor.as_folded_tensor(nested_list)")
+
+    if 2 in cases:
+        print("\n## Case 2 (same lengths nested lists)\n")
+
+        exec_and_print("nested_list = make_nested_list(32, 100, 30, value=1)")
+
+        print("Comparisons:")
+        timeit("torch.tensor(nested_list)")
+        timeit("torch.LongTensor(nested_list)")
+        timeit("python_padding(nested_list)")
+        timeit("torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)")
+        timeit("foldedtensor.as_folded_tensor(nested_list)")
+
+    if 3 in cases:
+        print("\n## Case 3 (simple list)\n")
+
+        exec_and_print("simple_list = make_nested_list(10000, value=1)")
+
+        print("Comparisons:")
+        timeit("torch.tensor(simple_list)")
+        timeit("torch.LongTensor(simple_list)")
+        timeit("python_padding(simple_list)")
+        timeit("foldedtensor.as_folded_tensor(simple_list)")
+
+    if 4 in cases:
+        print("\n## Case 4 (same lengths nested lists to flat tensor)\n")
+
+        exec_and_print("nested_list = make_nested_list(32, 100, 30, value=1)")
+
+        print("Comparisons:")
+        timeit("torch.tensor(nested_list).view(-1)")
+        timeit("torch.LongTensor(nested_list).view(-1)")
+        timeit("python_padding(nested_list).view(-1)")
+        timeit("foldedtensor.as_folded_tensor(nested_list).view(-1)")
+        timeit("foldedtensor.as_folded_tensor(nested_list, data_dims=(2,))")
+
+    if 5 in cases:
+        print("\n## Case 5 (variable lengths nested lists to padded embeddings)\n")
+        print("Nested lists with different lengths (second level lists have lengths "
+              "between 50 and 150)")
+
+        exec_and_print("nested_list = make_nested_list(32, (50, 150), 30, value=1)")
+
+        print("Comparisons:")
+        print("### FT vs NT: padding 0\n")
+
+        timeit("torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)")
+        timeit("foldedtensor.as_folded_tensor(nested_list).as_tensor()")
+
+        print("### FT vs NT: padding 1\n")
+        timeit("torch.nested.nested_tensor([torch.FloatTensor(sub) for sub in nested_list]).to_padded_tensor(1)")
+        timeit("x = foldedtensor.as_folded_tensor(nested_list); x.masked_fill_(x.mask, 1)")
+
+    if 6 in cases:
+        print("\n## Case 6 (2d padding)\n")
+
+        exec_and_print("nested_list = make_nested_list(160, (50, 150), value=1)")
+
+        print("Comparisons:")
+        timeit("python_padding(nested_list)")
+        timeit("torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list]).to_padded_tensor(0)")
+        timeit("torch.nn.utils.rnn.pad_sequence([torch.LongTensor(sub) for sub in nested_list], batch_first=True, padding_value=0)")
+        timeit("foldedtensor.as_folded_tensor(nested_list)")
+
+    if 7 in cases:
+
+        def sum_all_words_per_sample(ft):
+            # Map each flattened word back to its sample index, then sum the
+            # word embeddings per sample with index_add_.
+            lengths = ft.lengths
+            ids = torch.arange(lengths[0][0])
+            for i in range(1, len(lengths)):
+                ids = torch.repeat_interleave(
+                    ids,
+                    lengths[i],
+                    output_size=len(lengths[i + 1])
+                    if i < len(lengths) - 1
+                    else ft.size(len(ft.data_dims) - 1),
+                )
+
+            out = torch.zeros(lengths[0][0], ft.shape[-1])
+            out.index_add_(source=ft.as_tensor(), dim=0, index=ids)
+
+            return out
+
+        print("\n## Case 7 (flat sums)\n")
+
+        exec_and_print(
+            "embedder = torch.nn.Embedding(500, 128)\n"
+            "nested_list = make_nested_list(320, (150, 250), value=1)\n"
+            "ft = foldedtensor.as_folded_tensor(nested_list).refold(2)\n"
+            "nt = torch.nested.nested_tensor([torch.LongTensor(sub) for sub in nested_list])\n"
+            "ft = embedder(ft)\n"
+            "nt = embedder(nt)\n"
+        )
+
+        print("Comparisons:")
+        timeit("nt.sum(dim=1)")
+        timeit("sum_all_words_per_sample(ft)")
+
+        # timeit("embedder(ft)")
+        # timeit("embedder(ft).refold(0, 1)")
+        # timeit("embedder(nt)")
+    # fmt: on