
Commit 9e7d160

Merge branch 'pytorch:main' into main
2 parents aafe428 + 78933b1

5 files changed: +49 -19 lines changed

.jenkins/validate_tutorials_built.py

Lines changed: 0 additions & 1 deletion
@@ -53,7 +53,6 @@
     "intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release.
     "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed.
     "intermediate_source/torchrec_intro_tutorial", # reenable after 3302 is fixed.
-    "intermediate_source/memory_format_tutorial", # causes other tutorials like torch_logs fail. "state" issue, reseting dynamo didn't help
 ]

 def tutorial_source_dirs() -> List[Path]:

conf.py

Lines changed: 40 additions & 17 deletions
@@ -33,8 +33,6 @@
 sys.path.insert(0, os.path.abspath('./.jenkins'))
 import pytorch_sphinx_theme
 import torch
-import numpy
-import gc
 import glob
 import random
 import shutil
@@ -49,6 +47,46 @@
 pio.renderers.default = 'sphinx_gallery'


+import sphinx_gallery.gen_rst
+import multiprocessing
+
+# Monkey patch sphinx gallery to run each example in an isolated process so that
+# we don't need to worry about examples changing global state.
+#
+# Alt option 1: Parallelism was added to sphinx gallery (a later version that we
+# are not using yet) using joblib, but it seems to result in errors for us, and
+# it has no effect if you set parallel = 1 (it will not put each file run into
+# its own process and run singly) so you need parallel >= 2, and there may be
+# tutorials that cannot be run in parallel.
+#
+# Alt option 2: Run sphinx gallery once per file (similar to how we shard in CI
+# but with shard sizes of 1), but running sphinx gallery for each file has a
+# ~5min overhead, resulting in the entire suite taking ~2x time
+def call_fn(func, args, kwargs, result_queue):
+    try:
+        result = func(*args, **kwargs)
+        result_queue.put((True, result))
+    except Exception as e:
+        result_queue.put((False, str(e)))
+
+def call_in_subprocess(func):
+    def wrapper(*args, **kwargs):
+        result_queue = multiprocessing.Queue()
+        p = multiprocessing.Process(
+            target=call_fn,
+            args=(func, args, kwargs, result_queue)
+        )
+        p.start()
+        p.join()
+        success, result = result_queue.get()
+        if success:
+            return result
+        else:
+            raise RuntimeError(f"Error in subprocess: {result}")
+    return wrapper
+
+sphinx_gallery.gen_rst.generate_file_rst = call_in_subprocess(sphinx_gallery.gen_rst.generate_file_rst)
+
 try:
     import torchvision
 except ImportError:
@@ -97,20 +135,6 @@

 # -- Sphinx-gallery configuration --------------------------------------------

-def reset_seeds(gallery_conf, fname):
-    torch.cuda.empty_cache()
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-    torch._dynamo.reset()
-    torch._inductor.config.force_disable_caches = True
-    torch.manual_seed(42)
-    torch.set_default_device(None)
-    random.seed(10)
-    numpy.random.seed(10)
-    torch.set_grad_enabled(True)
-
-    gc.collect()
-
 sphinx_gallery_conf = {
     'examples_dirs': ['beginner_source', 'intermediate_source',
                       'advanced_source', 'recipes_source', 'prototype_source'],
@@ -121,7 +145,6 @@ def reset_seeds(gallery_conf, fname):
     'first_notebook_cell': ("# For tips on running notebooks in Google Colab, see\n"
                             "# https://pytorch.org/tutorials/beginner/colab\n"
                             "%matplotlib inline"),
-    'reset_modules': (reset_seeds),
     'ignore_pattern': r'_torch_export_nightly_tutorial.py',
     'pypandoc': {'extra_args': ['--mathjax', '--toc'],
                  'filters': ['.jenkins/custom_pandoc_filter.py'],
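
Note: the comment block in the conf.py hunk above explains why each gallery example now runs in its own process. As a quick illustration of how that decorator pattern behaves outside Sphinx, here is a minimal standalone sketch (not part of the commit); it mirrors the call_fn/call_in_subprocess pair above, and the toy function set_env_and_compute is hypothetical. It shows that state mutated inside the wrapped call stays in the child process while the return value still reaches the parent.

# Standalone sketch of the isolation pattern used above (not part of the commit).
# The toy function set_env_and_compute is hypothetical; it stands in for a
# tutorial that mutates process-wide state.
import multiprocessing
import os


def run_in_child(func, args, kwargs, result_queue):
    # Executes func in the child process and ships back (success, payload).
    try:
        result_queue.put((True, func(*args, **kwargs)))
    except Exception as e:
        result_queue.put((False, str(e)))


def call_in_subprocess(func):
    def wrapper(*args, **kwargs):
        result_queue = multiprocessing.Queue()
        p = multiprocessing.Process(
            target=run_in_child, args=(func, args, kwargs, result_queue)
        )
        p.start()
        # Read before join: a child blocked on a full pipe could otherwise never exit.
        success, result = result_queue.get()
        p.join()
        if success:
            return result
        raise RuntimeError(f"Error in subprocess: {result}")
    return wrapper


def set_env_and_compute(x):
    # Simulates a tutorial polluting global state (env vars, default device, seeds, ...).
    os.environ["TUTORIAL_STATE"] = "polluted"
    return x * 2


if __name__ == "__main__":
    isolated = call_in_subprocess(set_env_and_compute)
    print(isolated(21))                    # 42, computed in the child process
    print("TUTORIAL_STATE" in os.environ)  # False: the parent's state is untouched

Reading from the queue before joining is only a precaution for large payloads; for small results either order works.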

intermediate_source/transformer_building_blocks.py

Lines changed: 3 additions & 0 deletions
@@ -1,4 +1,7 @@
 """
+.. meta::
+   :description: Learn how to optimize transformer models by replacing nn.Transformer with Nested Tensors and torch.compile() for significant performance gains in PyTorch.
+
 Accelerating PyTorch Transformers by replacing ``nn.Transformer`` with Nested Tensors and ``torch.compile()``
 =============================================================================================================
 **Author:** `Mikayla Gawarecki <https://github.com/mikaylagawarecki>`_

prototype_source/nestedtensor.py

Lines changed: 5 additions & 0 deletions
@@ -369,3 +369,8 @@ def benchmark(func, *args, **kwargs):
 # how implement multi-head attention for transformers in a way that avoids computation on padding.
 # For more information, check out the docs for the
 # `torch.nested <https://pytorch.org/docs/stable/nested.html>`__ namespace.
+#
+# See Also
+# --------
+#
+# * `Accelerating PyTorch Transformers by replacing nn.Transformer with Nested Tensors and torch.compile() <https://docs.pytorch.org/tutorials/intermediate/transformer_building_blocks.html>`__

prototype_source/prototype_index.rst

Lines changed: 1 addition & 1 deletion
@@ -273,7 +273,7 @@ Prototype features are not available as part of binary distributions like PyPI o
    :header: (prototype) Using GPUDirect Storage
    :card_description: Learn how to use GPUDirect Storage in PyTorch.
    :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png
-   :link: ../prototype/gpudirect_storage.html
+   :link: ../prototype/gpu_direct_storage.html
    :tags: GPUDirect-Storage

 .. End of tutorial card section
