
Commit 71d9f09

Merge branch 'main' into tb-profiler-tutorial-docs-update
2 parents 5126cb8 + 16e4f2a commit 71d9f09

75 files changed, +7291 -253 lines changed


.devcontainer/requirements.txt  (+1 -1)

@@ -24,7 +24,7 @@ ipython
 # to run examples
 pandas
 scikit-image
-pillow==9.3.0
+pillow==10.0.1
 wget

 # for codespaces env

.github/scripts/docathon-label-sync.py  (+2 -2)

@@ -25,11 +25,11 @@ def main():
     issue_number = int(re.findall(r'#(\d{1,5})', pull_request_body)[0])
     issue = repo.get_issue(issue_number)
     issue_labels = issue.labels
-    docathon_label_present = any(label.name == 'docathon-h1-2023' for label in issue_labels)
+    docathon_label_present = any(label.name == 'docathon-h2-2023' for label in issue_labels)

     # if the issue has a docathon label, add all labels from the issue to the PR.
     if not docathon_label_present:
-        print("The 'docathon-h1-2023' label is not present in the issue.")
+        print("The 'docathon-h2-2023' label is not present in the issue.")
         return
     pull_request_labels = pull_request.get_labels()
     issue_label_names = [label.name for label in issue_labels]

.github/workflows/docathon-assign.yml  (+16 -23)

@@ -9,15 +9,8 @@ jobs:
   assign:
     runs-on: ubuntu-latest
     steps:
-      - name: Install Dependencies
-        uses: actions/setup-node@v3
-        with:
-          node-version: '18'
-      - name: Install @octokit/core
-        run: |
-          npm i @octokit/core @octokit/rest
       - name: Check for "/assigntome" in comment
-        uses: actions/github-script@v4
+        uses: actions/github-script@v6
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
@@ -27,39 +20,39 @@ jobs:
            if (assignRegex.test(issueComment)) {
              const assignee = context.payload.comment.user.login;
              const issueNumber = context.payload.issue.number;
-             const { Octokit } = require("@octokit/rest");
-             const octokit = new Octokit({
-               auth: process.env.GITHUB_TOKEN,
-             });
-             const { data: issue } = await octokit.issues.get({
-               owner: context.repo.owner,
-               repo: context.repo.repo,
-               issue_number: issueNumber
-             });
-             const hasLabel = issue.labels.some(label => label.name === 'docathon-h1-2023');
+             try {
+               const { data: issue } = await github.rest.issues.get({
+                 owner: context.repo.owner,
+                 repo: context.repo.repo,
+                 issue_number: issueNumber
+               });
+               const hasLabel = issue.labels.some(label => label.name === 'docathon-h2-2023');
              if (hasLabel) {
                if (issue.assignee !== null) {
-                 await octokit.issues.createComment({
+                 await github.rest.issues.createComment({
                    owner: context.repo.owner,
                    repo: context.repo.repo,
                    issue_number: issueNumber,
-                   body: "The issue is already assigned. Please pick an opened and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)."
+                   body: "The issue is already assigned. Please pick an opened and unnasigned issue with the [docathon-h2-2023 label](https://github.com/pytorch/pytorch/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h2-2023)."
                  });
                } else {
-                 octokit.issues.addAssignees({
+                 await github.rest.issues.addAssignees({
                    owner: context.repo.owner,
                    repo: context.repo.repo,
                    issue_number: issueNumber,
                    assignees: [assignee]
                  });
                }
              } else {
-               const commmentMessage = "This issue does not have the correct label. Please pick an opened and unnasigned issue with the [docathon-h1-2023 label](https://github.com/pytorch/tutorials/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h1-2023)."
-               await octokit.issues.createComment({
+               const commmentMessage = "This issue does not have the correct label. Please pick an opened and unnasigned issue with the [docathon-h2-2023 label](https://github.com/pytorch/pytorch/issues?q=is%3Aopen+is%3Aissue+label%3Adocathon-h2-2023)."
+               await github.rest.issues.createComment({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  issue_number: issueNumber,
                  body: commmentMessage
                });
+               }
+             } catch (error) {
+               console.error(error);
              }
            }

.jenkins/build.sh  (+4 -5)

@@ -24,11 +24,10 @@ pip install --progress-bar off -r $DIR/../requirements.txt

 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
-# RC Link
-# pip uninstall -y torch torchvision torchaudio torchtext
-# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext
-# pip uninstall -y torch torchvision torchaudio torchtext
-# pip install --pre --upgrade -f https://download.pytorch.org/whl/test/cu116/torch_test.html torch torchdata torchvision torchaudio torchtext
+# Install 2.1 for testing
+# pip uninstall -y torch torchvision torchaudio torchtext torchdata
+# pip3 install torch torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu121
+# pip3 install torchdata torchtext --index-url https://download.pytorch.org/whl/test/cpu

 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm

.jenkins/download_data.py  (new file, +128)

@@ -0,0 +1,128 @@
#!/usr/bin/env python3
import hashlib
import os

from typing import Optional
from urllib.request import urlopen, Request
from pathlib import Path
from zipfile import ZipFile

REPO_BASE_DIR = Path(__file__).absolute().parent.parent
DATA_DIR = REPO_BASE_DIR / "_data"
BEGINNER_DATA_DIR = REPO_BASE_DIR / "beginner_source" / "data"
INTERMEDIATE_DATA_DIR = REPO_BASE_DIR / "intermediate_source" / "data"
ADVANCED_DATA_DIR = REPO_BASE_DIR / "advanced_source" / "data"
PROTOTYPE_DATA_DIR = REPO_BASE_DIR / "prototype_source" / "data"
FILES_TO_RUN = os.getenv("FILES_TO_RUN")


def size_fmt(nbytes: int) -> str:
    """Returns a formatted file size string"""
    KB = 1024
    MB = 1024 * KB
    GB = 1024 * MB
    if abs(nbytes) >= GB:
        return f"{nbytes * 1.0 / GB:.2f} Gb"
    elif abs(nbytes) >= MB:
        return f"{nbytes * 1.0 / MB:.2f} Mb"
    elif abs(nbytes) >= KB:
        return f"{nbytes * 1.0 / KB:.2f} Kb"
    return str(nbytes) + " bytes"


def download_url_to_file(url: str,
                         dst: Optional[str] = None,
                         prefix: Optional[Path] = None,
                         sha256: Optional[str] = None) -> Path:
    dst = dst if dst is not None else Path(url).name
    dst = dst if prefix is None else str(prefix / dst)
    if Path(dst).exists():
        print(f"Skip downloading {url} as {dst} already exists")
        return Path(dst)
    file_size = None
    u = urlopen(Request(url, headers={"User-Agent": "tutorials.downloader"}))
    meta = u.info()
    if hasattr(meta, 'getheaders'):
        content_length = meta.getheaders("Content-Length")
    else:
        content_length = meta.get_all("Content-Length")
    if content_length is not None and len(content_length) > 0:
        file_size = int(content_length[0])
    sha256_sum = hashlib.sha256()
    with open(dst, "wb") as f:
        while True:
            buffer = u.read(32768)
            if len(buffer) == 0:
                break
            sha256_sum.update(buffer)
            f.write(buffer)
    digest = sha256_sum.hexdigest()
    if sha256 is not None and sha256 != digest:
        Path(dst).unlink()
        raise RuntimeError(f"Downloaded {url} has unexpected sha256sum {digest} should be {sha256}")
    print(f"Downloaded {url} sha256sum={digest} size={size_fmt(file_size)}")
    return Path(dst)


def unzip(archive: Path, tgt_dir: Path) -> None:
    with ZipFile(str(archive), "r") as zip_ref:
        zip_ref.extractall(str(tgt_dir))


def download_hymenoptera_data():
    # transfer learning tutorial data
    z = download_url_to_file("https://download.pytorch.org/tutorial/hymenoptera_data.zip",
                             prefix=DATA_DIR,
                             sha256="fbc41b31d544714d18dd1230b1e2b455e1557766e13e67f9f5a7a23af7c02209",
                             )
    unzip(z, BEGINNER_DATA_DIR)


def download_nlp_data() -> None:
    # nlp tutorial data
    z = download_url_to_file("https://download.pytorch.org/tutorial/data.zip",
                             prefix=DATA_DIR,
                             sha256="fb317e80248faeb62dc25ef3390ae24ca34b94e276bbc5141fd8862c2200bff5",
                             )
    # This will unzip all files in data.zip to intermediate_source/data/ folder
    unzip(z, INTERMEDIATE_DATA_DIR.parent)


def download_dcgan_data() -> None:
    # Download dataset for beginner_source/dcgan_faces_tutorial.py
    z = download_url_to_file("https://s3.amazonaws.com/pytorch-tutorial-assets/img_align_celeba.zip",
                             prefix=DATA_DIR,
                             sha256="46fb89443c578308acf364d7d379fe1b9efb793042c0af734b6112e4fd3a8c74",
                             )
    unzip(z, BEGINNER_DATA_DIR / "celeba")


def download_lenet_mnist() -> None:
    # Download model for beginner_source/fgsm_tutorial.py
    download_url_to_file("https://docs.google.com/uc?export=download&id=1HJV2nUHJqclXQ8flKvcWmjZ-OU5DGatl",
                         prefix=BEGINNER_DATA_DIR,
                         dst="lenet_mnist_model.pth",
                         sha256="cb5f8e578aef96d5c1a2cc5695e1aa9bbf4d0fe00d25760eeebaaac6ebc2edcb",
                         )


def main() -> None:
    DATA_DIR.mkdir(exist_ok=True)
    BEGINNER_DATA_DIR.mkdir(exist_ok=True)
    ADVANCED_DATA_DIR.mkdir(exist_ok=True)
    INTERMEDIATE_DATA_DIR.mkdir(exist_ok=True)
    PROTOTYPE_DATA_DIR.mkdir(exist_ok=True)

    if FILES_TO_RUN is None or "transfer_learning_tutorial" in FILES_TO_RUN:
        download_hymenoptera_data()
    nlp_tutorials = ["seq2seq_translation_tutorial", "char_rnn_classification_tutorial", "char_rnn_generation_tutorial"]
    if FILES_TO_RUN is None or any(x in FILES_TO_RUN for x in nlp_tutorials):
        download_nlp_data()
    if FILES_TO_RUN is None or "dcgan_faces_tutorial" in FILES_TO_RUN:
        download_dcgan_data()
    if FILES_TO_RUN is None or "fgsm_tutorial" in FILES_TO_RUN:
        download_lenet_mnist()


if __name__ == "__main__":
    main()
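For orientation, here is a minimal usage sketch of the new helper. In CI the script simply runs as FILES_TO_RUN="fgsm_tutorial" python3 .jenkins/download_data.py; the import below assumes .jenkins is on sys.path, which is an assumption for illustration. The function names, URL, and checksum are taken directly from the file above.

# Minimal sketch, assuming download_data is importable (e.g. .jenkins added to sys.path).
from download_data import DATA_DIR, BEGINNER_DATA_DIR, download_url_to_file, unzip

# Make the target directories, mirroring what main() does before any download.
DATA_DIR.mkdir(exist_ok=True)
BEGINNER_DATA_DIR.mkdir(exist_ok=True)

# Download once, skip if the archive already exists, and verify its sha256 before use.
archive = download_url_to_file(
    "https://download.pytorch.org/tutorial/hymenoptera_data.zip",
    prefix=DATA_DIR,
    sha256="fbc41b31d544714d18dd1230b1e2b455e1557766e13e67f9f5a7a23af7c02209",
)
# Extract into beginner_source/data/, as download_hymenoptera_data() does.
unzip(archive, BEGINNER_DATA_DIR)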

.jenkins/get_files_to_run.py  (+9 -10)

@@ -40,27 +40,26 @@ def add_to_shard(i, filename):
     )

     all_other_files = all_files.copy()
-    needs_gpu_nvidia_small_multi = list(
-        filter(lambda x: get_needs_machine(x) == "gpu.nvidia.small.multi", all_files,)
+    needs_multigpu = list(
+        filter(lambda x: get_needs_machine(x) == "linux.16xlarge.nvidia.gpu", all_files,)
     )
-    needs_gpu_nvidia_medium = list(
-        filter(lambda x: get_needs_machine(x) == "gpu.nvidia.large", all_files,)
+    needs_a10g = list(
+        filter(lambda x: get_needs_machine(x) == "linux.g5.4xlarge.nvidia.gpu", all_files,)
     )
-    for filename in needs_gpu_nvidia_small_multi:
-        # currently, the only job that uses gpu.nvidia.small.multi is the 0th worker,
+    for filename in needs_multigpu:
+        # currently, the only job that has multigpu is the 0th worker,
         # so we'll add all the jobs that need this machine to the 0th worker
         add_to_shard(0, filename)
        all_other_files.remove(filename)
-    for filename in needs_gpu_nvidia_medium:
-        # currently, the only job that uses gpu.nvidia.large is the 1st worker,
+    for filename in needs_a10g:
+        # currently, workers 1-5 use linux.g5.4xlarge.nvidia.gpu (sm86, A10G),
         # so we'll add all the jobs that need this machine to the 1st worker
         add_to_shard(1, filename)
         all_other_files.remove(filename)
-
     sorted_files = sorted(all_other_files, key=get_duration, reverse=True,)

     for filename in sorted_files:
-        min_shard_index = sorted(range(num_shards), key=lambda i: sharded_files[i][0])[
+        min_shard_index = sorted(range(1, num_shards), key=lambda i: sharded_files[i][0])[
             0
         ]
         add_to_shard(min_shard_index, filename)
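The behavioral change worth noting is range(1, num_shards) in the last hunk: shard 0 is now excluded from the general greedy assignment, so it only carries the multi-GPU jobs pinned to it earlier in the function. A self-contained sketch of that greedy scheme follows; the file names and durations are invented for illustration and are not taken from the repository.

# Hedged illustration of the greedy sharding performed by get_files_to_run.py.
durations = {"a.py": 300, "b.py": 120, "c.py": 90, "d.py": 60}
num_shards = 3
sharded_files = [(0.0, []) for _ in range(num_shards)]  # (total duration, files) per shard

def add_to_shard(i, filename):
    total, files = sharded_files[i]
    sharded_files[i] = (total + durations[filename], files + [filename])

# Longest tutorials first; shard 0 is skipped so it stays reserved for multi-GPU jobs.
for filename in sorted(durations, key=durations.get, reverse=True):
    min_shard_index = sorted(range(1, num_shards), key=lambda i: sharded_files[i][0])[0]
    add_to_shard(min_shard_index, filename)

print(sharded_files)
# [(0.0, []), (300.0, ['a.py']), (270.0, ['b.py', 'c.py', 'd.py'])]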

.jenkins/metadata.json  (+7 -4)

@@ -20,15 +20,18 @@
     },
     "intermediate_source/pipeline_tutorial.py": {
         "duration": 320,
-        "needs": "gpu.nvidia.small.multi"
+        "needs": "linux.16xlarge.nvidia.gpu"
     },
     "beginner_source/blitz/data_parallel_tutorial.py": {
-        "needs": "gpu.nvidia.small.multi"
+        "needs": "linux.16xlarge.nvidia.gpu"
     },
     "intermediate_source/model_parallel_tutorial.py": {
-        "needs": "gpu.nvidia.small.multi"
+        "needs": "linux.16xlarge.nvidia.gpu"
     },
     "intermediate_source/torch_compile_tutorial.py": {
-        "needs": "gpu.nvidia.large"
+        "needs": "linux.g5.4xlarge.nvidia.gpu"
+    },
+    "intermediate_source/scaled_dot_product_attention_tutorial.py": {
+        "needs": "linux.g5.4xlarge.nvidia.gpu"
     }
 }
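The "needs" values here are read by .jenkins/get_files_to_run.py through get_needs_machine (see the diff above). A minimal sketch of that lookup, assuming it is just a dictionary read of the JSON; the repository's exact implementation may differ.

import json
from pathlib import Path
from typing import Optional

# Load the shared metadata once; the path is relative to the repository root.
metadata = json.loads(Path(".jenkins/metadata.json").read_text())

def get_needs_machine(filename: str) -> Optional[str]:
    # Return the runner label a tutorial requires, or None for the default pool.
    return metadata.get(filename, {}).get("needs", None)

print(get_needs_machine("intermediate_source/scaled_dot_product_attention_tutorial.py"))
# -> "linux.g5.4xlarge.nvidia.gpu" after this commit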

.jenkins/validate_tutorials_built.py  (+3 -1)

@@ -10,6 +10,7 @@

 NOT_RUN = [
     "beginner_source/basics/intro", # no code
+    "beginner_source/onnx/intro_onnx",
     "beginner_source/translation_transformer",
     "beginner_source/profiler",
     "beginner_source/saving_loading_models",
@@ -21,10 +22,11 @@
     "beginner_source/former_torchies/tensor_tutorial_old",
     "beginner_source/examples_autograd/polynomial_autograd",
     "beginner_source/examples_autograd/polynomial_custom_function",
-    "beginner_source/t5_tutorial", # re-enable after this is fixed: https://github.com/pytorch/text/issues/1756
+    "beginner_source/t5_tutorial",  # re-enable after this is fixed: https://github.com/pytorch/text/issues/1756
     "intermediate_source/parametrizations",
     "intermediate_source/mnist_train_nas",  # used by ax_multiobjective_nas_tutorial.py
     "intermediate_source/fx_conv_bn_fuser",
+    "intermediate_source/_torch_export_nightly_tutorial", # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
     "advanced_source/ddp_pipeline", # requires 4 gpus
     "prototype_source/fx_graph_mode_ptq_dynamic",

.pyspelling.yml  (+2 -2)

@@ -19,7 +19,7 @@ matrix:
       - open: '\.\.\s+(figure|literalinclude|math|image|grid)::'
         close: '\n'
       # Exclude roles:
-      - open: ':(?:(class|py:mod|mod|func)):`'
+      - open: ':(?:(class|py:mod|mod|func|meth|obj)):`'
         content: '[^`]*'
         close: '`'
       # Exclude reStructuredText hyperlinks
@@ -70,7 +70,7 @@ matrix:
       - open: ':figure:.*'
         close: '\n'
       # Ignore reStructuredText roles
-      - open: ':(?:(class|file|func|math|ref|octicon)):`'
+      - open: ':(?:(class|file|func|math|ref|octicon|meth|obj)):`'
         content: '[^`]*'
         close: '`'
       - open: ':width:'
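Both edits only extend the role-exclusion patterns so that :meth: and :obj: roles are no longer spell-checked. A quick standalone check of what the extended pattern matches; the snippet is illustrative and not part of the repository.

import re

# Same role pattern as the updated .pyspelling.yml entry, with the backtick-delimited content appended.
role = re.compile(r":(?:(class|py:mod|mod|func|meth|obj)):`[^`]*`")

text = "Call :meth:`torch.Tensor.backward` and see :obj:`None` for details."
print(role.findall(text))  # ['meth', 'obj'] -- these spans are now excluded from spellcheck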

Makefile  (+2 -26)

@@ -38,20 +38,8 @@ download:
 	# Step2-2. UNTAR: tar -xzf $(DATADIR)/[SOURCE_FILE] -C [*_source/data/]
 	# Step2-3. AS-IS: cp $(DATADIR)/[SOURCE_FILE] [*_source/data/]

-	# make data directories
-	mkdir -p $(DATADIR)
-	mkdir -p advanced_source/data
-	mkdir -p beginner_source/data
-	mkdir -p intermediate_source/data
-	mkdir -p prototype_source/data
-
-	# transfer learning tutorial data
-	wget -nv -N https://download.pytorch.org/tutorial/hymenoptera_data.zip -P $(DATADIR)
-	unzip $(ZIPOPTS) $(DATADIR)/hymenoptera_data.zip -d beginner_source/data/
-
-	# nlp tutorial data
-	wget -nv -N https://download.pytorch.org/tutorial/data.zip -P $(DATADIR)
-	unzip $(ZIPOPTS) $(DATADIR)/data.zip -d intermediate_source/ # This will unzip all files in data.zip to intermediate_source/data/ folder
+	# Run structured downloads first (will also make directories
+	python3 .jenkins/download_data.py

 	# data loader tutorial
 	wget -nv -N https://download.pytorch.org/tutorial/faces.zip -P $(DATADIR)
@@ -65,10 +53,6 @@ download:
 	mkdir -p advanced_source/data/images/
 	cp -r _static/img/neural-style/ advanced_source/data/images/

-	# Download dataset for beginner_source/dcgan_faces_tutorial.py
-	wget -nv -N https://s3.amazonaws.com/pytorch-tutorial-assets/img_align_celeba.zip -P $(DATADIR)
-	unzip $(ZIPOPTS) $(DATADIR)/img_align_celeba.zip -d beginner_source/data/celeba
-
 	# Download dataset for beginner_source/hybrid_frontend/introduction_to_hybrid_frontend_tutorial.py
 	wget -nv -N https://s3.amazonaws.com/pytorch-tutorial-assets/iris.data -P $(DATADIR)
 	cp $(DATADIR)/iris.data beginner_source/data/
@@ -77,14 +61,6 @@ download:
 	wget -nv -N https://s3.amazonaws.com/pytorch-tutorial-assets/cornell_movie_dialogs_corpus_v2.zip -P $(DATADIR)
 	unzip $(ZIPOPTS) $(DATADIR)/cornell_movie_dialogs_corpus_v2.zip -d beginner_source/data/

-	# Download dataset for beginner_source/audio_classifier_tutorial.py
-	wget -nv -N https://s3.amazonaws.com/pytorch-tutorial-assets/UrbanSound8K.tar.gz -P $(DATADIR)
-	tar $(TAROPTS) -xzf $(DATADIR)/UrbanSound8K.tar.gz -C ./beginner_source/data/
-
-	# Download model for beginner_source/fgsm_tutorial.py
-	wget -nv -N 'https://docs.google.com/uc?export=download&id=1HJV2nUHJqclXQ8flKvcWmjZ-OU5DGatl' -O $(DATADIR)/lenet_mnist_model.pth
-	cp $(DATADIR)/lenet_mnist_model.pth ./beginner_source/data/lenet_mnist_model.pth
-
 	# Download model for advanced_source/dynamic_quantization_tutorial.py
 	wget -nv -N https://s3.amazonaws.com/pytorch-tutorial-assets/word_language_model_quantize.pth -P $(DATADIR)
 	cp $(DATADIR)/word_language_model_quantize.pth advanced_source/data/word_language_model_quantize.pth
