From a1668ee8bb9a0fb45f9703f928a803ff157e3dc9 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 19:25:30 +0100 Subject: [PATCH 01/21] ci: Configure PyLint to run on new files Signed-off-by: Oliver Koenig --- nemo/core/classes/dataset.py | 2 +- nemo/core/classes/exportable.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nemo/core/classes/dataset.py b/nemo/core/classes/dataset.py index 3e8652367734..57fd2a9ba799 100644 --- a/nemo/core/classes/dataset.py +++ b/nemo/core/classes/dataset.py @@ -24,7 +24,7 @@ class Dataset(data.Dataset, Typing, Serialization): """Dataset with output ports - Please Note: Subclasses of IterableDataset should *not* implement input_types. + Please Note: Subclasses of IterableDataset should *not* implement input_types.. """ def _collate_fn(self, batch): diff --git a/nemo/core/classes/exportable.py b/nemo/core/classes/exportable.py index aab09d42d907..5862163bfbfb 100644 --- a/nemo/core/classes/exportable.py +++ b/nemo/core/classes/exportable.py @@ -39,7 +39,7 @@ class Exportable(ABC): """ This Interface should be implemented by particular classes derived from nemo.core.NeuralModule or nemo.core.ModelPT. - It gives these entities ability to be exported for deployment to formats such as ONNX. + It gives these entities ability to be exported for deployment to formats such as ONNX.. Usage: # exporting pre-trained model to ONNX file for deployment. 
From 2e8b8fdc1e890e45349886d5ea62331dba2ec051 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 19:25:43 +0100 Subject: [PATCH 02/21] test Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 5484da5fdcd9..883fcb529637 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -80,6 +80,8 @@ jobs: run: | pip install pylint + echo "$CHANGED_FILES" + ADDITIONAL_PYLINT_ARGS=() echo ${{ github.event.pull_request.labels.*.name }} From 85105a9370c8f4c3b3ce9564c0022818673bd9be Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 19:27:09 +0100 Subject: [PATCH 03/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 883fcb529637..a292e7f97d10 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -10,7 +10,7 @@ name: Isort and Black Formatting; PyLint Docs check # For details see https://github.com/orgs/community/discussions/25702 on: - pull_request_target: + pull_request: #pull_request_target: paths: - '**.py' types: [ opened, synchronize, reopened, labeled, unlabeled ] From 3cbc4f37153b77c639810d42d74f3d5a44aa336f Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 19:58:11 +0100 Subject: [PATCH 04/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index a292e7f97d10..fc0668c337ce 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -80,10 +80,19 @@ jobs: run: | pip install pylint - echo 
"$CHANGED_FILES" + THRESHOLD=1731004695 # 2024/11/07 - ADDITIONAL_PYLINT_ARGS=() - echo ${{ github.event.pull_request.labels.*.name }} + FILTERED=() + for file in $CHANGED_FILES; do + DATE=$(git log --follow --format=%ad --date=unix $file | tail -1) + if [[ "$DATE" -lt "$THRESHOLD" ]]; then + FILTERED+=("$file") + fi + done + + echo "Will run on these files: + ${FILTERED[@]}" - pylint ${ADDITIONAL_PYLINT_ARGS[@]} $CHANGED_FILES || \ + echo ${{ github.event.pull_request.labels.*.name }} + pylint ${FILTERED[@]} || \ { echo "Pylint failed. In case of long strings, format docstrings and other strings manually."; exit 1; } From 39eb5e1b1c307d1898b88b4b6eec20d5c7be99f4 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 20:20:56 +0100 Subject: [PATCH 05/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 58 ++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 6 deletions(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index fc0668c337ce..3a4623a4e331 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -20,7 +20,7 @@ defaults: shell: bash -x -e -u -o pipefail {0} jobs: - reformat_with_isort_and_black_and_pylint: + reformat_with_isort_and_black: runs-on: ubuntu-latest permissions: # write permissions required to commit changes @@ -72,27 +72,73 @@ jobs: message: Apply isort and black reformatting commit: --signoff + check_pylint: + runs-on: ubuntu-latest + permissions: + # write permissions required to commit changes + contents: write + env: + THRESHOLD: 1731004695 # On this date (2024/11/07) we decided to add Pylint. It shall only run in strict mode for files added past this date. For files prior to this date, we will only add a PR comment with PyLint's stdout. 
+ strategy: + matrix: + strict-mode: [true, false] + steps: + - name: Checkout branch + uses: actions/checkout@v4 + with: + # setup repository and ref for PRs, see + # https://github.com/EndBug/add-and-commit?tab=readme-ov-file#working-with-prs + repository: ${{ github.event.pull_request.head.repo.full_name }} + ref: ${{ github.event.pull_request.head.ref }} + + # https://github.com/tj-actions/changed-files + - name: Get changed files + id: changed-files + uses: tj-actions/changed-files@v44 + with: + files: | + **.py + + - name: Setup Python env + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: pylint if: ${{ steps.changed-files.outputs.any_changed == 'true' && !contains( github.event.pull_request.labels.*.name, 'skip-docs') }} env: # only *.py files included + STRICT_MODE: ${{ matrix.strict-mode }} CHANGED_FILES: "${{ steps.changed-files.outputs.all_changed_files }}" run: | pip install pylint - THRESHOLD=1731004695 # 2024/11/07 - FILTERED=() for file in $CHANGED_FILES; do DATE=$(git log --follow --format=%ad --date=unix $file | tail -1) - if [[ "$DATE" -lt "$THRESHOLD" ]]; then + + if [[ "$STRICT_MODE" == "true" ]]; then + if [[ "$DATE" -gt "$THRESHOLD" ]]; then + FILTERED+=("$file") + fi + else FILTERED+=("$file") fi done + if [ ${#array[@]} -eq 0 ]; then + echo "No files to check." + fi + echo "Will run on these files: ${FILTERED[@]}" echo ${{ github.event.pull_request.labels.*.name }} - pylint ${FILTERED[@]} || \ - { echo "Pylint failed. In case of long strings, format docstrings and other strings manually."; exit 1; } + set +e + LOG=$(pylint ${FILTERED[@]}) + EXIT_CODE=$? 
+ set -e + + + echo "$LOG" + exit $([[ "$EXIT_CODE" -ne 0 && "$STRICT_MODE" == "true" ]] && echo $EXIT_CODE || echo 0) From d5f10645c9893839be26c3c821081ccce2a38621 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 20:24:09 +0100 Subject: [PATCH 06/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 3a4623a4e331..0d1f1f21ec46 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -81,7 +81,7 @@ jobs: THRESHOLD: 1731004695 # On this date (2024/11/07) we decided to add Pylint. It shall only run in strict mode for files added past this date. For files prior to this date, we will only add a PR comment with PyLint's stdout. strategy: matrix: - strict-mode: [true, false] + strict-mode: ["true", "false"] steps: - name: Checkout branch uses: actions/checkout@v4 @@ -126,7 +126,7 @@ jobs: fi done - if [ ${#array[@]} -eq 0 ]; then + if [ ${#FILTERED[@]} -eq 0 ]; then echo "No files to check." fi From 7cc7306cc2b8892d2b721ab8d3690a47be8b1d18 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 20:24:52 +0100 Subject: [PATCH 07/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 0d1f1f21ec46..bfcf9e3a70df 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -128,6 +128,7 @@ jobs: if [ ${#FILTERED[@]} -eq 0 ]; then echo "No files to check." 
+ exit 0 fi echo "Will run on these files: From f82f3fe36405ab2a76ca89b9c2b7623228eaa76d Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:16:59 +0100 Subject: [PATCH 08/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index bfcf9e3a70df..7f62575ec780 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -73,6 +73,7 @@ jobs: commit: --signoff check_pylint: + name: "check_pylint (strict-mode: ${{ matrix.strict-mode }})" runs-on: ubuntu-latest permissions: # write permissions required to commit changes From ab0b3e5a0b93a9c6594270fb83de24e2fbd3f5e8 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:25:07 +0100 Subject: [PATCH 09/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 7f62575ec780..e6beb8377be6 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -107,6 +107,7 @@ jobs: - name: pylint if: ${{ steps.changed-files.outputs.any_changed == 'true' && !contains( github.event.pull_request.labels.*.name, 'skip-docs') }} + id: pylint env: # only *.py files included STRICT_MODE: ${{ matrix.strict-mode }} @@ -141,6 +142,22 @@ jobs: EXIT_CODE=$? 
+ set -e - echo "$LOG" + if [[ "$EXIT_CODE" -ne 0 && "$STRICT_MODE" == "true" ]]; then + echo $EXIT_CODE + else + echo "OUTPUT<<EOF" >> $GITHUB_ENV + echo "$LOG" >> $GITHUB_ENV + echo "EOF" >> $GITHUB_ENV + + echo "log=$LOG" + fi exit $([[ "$EXIT_CODE" -ne 0 && "$STRICT_MODE" == "true" ]] && echo $EXIT_CODE || echo 0) + + - name: Add PR comment for PyLint + if: ${{ matrix.strict-mode == 'false' && env.OUTPUT != '' }} + uses: peter-evans/create-or-update-comment@v4 + with: + issue-number: ${{ github.event.number }} + body: | + ${{ env.OUTPUT }} \ No newline at end of file From 5f51ab87fdcec30a5b5144e9e3dec1b0064a22d8 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:27:53 +0100 Subject: [PATCH 10/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index e6beb8377be6..63433e74a9c7 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -160,4 +160,4 @@ jobs: with: issue-number: ${{ github.event.number }} body: | - ${{ env.OUTPUT }} \ No newline at end of file + "${{ env.OUTPUT }}" \ No newline at end of file From de6751f86b85a94a6d778a88f8be8b2933ef1569 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:28:58 +0100 Subject: [PATCH 11/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 63433e74a9c7..8838321ef1ad 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -159,5 +159,4 @@ jobs: uses: peter-evans/create-or-update-comment@v4 with: issue-number: ${{ github.event.number }} - body: | - "${{ env.OUTPUT }}" \ No newline at end of file + body: hello, world \ No newline at end of file From
3476e97869462eaa2509ec160d3041dcc77d91d4 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:33:14 +0100 Subject: [PATCH 12/21] permissions Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 8838321ef1ad..8dc00820f3b5 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -19,6 +19,10 @@ defaults: run: shell: bash -x -e -u -o pipefail {0} +permissions: + contents: write + pull-requests: write + jobs: reformat_with_isort_and_black: runs-on: ubuntu-latest From 913c7c80f5c051c330133011f8c0107186e89246 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:34:12 +0100 Subject: [PATCH 13/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 8dc00820f3b5..41b81113e6a3 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -19,10 +19,6 @@ defaults: run: shell: bash -x -e -u -o pipefail {0} -permissions: - contents: write - pull-requests: write - jobs: reformat_with_isort_and_black: runs-on: ubuntu-latest @@ -80,8 +76,8 @@ jobs: name: "check_pylint (strict-mode: ${{ matrix.strict-mode }})" runs-on: ubuntu-latest permissions: - # write permissions required to commit changes contents: write + pull-requests: write env: THRESHOLD: 1731004695 # On this date (2024/11/07) we decided to add Pylint. It shall only run in strict mode for files added past this date. For files prior to this date, we will only add a PR comment with PyLint's stdout. 
strategy: From eb24944cc2a48813bcf56b00f9f4fc4e582f6d81 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:36:14 +0100 Subject: [PATCH 14/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 41b81113e6a3..12b287f9863d 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -159,4 +159,11 @@ jobs: uses: peter-evans/create-or-update-comment@v4 with: issue-number: ${{ github.event.number }} - body: hello, world \ No newline at end of file + body: | + Your code was analyzed with PyLint. The following annotations have been identified: + + ``` + ${{ env.OUTPUT }} + ``` + + Please help us to raise the code-quality of NeMo! \ No newline at end of file From 9a9a646debd2632ede959145e7f9c098d7a33665 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 21:37:30 +0100 Subject: [PATCH 15/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 12b287f9863d..5ca56a00a6e1 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -158,7 +158,6 @@ jobs: if: ${{ matrix.strict-mode == 'false' && env.OUTPUT != '' }} uses: peter-evans/create-or-update-comment@v4 with: - issue-number: ${{ github.event.number }} body: | Your code was analyzed with PyLint. 
The following annotations have been identified: From 108fbefcddf7d76f39e2168a172388951d3f0297 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 22:24:15 +0100 Subject: [PATCH 16/21] final Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 5ca56a00a6e1..c576496061b6 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -154,11 +154,22 @@ jobs: fi exit $([[ "$EXIT_CODE" -ne 0 && "$STRICT_MODE" == "true" ]] && echo $EXIT_CODE || echo 0) + - name: Find Comment + if: ${{ matrix.strict-mode == 'false' && env.OUTPUT != '' }} + uses: peter-evans/find-comment@v3 + id: fc + with: + issue-number: ${{ github.event.number }} + body-includes: + - name: Add PR comment for PyLint if: ${{ matrix.strict-mode == 'false' && env.OUTPUT != '' }} uses: peter-evans/create-or-update-comment@v4 with: + comment-id: ${{ steps.fc.outputs.comment-id }} body: | + + Your code was analyzed with PyLint. 
The following annotations have been identified: ``` From 51787f1952f7d9da389d7790b96a39b6039200f8 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 22:25:10 +0100 Subject: [PATCH 17/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index c576496061b6..de2274939fc3 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -167,6 +167,7 @@ jobs: uses: peter-evans/create-or-update-comment@v4 with: comment-id: ${{ steps.fc.outputs.comment-id }} + issue-number: ${{ github.event.number }} body: | From 3b3d67578a1fc9303ecc0b9ab80a63b398d896ca Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 22:28:44 +0100 Subject: [PATCH 18/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index de2274939fc3..c1bfb95396d2 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -168,6 +168,7 @@ jobs: with: comment-id: ${{ steps.fc.outputs.comment-id }} issue-number: ${{ github.event.number }} + edit-mode: replace body: | From 8151761c80b5e3430b5ac9b67d5a96fdd032d168 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 22:30:32 +0100 Subject: [PATCH 19/21] fix Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index c1bfb95396d2..889e3cff7e09 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -136,7 +136,6 @@ jobs: echo "Will run on these files: ${FILTERED[@]}" - echo ${{ github.event.pull_request.labels.*.name }} set +e LOG=$(pylint ${FILTERED[@]}) EXIT_CODE=$? 
From 6723a5443cd387c4a49a24e4da13a7b24d3a3b0a Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 22:34:43 +0100 Subject: [PATCH 20/21] add a new file Signed-off-by: Oliver Koenig --- nemo/core/classes/dataset2.py | 108 ++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 nemo/core/classes/dataset2.py diff --git a/nemo/core/classes/dataset2.py b/nemo/core/classes/dataset2.py new file mode 100644 index 000000000000..57fd2a9ba799 --- /dev/null +++ b/nemo/core/classes/dataset2.py @@ -0,0 +1,108 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from dataclasses import dataclass +from typing import Optional + +from torch.utils import data + +from nemo.core.classes import Serialization, Typing, typecheck + +__all__ = ['Dataset', 'IterableDataset'] + + +class Dataset(data.Dataset, Typing, Serialization): + """Dataset with output ports + + Please Note: Subclasses of IterableDataset should *not* implement input_types.. + """ + + def _collate_fn(self, batch): + """ + A default implementation of a collation function. + Users should override this method to define custom data loaders. + """ + return data.dataloader.default_collate(batch) + + @typecheck() + def collate_fn(self, batch): + """ + This is the method that user pass as functor to DataLoader. + The method optionally performs neural type checking and add types to the outputs. 
+ + Please note, subclasses of Dataset should not implement `input_types`. + + Usage: + + .. code-block:: python + + dataloader = torch.utils.data.DataLoader( + ...., + collate_fn=dataset.collate_fn, + .... + ) + + Returns: + Collated batch, with or without types. + """ + if self.input_types is not None: + raise TypeError("Datasets should not implement `input_types` as they are not checked") + + # Simply forward the inner `_collate_fn` + return self._collate_fn(batch) + + +class IterableDataset(data.IterableDataset, Typing, Serialization): + """Iterable Dataset with output ports + + Please Note: Subclasses of IterableDataset should *not* implement input_types. + """ + + def _collate_fn(self, batch): + """ + A default implementation of a collation function. + Users should override this method to define custom data loaders. + """ + return data.dataloader.default_collate(batch) + + @typecheck() + def collate_fn(self, batch): + """ + This is the method that user pass as functor to DataLoader. + The method optionally performs neural type checking and add types to the outputs. + + # Usage: + dataloader = torch.utils.data.DataLoader( + ...., + collate_fn=dataset.collate_fn, + .... + ) + + Returns: + Collated batch, with or without types. + """ + if self.input_types is not None: + raise TypeError("Datasets should not implement `input_types` as they are not checked") + + # Simply forward the inner `_collate_fn` + return self._collate_fn(batch) + + +@dataclass +class DatasetConfig: + # ... 
+ batch_size: int = 32 + drop_last: bool = False + shuffle: bool = False + num_workers: Optional[int] = 0 + pin_memory: bool = True From 0c058803cbd973e68b70bf91efd9628f26cdd496 Mon Sep 17 00:00:00 2001 From: Oliver Koenig Date: Thu, 7 Nov 2024 22:36:51 +0100 Subject: [PATCH 21/21] fix timestamp Signed-off-by: Oliver Koenig --- .github/workflows/code-formatting.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml index 889e3cff7e09..96bf5c8d5514 100644 --- a/.github/workflows/code-formatting.yml +++ b/.github/workflows/code-formatting.yml @@ -79,7 +79,7 @@ jobs: contents: write pull-requests: write env: - THRESHOLD: 1731004695 # On this date (2024/11/07) we decided to add Pylint. It shall only run in strict mode for files added past this date. For files prior to this date, we will only add a PR comment with PyLint's stdout. + THRESHOLD: 1730937600 # On this date (2024/11/07) we decided to add Pylint. It shall only run in strict mode for files added past this date. For files prior to this date, we will only add a PR comment with PyLint's stdout. strategy: matrix: strict-mode: ["true", "false"]