From f233ffa02ae248e4ad2c526d5c35c4a9ade601f5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 7 Sep 2024 18:17:04 +0800 Subject: [PATCH 1/9] Add docker images for torch 2.4.1 (#1743) --- .../scripts/docker/generate_build_matrix.py | 6 +- .github/workflows/build-docker-image.yml | 2 +- docker/torch2.4.1-cuda11.8.dockerfile | 73 +++++++++++++++++++ docker/torch2.4.1-cuda12.1.dockerfile | 73 +++++++++++++++++++ docker/torch2.4.1-cuda12.4.dockerfile | 73 +++++++++++++++++++ docs/source/docker/intro.rst | 6 ++ 6 files changed, 231 insertions(+), 2 deletions(-) create mode 100644 docker/torch2.4.1-cuda11.8.dockerfile create mode 100644 docker/torch2.4.1-cuda12.1.dockerfile create mode 100644 docker/torch2.4.1-cuda12.4.dockerfile diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py index 5a763e0441..492d3ed475 100755 --- a/.github/scripts/docker/generate_build_matrix.py +++ b/.github/scripts/docker/generate_build_matrix.py @@ -45,7 +45,7 @@ def get_torchaudio_version(torch_version): def get_matrix(): k2_version = "1.24.4.dev20240223" kaldifeat_version = "1.25.4.dev20240223" - version = "20240725" + version = "20240905" python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] torch_version = [] torch_version += ["1.13.0", "1.13.1"] @@ -54,6 +54,7 @@ def get_matrix(): torch_version += ["2.2.0", "2.2.1", "2.2.2"] torch_version += ["2.3.0", "2.3.1"] torch_version += ["2.4.0"] + torch_version += ["2.4.1"] matrix = [] for p in python_version: @@ -82,6 +83,9 @@ def get_matrix(): elif t == "2.4.0": k2_version_2 = "1.24.4.dev20240725" kaldifeat_version_2 = "1.25.4.dev20240725" + elif t == "2.4.1": + k2_version_2 = "1.24.4.dev20240905" + kaldifeat_version_2 = "1.25.4.dev20240905" matrix.append( { diff --git a/.github/workflows/build-docker-image.yml b/.github/workflows/build-docker-image.yml index 77480bd3e4..a473590a38 100644 --- a/.github/workflows/build-docker-image.yml +++ b/.github/workflows/build-docker-image.yml @@ -16,7 +16,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - image: ["torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"] + image: ["torch2.4.1-cuda12.4", "torch2.4.1-cuda12.1", "torch2.4.1-cuda11.8", "torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"] steps: # refer to https://github.com/actions/checkout diff --git a/docker/torch2.4.1-cuda11.8.dockerfile b/docker/torch2.4.1-cuda11.8.dockerfile new file mode 100644 index 0000000000..bc1782b0dd --- /dev/null +++ b/docker/torch2.4.1-cuda11.8.dockerfile @@ -0,0 +1,73 @@ +FROM pytorch/pytorch:2.4.1-cuda11.8-cudnn9-devel +# python 3.10 + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +# python 3.10 +ARG K2_VERSION="1.24.4.dev20240905+cuda11.8.torch2.4.1" +ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda11.8.torch2.4.1" +ARG TORCHAUDIO_VERSION="2.4.1+cu118" + +LABEL authors="Fangjun Kuang " 
+LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + onnxoptimizer \ + onnxsim \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall diff --git a/docker/torch2.4.1-cuda12.1.dockerfile b/docker/torch2.4.1-cuda12.1.dockerfile new file mode 100644 index 0000000000..df2ea61a4d --- /dev/null +++ b/docker/torch2.4.1-cuda12.1.dockerfile @@ -0,0 +1,73 @@ +FROM pytorch/pytorch:2.4.1-cuda12.1-cudnn9-devel +# python 3.10 + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +# python 3.10 +ARG K2_VERSION="1.24.4.dev20240905+cuda12.1.torch2.4.1" +ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.1.torch2.4.1" +ARG TORCHAUDIO_VERSION="2.4.1+cu121" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + onnxoptimizer \ + onnxsim \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall diff --git a/docker/torch2.4.1-cuda12.4.dockerfile b/docker/torch2.4.1-cuda12.4.dockerfile new file mode 100644 index 0000000000..4d6da28043 --- /dev/null +++ b/docker/torch2.4.1-cuda12.4.dockerfile @@ -0,0 +1,73 @@ +FROM pytorch/pytorch:2.4.1-cuda12.4-cudnn9-devel +# python 3.10 + +ENV LC_ALL C.UTF-8 + +ARG DEBIAN_FRONTEND=noninteractive + +# python 3.10 +ARG 
K2_VERSION="1.24.4.dev20240905+cuda12.4.torch2.4.1" +ARG KALDIFEAT_VERSION="1.25.4.dev20240905+cuda12.4.torch2.4.1" +ARG TORCHAUDIO_VERSION="2.4.1+cu124" + +LABEL authors="Fangjun Kuang " +LABEL k2_version=${K2_VERSION} +LABEL kaldifeat_version=${KALDIFEAT_VERSION} +LABEL github_repo="https://github.com/k2-fsa/icefall" + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + curl \ + vim \ + libssl-dev \ + autoconf \ + automake \ + bzip2 \ + ca-certificates \ + ffmpeg \ + g++ \ + gfortran \ + git \ + libtool \ + make \ + patch \ + sox \ + subversion \ + unzip \ + valgrind \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* + +# Install dependencies +RUN pip install --no-cache-dir \ + torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/torchaudio/ \ + k2==${K2_VERSION} -f https://k2-fsa.github.io/k2/cuda.html \ + git+https://github.com/lhotse-speech/lhotse \ + kaldifeat==${KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cuda.html \ + kaldi_native_io \ + kaldialign \ + kaldifst \ + kaldilm \ + sentencepiece>=0.1.96 \ + tensorboard \ + typeguard \ + dill \ + onnx \ + onnxruntime \ + onnxmltools \ + onnxoptimizer \ + onnxsim \ + multi_quantization \ + typeguard \ + numpy \ + pytest \ + graphviz + +RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \ + cd /workspace/icefall && \ + pip install --no-cache-dir -r requirements.txt + +ENV PYTHONPATH /workspace/icefall:$PYTHONPATH + +WORKDIR /workspace/icefall diff --git a/docs/source/docker/intro.rst b/docs/source/docker/intro.rst index f3d2b07273..5fc3fa4d56 100644 --- a/docs/source/docker/intro.rst +++ b/docs/source/docker/intro.rst @@ -34,6 +34,12 @@ which will give you something like below: .. code-block:: bash + "torch2.4.1-cuda12.4" + "torch2.4.1-cuda12.1" + "torch2.4.1-cuda11.8" + "torch2.4.0-cuda12.4" + "torch2.4.0-cuda12.1" + "torch2.4.0-cuda11.8" "torch2.3.1-cuda12.1" "torch2.3.1-cuda11.8" "torch2.2.2-cuda12.1" From d4b43236999da5314e889544524782fecafe8ddc Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 7 Sep 2024 19:21:26 +0800 Subject: [PATCH 2/9] Fix github actions CI tests (#1744) --- .github/scripts/docker/generate_build_matrix.py | 9 +++++---- ...gigaspeech-pruned-transducer-stateless2-2022-05-12.sh | 4 ++-- .github/scripts/test-onnx-export.sh | 1 + .github/workflows/build-doc.yml | 2 ++ .github/workflows/run-gigaspeech-2022-05-13.yml | 4 +++- .../workflows/run-gigaspeech-zipformer-2023-10-17.yml | 4 ++-- ...librispeech-lstm-transducer-stateless2-2022-09-03.yml | 4 +++- .github/workflows/run-multi-corpora-zipformer.yml | 2 ++ .github/workflows/run-ptb-rnn-lm.yml | 4 +++- .github/workflows/run-swbd-conformer-ctc.yml | 2 ++ .../run-wenetspeech-pruned-transducer-stateless2.yml | 2 ++ .github/workflows/style_check.yml | 2 ++ .github/workflows/test-ncnn-export.yml | 2 ++ .github/workflows/test-onnx-export.yml | 2 ++ .github/workflows/test.yml | 2 +- 15 files changed, 34 insertions(+), 12 deletions(-) diff --git a/.github/scripts/docker/generate_build_matrix.py b/.github/scripts/docker/generate_build_matrix.py index 492d3ed475..08281151ee 100755 --- a/.github/scripts/docker/generate_build_matrix.py +++ b/.github/scripts/docker/generate_build_matrix.py @@ -48,10 +48,11 @@ def get_matrix(): version = "20240905" python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"] torch_version = [] - torch_version += ["1.13.0", "1.13.1"] - torch_version += ["2.0.0", "2.0.1"] - torch_version += ["2.1.0", "2.1.1", "2.1.2"] - torch_version += ["2.2.0", "2.2.1", "2.2.2"] + # 
torch_version += ["1.13.0", "1.13.1"] + # torch_version += ["2.0.0", "2.0.1"] + # torch_version += ["2.1.0", "2.1.1", "2.1.2"] + # torch_version += ["2.2.0", "2.2.1", "2.2.2"] + # Test only torch >= 2.3.0 torch_version += ["2.3.0", "2.3.1"] torch_version += ["2.4.0"] torch_version += ["2.4.1"] diff --git a/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh b/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh index b61a9d7b63..c9e798a68d 100755 --- a/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh +++ b/.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh @@ -29,8 +29,8 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == ls -lh data/fbank ls -lh pruned_transducer_stateless2/exp - ln -s data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz - ln -s data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz + ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz + ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz log "Decoding dev and test" diff --git a/.github/scripts/test-onnx-export.sh b/.github/scripts/test-onnx-export.sh index fcfc11fa6f..3252c37f12 100755 --- a/.github/scripts/test-onnx-export.sh +++ b/.github/scripts/test-onnx-export.sh @@ -25,6 +25,7 @@ popd log "Export via torch.jit.script()" ./zipformer/export.py \ + --use-averaged-model 0 \ --exp-dir $repo/exp \ --tokens $repo/data/lang_bpe_500/tokens.txt \ --epoch 99 \ diff --git a/.github/workflows/build-doc.yml b/.github/workflows/build-doc.yml index c622476f2d..ca96e6de5e 100644 --- a/.github/workflows/build-doc.yml +++ b/.github/workflows/build-doc.yml @@ -26,6 +26,8 @@ on: pull_request: types: [labeled] + workflow_dispatch: + concurrency: group: build_doc-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/run-gigaspeech-2022-05-13.yml b/.github/workflows/run-gigaspeech-2022-05-13.yml index 3121520c1b..2c1d44fbfe 100644 --- a/.github/workflows/run-gigaspeech-2022-05-13.yml +++ b/.github/workflows/run-gigaspeech-2022-05-13.yml @@ -33,6 +33,8 @@ on: # nightly build at 15:50 UTC time every day - cron: "50 15 * * *" + workflow_dispatch: + concurrency: group: run_gigaspeech_2022_05_13-${{ github.ref }} cancel-in-progress: true @@ -119,7 +121,7 @@ jobs: find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2 - name: Upload decoding results for gigaspeech pruned_transducer_stateless2 - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12 diff --git a/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml b/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml index 87090e3106..4ecc2aea04 100644 --- a/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml +++ b/.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml @@ -42,7 +42,7 @@ concurrency: jobs: run_gigaspeech_2023_10_17_zipformer: - if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' + if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || 
github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' runs-on: ${{ matrix.os }} strategy: matrix: @@ -133,7 +133,7 @@ jobs: find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - name: Upload decoding results for gigaspeech zipformer - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' with: name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11 diff --git a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml index 501fae38c9..6a3f4eb40a 100644 --- a/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml +++ b/.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml @@ -16,6 +16,8 @@ on: # nightly build at 15:50 UTC time every day - cron: "50 15 * * *" + workflow_dispatch: + concurrency: group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }} cancel-in-progress: true @@ -156,7 +158,7 @@ jobs: find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2 - name: Upload decoding results for lstm_transducer_stateless2 - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' with: name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03 diff --git a/.github/workflows/run-multi-corpora-zipformer.yml b/.github/workflows/run-multi-corpora-zipformer.yml index 38f7eb9087..84f9f3a0d3 100644 --- a/.github/workflows/run-multi-corpora-zipformer.yml +++ b/.github/workflows/run-multi-corpora-zipformer.yml @@ -23,6 +23,8 @@ on: pull_request: types: [labeled] + workflow_dispatch: + concurrency: group: run_multi-corpora_zipformer-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/run-ptb-rnn-lm.yml b/.github/workflows/run-ptb-rnn-lm.yml index f8d9c02c58..6e4077cf44 100644 --- a/.github/workflows/run-ptb-rnn-lm.yml +++ b/.github/workflows/run-ptb-rnn-lm.yml @@ -16,6 +16,8 @@ on: # nightly build at 15:50 UTC time every day - cron: "50 15 * * *" + workflow_dispatch: + concurrency: group: run_ptb_rnn_lm_training-${{ github.ref }} cancel-in-progress: true @@ -64,7 +66,7 @@ jobs: ./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2 - name: Upload pretrained models - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule' with: name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb diff --git a/.github/workflows/run-swbd-conformer-ctc.yml b/.github/workflows/run-swbd-conformer-ctc.yml index 842691d38c..b0178bedd0 100644 --- a/.github/workflows/run-swbd-conformer-ctc.yml +++ b/.github/workflows/run-swbd-conformer-ctc.yml @@ -23,6 +23,8 @@ on: pull_request: types: [labeled] + workflow_dispatch: + concurrency: group: run-swbd-conformer_ctc-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml b/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml index 319a5558a3..e76497ec36 100644 --- 
a/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml +++ b/.github/workflows/run-wenetspeech-pruned-transducer-stateless2.yml @@ -23,6 +23,8 @@ on: pull_request: types: [labeled] + workflow_dispatch: + concurrency: group: run_wenetspeech_pruned_transducer_stateless2-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index 1c37f13edd..0681ece606 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -24,6 +24,8 @@ on: branches: - master + workflow_dispatch: + concurrency: group: style_check-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/test-ncnn-export.yml b/.github/workflows/test-ncnn-export.yml index 5709f8ebb0..ec419d65f8 100644 --- a/.github/workflows/test-ncnn-export.yml +++ b/.github/workflows/test-ncnn-export.yml @@ -16,6 +16,8 @@ on: # nightly build at 15:50 UTC time every day - cron: "50 15 * * *" + workflow_dispatch: + concurrency: group: test_ncnn_export-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/test-onnx-export.yml b/.github/workflows/test-onnx-export.yml index c05cde3ba0..646ca05692 100644 --- a/.github/workflows/test-onnx-export.yml +++ b/.github/workflows/test-onnx-export.yml @@ -16,6 +16,8 @@ on: # nightly build at 15:50 UTC time every day - cron: "50 15 * * *" + workflow_dispatch: + concurrency: group: test_onnx_export-${{ github.ref }} cancel-in-progress: true diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 659681b37e..9eb7e403ce 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -105,7 +105,7 @@ jobs: cd ../zipformer pytest -v -s - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: path: egs/librispeech/ASR/zipformer/swoosh.pdf name: swoosh.pdf From 559c8a716039bc1f3da2a4d1487292830fd21f06 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sun, 8 Sep 2024 17:10:17 +0800 Subject: [PATCH 3/9] fixed a typo in `prepare.sh` for alimeeting recipes (#1747) --- egs/alimeeting/ASR/prepare.sh | 2 +- egs/alimeeting/ASR_v2/prepare.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/alimeeting/ASR/prepare.sh b/egs/alimeeting/ASR/prepare.sh index 996a1da2d4..55f9f019b6 100755 --- a/egs/alimeeting/ASR/prepare.sh +++ b/egs/alimeeting/ASR/prepare.sh @@ -87,7 +87,7 @@ fi if [ $stage -le 3 ] && [ $stop_stage -ge 3 ]; then log "Stage 3: Prepare musan manifest" # We assume that you have downloaded the musan corpus - # to data/musan + # to $dl_dir/musan if [ ! 
-f data/manifests/.musan_manifests.done ]; then log "It may take 6 minutes" mkdir -p data/manifests diff --git a/egs/alimeeting/ASR_v2/prepare.sh b/egs/alimeeting/ASR_v2/prepare.sh index 15c20692da..1881cd75c0 100755 --- a/egs/alimeeting/ASR_v2/prepare.sh +++ b/egs/alimeeting/ASR_v2/prepare.sh @@ -65,7 +65,7 @@ fi if [ $stage -le 2 ] && [ $stop_stage -ge 2 ]; then log "Stage 2: Prepare musan manifest" # We assume that you have downloaded the musan corpus - # to data/musan + # to $dl_dir/musan mkdir -p data/manifests lhotse prepare musan $dl_dir/musan data/manifests fi From 2ff0bb6a884c8f5aafa48551fba8c7d0eeb15b96 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sun, 8 Sep 2024 17:42:55 +0800 Subject: [PATCH 4/9] fix CI tests (#1748) --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9eb7e403ce..c22f2edb5a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -108,4 +108,4 @@ jobs: - uses: actions/upload-artifact@v4 with: path: egs/librispeech/ASR/zipformer/swoosh.pdf - name: swoosh.pdf + name: swoosh-${{ matrix.python-version }}-${{ matrix.torch-version }} From 65b8a6c730568ed12fccccb244e013f6ae3d7745 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sun, 8 Sep 2024 20:34:49 +0800 Subject: [PATCH 5/9] fixed wrong default value for the `alimeeting` recipe (#1750) --- .../pruned_transducer_stateless7/asr_datamodule.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py b/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py index 6b56c8a6a2..9da8203154 100644 --- a/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py +++ b/egs/alimeeting/ASR_v2/pruned_transducer_stateless7/asr_datamodule.py @@ -82,7 +82,7 @@ def add_arguments(cls, parser: argparse.ArgumentParser): group.add_argument( "--manifest-dir", type=Path, - default=Path("data/manifests"), + default=Path("data/fbank"), help="Path to directory with train/valid/test cuts.", ) group.add_argument( @@ -327,9 +327,11 @@ def valid_dataloaders(self, cuts_valid: CutSet) -> DataLoader: def test_dataloaders(self, cuts: CutSet) -> DataLoader: logging.debug("About to create test dataset") test = K2SpeechRecognitionDataset( - input_strategy=OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) - if self.args.on_the_fly_feats - else PrecomputedFeatures(), + input_strategy=( + OnTheFlyFeatures(Fbank(FbankConfig(num_mel_bins=80))) + if self.args.on_the_fly_feats + else PrecomputedFeatures() + ), return_cuts=True, ) sampler = DynamicBucketingSampler( From a394bf74742c0242f35a514e016df74d6ba42505 Mon Sep 17 00:00:00 2001 From: zr_jin Date: Sun, 8 Sep 2024 20:35:07 +0800 Subject: [PATCH 6/9] fixed gss scripts for `alimeeting` and `ami` recipes (#1749) --- egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh | 4 ++-- egs/ami/ASR/local/prepare_ami_gss.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh b/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh index 76db19832c..bd25bc9e51 100755 --- a/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh +++ b/egs/alimeeting/ASR_v2/local/prepare_alimeeting_gss.sh @@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then # for train, we use smaller context and larger batches to speed-up processing for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \ - 
$EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \ + $EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 5.0 \ --use-garbage-class \ @@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then for part in eval test; do for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \ - $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \ + $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \ $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 15.0 \ diff --git a/egs/ami/ASR/local/prepare_ami_gss.sh b/egs/ami/ASR/local/prepare_ami_gss.sh index d5422458bd..414c22b123 100755 --- a/egs/ami/ASR/local/prepare_ami_gss.sh +++ b/egs/ami/ASR/local/prepare_ami_gss.sh @@ -58,7 +58,7 @@ if [ $stage -le 4 ]; then # for train, we use smaller context and larger batches to speed-up processing for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_train.jsonl.gz \ - $EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.JOB.jsonl.gz $EXP_DIR/enhanced \ + $EXP_DIR/cuts_per_segment_train_split$nj/cuts_per_segment_train.$JOB.jsonl.gz $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 5.0 \ --use-garbage-class \ @@ -77,7 +77,7 @@ if [ $stage -le 5 ]; then for part in dev test; do for JOB in $(seq $nj); do gss enhance cuts $EXP_DIR/cuts_${part}.jsonl.gz \ - $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.JOB.jsonl.gz \ + $EXP_DIR/cuts_per_segment_${part}_split$nj/cuts_per_segment_${part}.$JOB.jsonl.gz \ $EXP_DIR/enhanced \ --bss-iterations 10 \ --context-duration 15.0 \ From 329e34ac204bfedf7d4169ca4ccd295de7ff8aac Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 10 Sep 2024 19:29:19 +0800 Subject: [PATCH 7/9] Test export onnx models for multi-zh-hans (#1752) --- .github/scripts/multi-zh-hans.sh | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/.github/scripts/multi-zh-hans.sh b/.github/scripts/multi-zh-hans.sh index 427d8887b7..e254419ff3 100755 --- a/.github/scripts/multi-zh-hans.sh +++ b/.github/scripts/multi-zh-hans.sh @@ -16,6 +16,48 @@ log "pwd: $PWD" cd egs/multi_zh-hans/ASR +repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2 +log "Downloading pre-trained model from $repo_url" +GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url +repo=$(basename $repo_url) +pushd $repo +cd exp +git lfs pull --include pretrained.pt +ln -s pretrained.pt epoch-99.pt +cd ../data/lang_bpe_2000 +ls -lh +git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model +git lfs pull --include "*.model" +ls -lh +popd + +log "--------------------------------------------" +log "Export non-streaming ONNX transducer models " +log "--------------------------------------------" +./zipformer/export-onnx.py \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + --use-averaged-model 0 \ + --epoch 99 \ + --avg 1 \ + --exp-dir $repo/exp \ + --causal False + +ls -lh $repo/exp + +./zipformer/onnx_pretrained.py \ + --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \ + --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \ + --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \ + --tokens $repo/data/lang_bpe_2000/tokens.txt \ + $repo/test_wavs/DEV_T0000000000.wav \ + $repo/test_wavs/DEV_T0000000001.wav \ + $repo/test_wavs/DEV_T0000000002.wav \ + $repo/test_wavs/TEST_MEETING_T0000000113.wav \ + 
$repo/test_wavs/TEST_MEETING_T0000000219.wav \ + $repo/test_wavs/TEST_MEETING_T0000000351.wav + +rm -rf $repo + repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05 log "Downloading pre-trained model from $repo_url" GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url From 6f1abd832dc290b62adfdd0f615010c2f3c274a5 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 11 Sep 2024 21:04:52 +0800 Subject: [PATCH 8/9] Fix exporting streaming zipformer models. (#1755) --- .../ASR/zipformer/export-onnx-streaming.py | 2 +- egs/librispeech/ASR/zipformer/zipformer.py | 41 +++++++++++++++---- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py index e5ceb36831..88c58f5818 100755 --- a/egs/librispeech/ASR/zipformer/export-onnx-streaming.py +++ b/egs/librispeech/ASR/zipformer/export-onnx-streaming.py @@ -74,7 +74,6 @@ import torch import torch.nn as nn from decoder import Decoder -from onnxconverter_common import float16 from onnxruntime.quantization import QuantType, quantize_dynamic from scaling_converter import convert_scaled_to_non_scaled from train import add_model_arguments, get_model, get_params @@ -756,6 +755,7 @@ def main(): logging.info(f"Exported joiner to {joiner_filename}") if(params.fp16) : + from onnxconverter_common import float16 logging.info("Generate fp16 models") encoder = onnx.load(encoder_filename) diff --git a/egs/librispeech/ASR/zipformer/zipformer.py b/egs/librispeech/ASR/zipformer/zipformer.py index 69059287b4..2a0ae01297 100644 --- a/egs/librispeech/ASR/zipformer/zipformer.py +++ b/egs/librispeech/ASR/zipformer/zipformer.py @@ -191,6 +191,7 @@ def _to_tuple(x): dim=encoder_dim[i], downsample=downsampling_factor[i], dropout=dropout, + causal=causal, ) encoders.append(encoder) @@ -198,7 +199,10 @@ def _to_tuple(x): self.encoders = nn.ModuleList(encoders) self.downsample_output = SimpleDownsample( - max(encoder_dim), downsample=output_downsampling_factor, dropout=dropout + max(encoder_dim), + downsample=output_downsampling_factor, + dropout=dropout, + causal=causal, ) def get_feature_masks(self, x: Tensor) -> Union[List[float], List[Tensor]]: @@ -1217,11 +1221,16 @@ class DownsampledZipformer2Encoder(nn.Module): """ def __init__( - self, encoder: nn.Module, dim: int, downsample: int, dropout: FloatLike + self, + encoder: nn.Module, + dim: int, + downsample: int, + dropout: FloatLike, + causal: bool, ): super(DownsampledZipformer2Encoder, self).__init__() self.downsample_factor = downsample - self.downsample = SimpleDownsample(dim, downsample, dropout) + self.downsample = SimpleDownsample(dim, downsample, dropout, causal) self.num_layers = encoder.num_layers self.encoder = encoder self.upsample = SimpleUpsample(dim, downsample) @@ -1310,9 +1319,12 @@ class SimpleDownsample(torch.nn.Module): Does downsampling with attention, by weighted sum, and a projection.. """ - def __init__(self, channels: int, downsample: int, dropout: FloatLike): + def __init__( + self, channels: int, downsample: int, dropout: FloatLike, causal: bool + ): super(SimpleDownsample, self).__init__() + self.causal = causal self.bias = nn.Parameter(torch.zeros(downsample)) self.name = None # will be set from training code @@ -1333,9 +1345,18 @@ def forward(self, src: Tensor) -> Tensor: # Pad to an exact multiple of self.downsample # right-pad src, repeating the last element. 
pad = d_seq_len * ds - seq_len - src_extra = src[src.shape[0] - 1 :].expand(pad, src.shape[1], src.shape[2]) - src = torch.cat((src, src_extra), dim=0) - assert src.shape[0] == d_seq_len * ds + + if self.causal and torch.jit.is_tracing(): + assert ( + pad == 0 + ), f"pad should be zero for exporting streaming models. Given {pad}" + + # If we are exporting a streaming model, then we skip the if statement + if not self.causal or not torch.jit.is_tracing(): + src_extra = src[src.shape[0] - 1 :].expand(pad, src.shape[1], src.shape[2]) + src = torch.cat((src, src_extra), dim=0) + + assert src.shape[0] == d_seq_len * ds, (src.shape, d_seq_len, ds) src = src.reshape(d_seq_len, ds, batch_size, in_channels) @@ -1609,7 +1630,11 @@ def forward( k = x[..., query_dim : 2 * query_dim] # p is the position-encoding query p = x[..., 2 * query_dim :] - assert p.shape[-1] == num_heads * pos_head_dim, (p.shape[-1], num_heads, pos_head_dim) + assert p.shape[-1] == num_heads * pos_head_dim, ( + p.shape[-1], + num_heads, + pos_head_dim, + ) q = self.copy_query(q) # for diagnostics only, does nothing. k = self.whiten_keys(self.balance_keys(k)) # does nothing in the forward pass. From 5c04c31292b87dc95fe0a9b498dc753f509ebda1 Mon Sep 17 00:00:00 2001 From: Yu Lianjie Date: Fri, 20 Sep 2024 12:38:52 +0800 Subject: [PATCH 9/9] fix open-commands path (#1714) --- egs/wenetspeech/KWS/prepare.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/wenetspeech/KWS/prepare.sh b/egs/wenetspeech/KWS/prepare.sh index dcc65fab49..e52e1a9d19 100755 --- a/egs/wenetspeech/KWS/prepare.sh +++ b/egs/wenetspeech/KWS/prepare.sh @@ -63,8 +63,8 @@ if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then ln -svf $(realpath ./open-commands/CN/small/commands.txt) commands_small.txt ln -svf $(realpath ./open-commands/CN/large/commands.txt) commands_large.txt pushd open-commands - ./script/prepare.sh --stage 1 --stop-stage 1 - ./script/prepare.sh --stage 3 --stop-stage 5 + ./scripts/prepare.sh --stage 1 --stop-stage 1 + ./scripts/prepare.sh --stage 3 --stop-stage 5 popd popd pushd data/fbank
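
A short usage sketch for the images introduced in PATCH 1/9 (not part of the patches themselves). The image names below follow the matrix entries added to build-docker-image.yml; the registry path (k2fsa/icefall) and the exact published tag are assumptions here, so adjust them to whatever registry and tag the CI actually publishes. The /workspace/icefall location comes from the WORKDIR and ENV PYTHONPATH lines in the new dockerfiles.

  # Pull and start one of the new torch 2.4.1 images.
  # NOTE: "k2fsa/icefall:torch2.4.1-cuda12.4" is an assumed registry/tag,
  # chosen to mirror the matrix names in build-docker-image.yml.
  docker run --gpus all --rm -it \
    -v "$PWD":/workspace/host \
    k2fsa/icefall:torch2.4.1-cuda12.4 /bin/bash

  # Inside the container, icefall is cloned to /workspace/icefall and is
  # already on PYTHONPATH (see the dockerfiles above), so a quick sanity
  # check of the preinstalled stack could be:
  cd /workspace/icefall
  python3 -c "import torch, k2, kaldifeat; print(torch.__version__, k2.version.__version__)"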