Skip to content

Commit

Permalink
Merge branch 'k2-fsa:master' into dev/k2ssl
Browse files Browse the repository at this point in the history
  • Loading branch information
yfyeung authored Oct 30, 2024
2 parents 55fea00 + d513d45 commit 35fd0cf
Show file tree
Hide file tree
Showing 61 changed files with 6,418 additions and 101 deletions.
8 changes: 6 additions & 2 deletions .github/scripts/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,15 @@ LABEL github_repo="https://github.com/k2-fsa/icefall"

# Install dependencies
RUN pip install --no-cache-dir \
torch==${TORCH_VERSION} torchaudio==${TORCHAUDIO_VERSION} -f https://download.pytorch.org/whl/cpu/torch_stable.html \
torch==${TORCH_VERSION}+cpu -f https://download.pytorch.org/whl/torch \
torchaudio==${TORCHAUDIO_VERSION}+cpu -f https://download.pytorch.org/whl/torchaudio \
k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
\
git+https://github.com/lhotse-speech/lhotse \
kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
conformer==0.3.2 \
cython \
diffusers \
dill \
espnet_tts_frontend \
graphviz \
Expand All @@ -45,10 +48,11 @@ RUN pip install --no-cache-dir \
kaldialign \
kaldifst \
kaldilm \
librosa \
matplotlib \
multi_quantization \
numba \
numpy \
"numpy<2.0" \
onnxoptimizer \
onnxsim \
onnx \
Expand Down
32 changes: 13 additions & 19 deletions .github/scripts/docker/generate_build_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,11 @@ def get_torchaudio_version(torch_version):


def get_matrix():
k2_version = "1.24.4.dev20240223"
kaldifeat_version = "1.25.4.dev20240223"
version = "20240905"
k2_version = "1.24.4.dev20241029"
kaldifeat_version = "1.25.5.dev20241029"
version = "20241029"

# torchaudio 2.5.0 does not support python 3.13
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12"]
torch_version = []
# torch_version += ["1.13.0", "1.13.1"]
Expand All @@ -56,6 +58,7 @@ def get_matrix():
torch_version += ["2.3.0", "2.3.1"]
torch_version += ["2.4.0"]
torch_version += ["2.4.1"]
torch_version += ["2.5.0"]

matrix = []
for p in python_version:
Expand All @@ -69,25 +72,16 @@ def get_matrix():
if version_gt(p, "3.11") and not version_gt(t, "2.1"):
continue

if version_gt(p, "3.12") and not version_gt(t, "2.4"):
continue

if version_gt(t, "2.4") and version_gt("3.10", p):
# torch>=2.5 requires python 3.10
continue

k2_version_2 = k2_version
kaldifeat_version_2 = kaldifeat_version

if t == "2.2.2":
k2_version_2 = "1.24.4.dev20240328"
kaldifeat_version_2 = "1.25.4.dev20240329"
elif t == "2.3.0":
k2_version_2 = "1.24.4.dev20240425"
kaldifeat_version_2 = "1.25.4.dev20240425"
elif t == "2.3.1":
k2_version_2 = "1.24.4.dev20240606"
kaldifeat_version_2 = "1.25.4.dev20240606"
elif t == "2.4.0":
k2_version_2 = "1.24.4.dev20240725"
kaldifeat_version_2 = "1.25.4.dev20240725"
elif t == "2.4.1":
k2_version_2 = "1.24.4.dev20240905"
kaldifeat_version_2 = "1.25.4.dev20240905"

matrix.append(
{
"k2-version": k2_version_2,
Expand Down
120 changes: 120 additions & 0 deletions .github/scripts/ljspeech/TTS/run-matcha.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env bash

set -ex

apt-get update
apt-get install -y sox

python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
python3 -m pip install espnet_tts_frontend
python3 -m pip install numba conformer==0.3.2 diffusers librosa

log() {
# This function is from espnet
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/ljspeech/TTS

sed -i.bak s/600/8/g ./prepare.sh
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
sed -i.bak s/500/5/g ./prepare.sh
git diff

function prepare_data() {
# We have created a subset of the data for testing
#
mkdir -p download
pushd download
wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
tar xvf LJSpeech-1.1.tar.bz2
popd

./prepare.sh
tree .
}

function train() {
pushd ./matcha
sed -i.bak s/1500/3/g ./train.py
git diff .
popd

./matcha/train.py \
--exp-dir matcha/exp \
--num-epochs 1 \
--save-every-n 1 \
--num-buckets 2 \
--tokens data/tokens.txt \
--max-duration 20

ls -lh matcha/exp
}

function infer() {

curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1

./matcha/inference.py \
--epoch 1 \
--exp-dir ./matcha/exp \
--tokens data/tokens.txt \
--vocoder ./generator_v1 \
--input-text "how are you doing?" \
--output-wav ./generated.wav

ls -lh *.wav
soxi ./generated.wav
rm -v ./generated.wav
rm -v generator_v1
}

function export_onnx() {
pushd matcha/exp
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt
popd

pushd data/fbank
rm -v *.json
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
popd

./matcha/export_onnx.py \
--exp-dir ./matcha/exp \
--epoch 4000 \
--tokens ./data/tokens.txt \
--cmvn ./data/fbank/cmvn.json

ls -lh *.onnx

if false; then
# THe CI machine does not have enough memory to run it
#
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
python3 ./matcha/export_onnx_hifigan.py
else
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
fi

ls -lh *.onnx

python3 ./matcha/onnx_pretrained.py \
--acoustic-model ./model-steps-6.onnx \
--vocoder ./hifigan_v1.onnx \
--tokens ./data/tokens.txt \
--input-text "how are you doing?" \
--output-wav /icefall/generated-matcha-tts-steps-6-v1.wav

ls -lh /icefall/*.wav
soxi /icefall/generated-matcha-tts-steps-6-v1.wav
}

prepare_data
train
infer
export_onnx

rm -rfv generator_v* matcha/exp
2 changes: 1 addition & 1 deletion .github/scripts/ljspeech/TTS/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ git diff
function prepare_data() {
# We have created a subset of the data for testing
#
mkdir download
mkdir -p download
pushd download
wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
tar xvf LJSpeech-1.1.tar.bz2
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ repo=$(basename $repo_url)

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p pruned_transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/
Expand All @@ -29,8 +29,16 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" ==
ls -lh data/fbank
ls -lh pruned_transducer_stateless2/exp

ln -sf data/fbank/cuts_DEV.jsonl.gz data/fbank/gigaspeech_cuts_DEV.jsonl.gz
ln -sf data/fbank/cuts_TEST.jsonl.gz data/fbank/gigaspeech_cuts_TEST.jsonl.gz
pushd data/fbank
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca

ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
popd


log "Decoding dev and test"

Expand Down
18 changes: 16 additions & 2 deletions .github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
Original file line number Diff line number Diff line change
Expand Up @@ -129,20 +129,34 @@ done

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
mkdir -p zipformer/exp
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
mkdir -p data
ln -s $PWD/$repo/data/lang_bpe_500 data/

ls -lh data
ls -lh zipformer/exp

mkdir -p data/fbank
pushd data/fbank

curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca

ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz

popd

log "Decoding test-clean and test-other"

# use a small value for decoding with CPU
max_duration=100

for method in greedy_search fast_beam_search modified_beam_search; do
for method in greedy_search; do
log "Decoding with $method"

./zipformer/decode.py \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
--ngram-lm-scale -0.16
fi

if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then
mkdir -p lstm_transducer_stateless2/exp
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
ln -s $PWD/$repo/data/lang_bpe_500 data/
Expand All @@ -175,7 +175,7 @@ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" ]]; then
# use a small value for decoding with CPU
max_duration=100

for method in greedy_search fast_beam_search modified_beam_search; do
for method in greedy_search fast_beam_search; do
log "Decoding with $method"

./lstm_transducer_stateless2/decode.py \
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/audioset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ jobs:
ls -lh ./model-onnx/*
- name: Upload model to huggingface
if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3
Expand Down Expand Up @@ -116,7 +116,7 @@ jobs:
rm -rf huggingface
- name: Prepare for release
if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
shell: bash
run: |
d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
Expand All @@ -125,7 +125,7 @@ jobs:
ls -lh
- name: Release exported onnx models
if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
Expand Down
12 changes: 3 additions & 9 deletions .github/workflows/ljspeech.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ jobs:
cd /icefall
git config --global --add safe.directory /icefall
.github/scripts/ljspeech/TTS/run-matcha.sh
.github/scripts/ljspeech/TTS/run.sh
- name: display files
Expand All @@ -78,19 +79,13 @@ jobs:
ls -lh
- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0'
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0'
with:
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
path: ./*.wav

- uses: actions/upload-artifact@v4
if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0'
with:
name: generated-models-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}
path: ./*.wav

- name: Release exported onnx models
if: matrix.python-version == '3.9' && matrix.torch-version == '2.2.0' && github.event_name == 'push'
if: matrix.python-version == '3.9' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
uses: svenstaro/upload-release-action@v2
with:
file_glob: true
Expand All @@ -99,4 +94,3 @@ jobs:
repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: tts-models

6 changes: 3 additions & 3 deletions .github/workflows/run-gigaspeech-2022-05-13.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ concurrency:

jobs:
run_gigaspeech_2022_05_13:
if: github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand Down Expand Up @@ -106,7 +106,7 @@ jobs:
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
- name: Display decoding results for gigaspeech pruned_transducer_stateless2
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
shell: bash
run: |
cd egs/gigaspeech/ASR/
Expand All @@ -122,7 +122,7 @@ jobs:
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
uses: actions/upload-artifact@v4
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode'
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
with:
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
Loading

0 comments on commit 35fd0cf

Please sign in to comment.