Skip to content

Commit b28f3c9

Browse files
LauraGPT, lyblsgo and R1ckShi
authored
fsmn-vad bugfix (#1270)
* funasr1.0 funetine * funasr1.0 pbar * update with main (#1260) * Update websocket_protocol_zh.md * update --------- Co-authored-by: Yabin Li <[email protected]> Co-authored-by: shixian.shi <[email protected]> * update with main (#1264) * Funasr1.0 (#1261) * funasr1.0 funetine * funasr1.0 pbar * update with main (#1260) * Update websocket_protocol_zh.md * update --------- Co-authored-by: Yabin Li <[email protected]> Co-authored-by: shixian.shi <[email protected]> --------- Co-authored-by: Yabin Li <[email protected]> Co-authored-by: shixian.shi <[email protected]> * bug fix --------- Co-authored-by: Yabin Li <[email protected]> Co-authored-by: shixian.shi <[email protected]> * funasr1.0 sanm scama * funasr1.0 infer_after_finetune * funasr1.0 fsmn-vad bug fix * funasr1.0 fsmn-vad bug fix --------- Co-authored-by: Yabin Li <[email protected]> Co-authored-by: shixian.shi <[email protected]>
1 parent 704db42 commit b28f3c9

File tree

18 files changed

+1952
-783
lines changed

18 files changed

+1952
-783
lines changed

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,13 @@ Notes: Support recognition of single audio file, as well as file list in Kaldi-s
9191
from funasr import AutoModel
9292
# paraformer-zh is a multi-functional asr model
9393
# use vad, punc, spk or not as you need
94-
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", \
95-
vad_model="fsmn-vad", vad_model_revision="v2.0.2", \
96-
punc_model="ct-punc-c", punc_model_revision="v2.0.2", \
97-
spk_model="cam++", spk_model_revision="v2.0.2")
94+
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
95+
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
96+
punc_model="ct-punc-c", punc_model_revision="v2.0.2",
97+
# spk_model="cam++", spk_model_revision="v2.0.2",
98+
)
9899
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
99-
batch_size=64,
100+
batch_size_s=300,
100101
hotword='魔搭')
101102
print(res)
102103
```

README_zh.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,13 @@ funasr +model=paraformer-zh +vad_model="fsmn-vad" +punc_model="ct-punc" +input=a
8787
from funasr import AutoModel
8888
# paraformer-zh is a multi-functional asr model
8989
# use vad, punc, spk or not as you need
90-
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2", \
91-
vad_model="fsmn-vad", vad_model_revision="v2.0.2", \
92-
punc_model="ct-punc-c", punc_model_revision="v2.0.2", \
93-
spk_model="cam++", spk_model_revision="v2.0.2")
90+
model = AutoModel(model="paraformer-zh", model_revision="v2.0.2",
91+
vad_model="fsmn-vad", vad_model_revision="v2.0.2",
92+
punc_model="ct-punc-c", punc_model_revision="v2.0.2",
93+
# spk_model="cam++", spk_model_revision="v2.0.2",
94+
)
9495
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
95-
batch_size=64,
96+
batch_size_s=300,
9697
hotword='魔搭')
9798
print(res)
9899
```
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
2+
3+
python funasr/bin/inference.py \
4+
--config-path="/Users/zhifu/funasr_github/test_local/funasr_cli_egs" \
5+
--config-name="config.yaml" \
6+
++init_param="/Users/zhifu/funasr_github/test_local/funasr_cli_egs/model.pt" \
7+
+tokenizer_conf.token_list="/Users/zhifu/funasr_github/test_local/funasr_cli_egs/tokens.txt" \
8+
+frontend_conf.cmvn_file="/Users/zhifu/funasr_github/test_local/funasr_cli_egs/am.mvn" \
9+
+input="data/wav.scp" \
10+
+output_dir="./outputs/debug" \
11+
+device="cuda" \
12+
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#!/usr/bin/env python3
2+
# -*- encoding: utf-8 -*-
3+
# Copyright FunASR (https://github.com/alibaba-damo-academy/FunASR). All Rights Reserved.
4+
# MIT License (https://opensource.org/licenses/MIT)
5+
6+
from funasr import AutoModel
7+
8+
chunk_size = [5, 10, 5] #[0, 10, 5] 600ms, [0, 8, 4] 480ms
9+
encoder_chunk_look_back = 0 #number of chunks to lookback for encoder self-attention
10+
decoder_chunk_look_back = 0 #number of encoder chunks to lookback for decoder cross-attention
11+
12+
model = AutoModel(model="/Users/zhifu/Downloads/modelscope_models/speech_SCAMA_asr-zh-cn-16k-common-vocab8358-streaming", model_revision="v2.0.2")
13+
cache = {}
14+
res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
15+
chunk_size=chunk_size,
16+
encoder_chunk_look_back=encoder_chunk_look_back,
17+
decoder_chunk_look_back=decoder_chunk_look_back,
18+
)
19+
print(res)
20+
21+
22+
import soundfile
23+
import os
24+
25+
wav_file = os.path.join(model.model_path, "example/asr_example.wav")
26+
speech, sample_rate = soundfile.read(wav_file)
27+
28+
chunk_stride = chunk_size[1] * 960 # 600ms、480ms
29+
30+
cache = {}
31+
total_chunk_num = int(len((speech)-1)/chunk_stride+1)
32+
for i in range(total_chunk_num):
33+
speech_chunk = speech[i*chunk_stride:(i+1)*chunk_stride]
34+
is_final = i == total_chunk_num - 1
35+
res = model.generate(input=speech_chunk,
36+
cache=cache,
37+
is_final=is_final,
38+
chunk_size=chunk_size,
39+
encoder_chunk_look_back=encoder_chunk_look_back,
40+
decoder_chunk_look_back=decoder_chunk_look_back,
41+
)
42+
print(res)
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
2+
model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online"
3+
model_revision="v2.0.2"
4+
5+
python funasr/bin/inference.py \
6+
+model=${model} \
7+
+model_revision=${model_revision} \
8+
+input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav" \
9+
+output_dir="./outputs/debug" \
10+
+device="cpu" \
11+

0 commit comments

Comments (0)