Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ Notes: Support recognition of single audio file, as well as file list in Kaldi-s
from funasr import AutoModel
# paraformer-zh is a multi-functional asr model
# use vad, punc, spk or not as you need
model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc-c",
model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc",
# spk_model="cam++",
)
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
Expand Down
2 changes: 1 addition & 1 deletion README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ funasr ++model=paraformer-zh ++vad_model="fsmn-vad" ++punc_model="ct-punc" ++inp
from funasr import AutoModel
# paraformer-zh is a multi-functional asr model
# use vad, punc, spk or not as you need
model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc-c",
model = AutoModel(model="paraformer-zh", vad_model="fsmn-vad", punc_model="ct-punc",
# spk_model="cam++"
)
res = model.generate(input=f"{model.model_path}/example/asr_example.wav",
Expand Down
8 changes: 4 additions & 4 deletions examples/industrial_data_pretraining/bicif_paraformer/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

from funasr import AutoModel

model = AutoModel(model="damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model = AutoModel(model="iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model_revision="v2.0.4",
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.4",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
punc_model="iic/punc_ct-transformer_cn-en-common-vocab471067-large",
punc_model_revision="v2.0.4",
# spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
# spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
# spk_model_revision="v2.0.2",
)

Expand Down
11 changes: 6 additions & 5 deletions examples/industrial_data_pretraining/bicif_paraformer/demo.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@

model="damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model="iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model_revision="v2.0.4"
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.4"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
punc_model_revision="v2.0.3"
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
#punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
punc_model="iic/punc_ct-transformer_cn-en-common-vocab471067-large"
punc_model_revision="v2.0.4"
spk_model="iic/speech_campplus_sv_zh-cn_16k-common"
spk_model_revision="v2.0.2"

python funasr/bin/inference.py \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from funasr import AutoModel

model = AutoModel(model="damo/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model = AutoModel(model="iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model_revision="v2.0.4", device="cpu")

res = model.export(type="onnx", quantize=False)
Expand Down
2 changes: 1 addition & 1 deletion examples/industrial_data_pretraining/campplus_sv/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from funasr import AutoModel

model = AutoModel(model="damo/speech_campplus_sv_zh-cn_16k-common",
model = AutoModel(model="iic/speech_campplus_sv_zh-cn_16k-common",
model_revision="v2.0.2",
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from funasr import AutoModel

model = AutoModel(model="damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404", model_revision="v2.0.4")
model = AutoModel(model="iic/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404", model_revision="v2.0.4")

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
hotword='达摩院 魔搭')
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

model="damo/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404"
model="iic/speech_paraformer-large-contextual_asr_nat-zh-cn-16k-common-vocab8404"
model_revision="v2.0.4"

python ../../../funasr/bin/inference.py \
Expand Down
4 changes: 2 additions & 2 deletions examples/industrial_data_pretraining/ct_transformer/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

from funasr import AutoModel

model = AutoModel(model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch", model_revision="v2.0.4")
model = AutoModel(model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch", model_revision="v2.0.4")

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_text/punc_example.txt")
print(res)


from funasr import AutoModel

model = AutoModel(model="damo/punc_ct-transformer_cn-en-common-vocab471067-large", model_revision="v2.0.4")
model = AutoModel(model="iic/punc_ct-transformer_cn-en-common-vocab471067-large", model_revision="v2.0.4")

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_text/punc_example.txt")
print(res)
4 changes: 2 additions & 2 deletions examples/industrial_data_pretraining/ct_transformer/demo.sh
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@

#model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
#model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
#model_revision="v2.0.4"

model="damo/punc_ct-transformer_cn-en-common-vocab471067-large"
model="iic/punc_ct-transformer_cn-en-common-vocab471067-large"
model_revision="v2.0.4"

python funasr/bin/inference.py \
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

model="damo/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727"
model="iic/punc_ct-transformer_zh-cn-common-vad_realtime-vocab272727"
model_revision="v2.0.4"

python funasr/bin/inference.py \
Expand Down
2 changes: 1 addition & 1 deletion examples/industrial_data_pretraining/emotion2vec/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from funasr import AutoModel

# model="damo/emotion2vec_base"
# model="iic/emotion2vec_base"
model = AutoModel(model="iic/emotion2vec_base_finetuned", model_revision="v2.0.4")

wav_file = f"{model.model_path}/example/test.wav"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from funasr import AutoModel
wav_file = "https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/vad_example.wav"

model = AutoModel(model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch", model_revision="v2.0.4")
model = AutoModel(model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch", model_revision="v2.0.4")

res = model.generate(input=wav_file)
print(res)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@


model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
model_revision="v2.0.4"

python funasr/bin/inference.py \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from funasr import AutoModel

model = AutoModel(model="damo/speech_timestamp_prediction-v1-16k-offline", model_revision="v2.0.4")
model = AutoModel(model="iic/speech_timestamp_prediction-v1-16k-offline", model_revision="v2.0.4")

res = model.generate(input=("https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
"欢迎大家来到魔搭社区进行体验"),
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

model="damo/speech_timestamp_prediction-v1-16k-offline"
model="iic/speech_timestamp_prediction-v1-16k-offline"
model_revision="v2.0.4"

python funasr/bin/inference.py \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,13 @@

from funasr import AutoModel

model = AutoModel(model="damo/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model = AutoModel(model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model_revision="v2.0.4",
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.4",
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
punc_model_revision="v2.0.4",
spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
spk_model_revision="v2.0.2"
)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@

model="damo/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model_revision="v2.0.4"
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.4"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
punc_model_revision="v2.0.4"
spk_model="damo/speech_campplus_sv_zh-cn_16k-common"
spk_model="iic/speech_campplus_sv_zh-cn_16k-common"
spk_model_revision="v2.0.2"

python funasr/bin/inference.py \
Expand Down
10 changes: 5 additions & 5 deletions examples/industrial_data_pretraining/paraformer/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@

model = AutoModel(model="iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model_revision="v2.0.4",
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
vad_model_revision="v2.0.4",
punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
punc_model_revision="v2.0.4",
# vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
# vad_model_revision="v2.0.4",
# punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
# punc_model_revision="v2.0.4",
# spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
# spk_model_revision="v2.0.2",
)
Expand All @@ -22,7 +22,7 @@
''' can not use currently
from funasr import AutoFrontend

frontend = AutoFrontend(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.4")
frontend = AutoFrontend(model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.4")

fbanks = frontend(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav", batch_size=2)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ torchrun \
--nnodes 1 \
--nproc_per_node ${gpu_num} \
funasr/bin/train.py \
++model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
++model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch" \
++model_revision="v2.0.4" \
++train_data_set_list="${train_data}" \
++valid_data_set_list="${val_data}" \
Expand Down
2 changes: 1 addition & 1 deletion examples/industrial_data_pretraining/paraformer/infer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr

output_dir="./outputs/debug"

model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model_revision="v2.0.4"

device="cuda:0" # "cuda:0" for gpu0, "cuda:1" for gpu1, "cpu"
Expand Down
2 changes: 1 addition & 1 deletion examples/industrial_data_pretraining/scama/demo.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online"
model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online"
model_revision="v2.0.4"

python funasr/bin/inference.py \
Expand Down
6 changes: 3 additions & 3 deletions examples/industrial_data_pretraining/seaco_paraformer/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@

model = AutoModel(model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch",
model_revision="v2.0.4",
# vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch",
# vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
# vad_model_revision="v2.0.4",
# punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
# punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch",
# punc_model_revision="v2.0.4",
# spk_model="damo/speech_campplus_sv_zh-cn_16k-common",
# spk_model="iic/speech_campplus_sv_zh-cn_16k-common",
# spk_model_revision="v2.0.2",
)

Expand Down
6 changes: 3 additions & 3 deletions examples/industrial_data_pretraining/seaco_paraformer/demo.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@

model="damo/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model="iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model_revision="v2.0.4"
vad_model="damo/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch"
vad_model_revision="v2.0.4"
punc_model="damo/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
punc_model="iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch"
punc_model_revision="v2.0.4"

python funasr/bin/inference.py \
Expand Down
2 changes: 1 addition & 1 deletion examples/industrial_data_pretraining/uniasr/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
''' can not use currently
from funasr import AutoFrontend

frontend = AutoFrontend(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.4")
frontend = AutoFrontend(model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch", model_revision="v2.0.4")

fbanks = frontend(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav", batch_size=2)

Expand Down
2 changes: 1 addition & 1 deletion examples/industrial_data_pretraining/uniasr/demo.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model="iic/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch"
model_revision="v2.0.4"

python funasr/bin/inference.py \
Expand Down
10 changes: 8 additions & 2 deletions examples/industrial_data_pretraining/whisper/demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,14 @@
from funasr import AutoModel

model = AutoModel(model="iic/Whisper-large-v3",
model_revision="v2.0.4",
model_revision="v2.0.5",
vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",
)

res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav", language=None)
res = model.generate(
language=None,
task="transcribe",
batch_size_s=0,
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")

print(res)
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,11 @@
# model = AutoModel(model="Whisper-small", hub="openai")
# model = AutoModel(model="Whisper-medium", hub="openai")
# model = AutoModel(model="Whisper-large-v2", hub="openai")
model = AutoModel(model="Whisper-large-v3", hub="openai")
model = AutoModel(model="Whisper-large-v3", hub="openai", vad_model="iic/speech_fsmn_vad_zh-cn-16k-common-pytorch",)

res = model.generate(
language=None,
task="transcribe",
batch_size_s=0,
input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
print(res)
2 changes: 1 addition & 1 deletion funasr/auto/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def inference_with_vad(self, input, input_len=None, **cfg):
# step.2 compute asr model
model = self.model
deep_update(kwargs, cfg)
batch_size = int(kwargs.get("batch_size_s", 300))*1000
batch_size = max(int(kwargs.get("batch_size_s", 300))*1000, 1)
batch_size_threshold_ms = int(kwargs.get("batch_size_threshold_s", 60))*1000
kwargs["batch_size"] = batch_size

Expand Down
Loading