Commit 12496e5

LauraGPT, lyblsgo, and R1ckShi authored
streaming bugfix (#1271)
* funasr1.0 finetune
* funasr1.0 pbar
* update with main (#1260)
    * Update websocket_protocol_zh.md
    * update
* update with main (#1264)
    * Funasr1.0 (#1261): funasr1.0 finetune, funasr1.0 pbar, update with main (#1260)
* bug fix
* funasr1.0 sanm scama
* funasr1.0 infer_after_finetune
* funasr1.0 fsmn-vad bug fix

Co-authored-by: Yabin Li <[email protected]>
Co-authored-by: shixian.shi <[email protected]>
1 parent b28f3c9 · commit 12496e5

File tree

3 files changed: +5 −5 lines

  • examples/industrial_data_pretraining/paraformer_streaming/demo.py
  • funasr/models/fsmn_vad_streaming/model.py
  • funasr/models/paraformer_streaming/model.py


examples/industrial_data_pretraining/paraformer_streaming/demo.py

Lines changed: 0 additions & 1 deletion
@@ -10,7 +10,6 @@
 decoder_chunk_look_back = 1 #number of encoder chunks to lookback for decoder cross-attention

 model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online", model_revision="v2.0.2")
-cache = {}
 res = model.generate(input="https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav",
                      chunk_size=chunk_size,
                      encoder_chunk_look_back=encoder_chunk_look_back,
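The removed `cache = {}` sat in front of a one-shot `generate(...)` call that decodes a full audio URL, where the dict appears unused. For contrast, here is a minimal sketch of the chunk-by-chunk streaming pattern this demo directory targets, where a single cache dict must persist across calls (following FunASR's documented streaming usage; `chunk_size = [0, 10, 5]` and the local wav filename are assumptions, not part of this diff):

# Sketch of FunASR streaming usage; chunk_size values and the wav path are assumptions.
import soundfile
from funasr import AutoModel

chunk_size = [0, 10, 5]       # 600 ms chunks (10 x 60 ms), per the streaming docs
encoder_chunk_look_back = 4   # encoder chunks of left context for self-attention
decoder_chunk_look_back = 1   # encoder chunks to look back for decoder cross-attention

model = AutoModel(model="damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online",
                  model_revision="v2.0.2")

speech, sample_rate = soundfile.read("asr_example_zh.wav")  # hypothetical local file
chunk_stride = chunk_size[1] * 960  # samples per chunk at 16 kHz (10 x 60 ms x 16)

cache = {}  # one persistent dict, created once and shared across all chunk calls
total_chunk_num = (len(speech) - 1) // chunk_stride + 1
for i in range(total_chunk_num):
    speech_chunk = speech[i * chunk_stride:(i + 1) * chunk_stride]
    is_final = i == total_chunk_num - 1
    res = model.generate(input=speech_chunk,
                         cache=cache,
                         is_final=is_final,
                         chunk_size=chunk_size,
                         encoder_chunk_look_back=encoder_chunk_look_back,
                         decoder_chunk_look_back=decoder_chunk_look_back)
    print(res)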

funasr/models/fsmn_vad_streaming/model.py

Lines changed: 4 additions & 2 deletions
@@ -501,7 +501,9 @@ def forward(self, feats: torch.Tensor, waveform: torch.tensor, cache: dict = {},
         # self.AllResetDetection()
         return segments

+
     def init_cache(self, cache: dict = {}, **kwargs):
+
         cache["frontend"] = {}
         cache["prev_samples"] = torch.empty(0)
         cache["encoder"] = {}
@@ -528,7 +530,7 @@ def inference(self,
                   cache: dict = {},
                   **kwargs,
                   ):
-
+
         if len(cache) == 0:
             self.init_cache(cache, **kwargs)

@@ -583,7 +585,7 @@ def inference(self,

         cache["prev_samples"] = audio_sample[:-m]
         if _is_final:
-            cache = {}
+            self.init_cache(cache)

         ibest_writer = None
         if ibest_writer is None and kwargs.get("output_dir") is not None:
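The substantive fix is the last hunk: inside `inference`, the old `cache = {}` only rebound the local parameter name, so the caller's dict, passed in again for the next utterance, kept the finished stream's state. `self.init_cache(cache)` instead repopulates the same dict object in place. A standalone illustration of the difference (plain Python, not FunASR code; the `reset_*` names and keys are placeholders):

# Why `cache = {}` cannot reset the caller's state but in-place mutation can.

def reset_by_rebinding(cache: dict):
    cache = {}  # rebinds the local name only; the caller's dict is untouched

def reset_in_place(cache: dict):
    # stand-in for self.init_cache(cache): overwrite keys on the shared object
    cache.clear()
    cache["frontend"] = {}
    cache["encoder"] = {}

stream_cache = {"encoder": {"hidden": "stale state from the last utterance"}}

reset_by_rebinding(stream_cache)
print(stream_cache)  # {'encoder': {'hidden': ...}} -- the stale state survives

reset_in_place(stream_cache)
print(stream_cache)  # {'frontend': {}, 'encoder': {}} -- correctly reset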

funasr/models/paraformer_streaming/model.py

Lines changed: 1 addition & 2 deletions
@@ -502,8 +502,7 @@ def inference(self,
             logging.info("enable beam_search")
             self.init_beam_search(**kwargs)
             self.nbest = kwargs.get("nbest", 1)
-
-
+
         if len(cache) == 0:
             self.init_cache(cache, **kwargs)

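Taken together, both streaming models now follow the same cache lifecycle: lazily initialize the shared dict on the first chunk, then reset it in place when the final chunk arrives. A condensed sketch of that lifecycle (hypothetical class; only the `init_cache` keys shown in the fsmn_vad diff above are real):

import torch

class StreamingSketch:
    # Hypothetical condensation of the cache lifecycle, not FunASR's exact code.

    def init_cache(self, cache: dict, **kwargs):
        cache["frontend"] = {}
        cache["prev_samples"] = torch.empty(0)
        cache["encoder"] = {}

    def inference(self, chunk, cache: dict, is_final: bool = False, **kwargs):
        if len(cache) == 0:          # first chunk of a new stream
            self.init_cache(cache, **kwargs)

        # ... per-chunk feature extraction and decoding would happen here ...

        if is_final:
            self.init_cache(cache)   # reset in place (not `cache = {}`), so the
                                     # caller's dict is fresh for the next stream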
