From 8983aa4039dd05be6b9183d7d366d55e8373bf31 Mon Sep 17 00:00:00 2001 From: WINLAIC Date: Wed, 30 Oct 2024 21:19:58 +0800 Subject: [PATCH] Fixed the problem that the duplicated characters would be merged as one. (#669) --- sherpa/csrc/offline-ctc-one-best-decoder.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sherpa/csrc/offline-ctc-one-best-decoder.cc b/sherpa/csrc/offline-ctc-one-best-decoder.cc index 5f11692e5..a46cacdcd 100644 --- a/sherpa/csrc/offline-ctc-one-best-decoder.cc +++ b/sherpa/csrc/offline-ctc-one-best-decoder.cc @@ -50,6 +50,8 @@ std::vector OfflineCtcOneBestDecoder::Decode( OfflineCtcDecoderResult *p = results.data(); + bool last_token_is_blank = false; + for (int32_t i = 0, t = 0; i != labels.numel(); ++i) { int32_t token = acc[i]; @@ -63,9 +65,10 @@ std::vector OfflineCtcOneBestDecoder::Decode( if (token == 0) { ++t; + last_token_is_blank = true; continue; } - if (t != 0 && !p->tokens.empty() && token == p->tokens.back()) { + if (t != 0 && !p->tokens.empty() && token == p->tokens.back() && (!last_token_is_blank)) { // This is a repeat, skip it. ++t; continue; @@ -74,6 +77,7 @@ std::vector OfflineCtcOneBestDecoder::Decode( p->tokens.push_back(token); p->timestamps.push_back(t); ++t; + last_token_is_blank = false; } // for (int32_t i = 0, t = 0; i != labels.numel(); ++i) return results;