Skip to content

Commit

Permalink
Fixed the problem that the duplicated characters would be merged as one. (#669)
Browse files Browse the repository at this point in the history
  • Loading branch information
winlaic authored Oct 30, 2024
1 parent ba01902 commit 8983aa4
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion sherpa/csrc/offline-ctc-one-best-decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ std::vector<OfflineCtcDecoderResult> OfflineCtcOneBestDecoder::Decode(

OfflineCtcDecoderResult *p = results.data();

bool last_token_is_blank = false;

for (int32_t i = 0, t = 0; i != labels.numel(); ++i) {
int32_t token = acc[i];

Expand All @@ -63,9 +65,10 @@ std::vector<OfflineCtcDecoderResult> OfflineCtcOneBestDecoder::Decode(

if (token == 0) {
++t;
last_token_is_blank = true;
continue;
}
if (t != 0 && !p->tokens.empty() && token == p->tokens.back()) {
if (t != 0 && !p->tokens.empty() && token == p->tokens.back() && (!last_token_is_blank)) {
// This is a repeat, skip it.
++t;
continue;
Expand All @@ -74,6 +77,7 @@ std::vector<OfflineCtcDecoderResult> OfflineCtcOneBestDecoder::Decode(
p->tokens.push_back(token);
p->timestamps.push_back(t);
++t;
last_token_is_blank = false;
} // for (int32_t i = 0, t = 0; i != labels.numel(); ++i)

return results;
Expand Down

0 comments on commit 8983aa4

Please sign in to comment.