From f284f1c2772223db4dfbddb67bda76188a9ff1a3 Mon Sep 17 00:00:00 2001 From: Srikar Bhargav Durgi Date: Wed, 15 May 2024 15:00:57 +0530 Subject: [PATCH] fix typo in comment relating to regex in encoder.py --- src/encoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encoder.py b/src/encoder.py index 5f52e723c..a40f75819 100644 --- a/src/encoder.py +++ b/src/encoder.py @@ -49,7 +49,7 @@ def __init__(self, encoder, bpe_merges, errors='replace'): self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges)))) self.cache = {} - # Should haved added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions + # Should have added re.IGNORECASE so BPE merges can happen for capitalized versions of contractions self.pat = re.compile(r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+""") def bpe(self, token):