@@ -120,11 +120,43 @@ def o200k_base():
120120 }
121121
122122
def o200k_harmony():
    """Build the constructor arguments for the ``o200k_harmony`` encoding.

    The split pattern and mergeable ranks are taken unchanged from
    ``o200k_base()``. The special-token table starts from the base
    encoding's special tokens, adds the explicitly named tokens listed
    below, and then adds one ``<|reserved_N|>`` placeholder for every id
    ``N`` in ``range(200013, 201088)``.

    Returns:
        A dict with the keys ``"name"``, ``"pat_str"``,
        ``"mergeable_ranks"`` and ``"special_tokens"``.
    """
    base_enc = o200k_base()

    special_tokens = {
        # Entries listed after the unpacked base table replace any
        # same-named token inherited from the base encoding.
        **base_enc["special_tokens"],
        "<|startoftext|>": 199998,
        "<|endoftext|>": 199999,
        "<|reserved_200000|>": 200000,
        "<|reserved_200001|>": 200001,
        "<|return|>": 200002,
        "<|constrain|>": 200003,
        "<|reserved_200004|>": 200004,
        "<|channel|>": 200005,
        "<|start|>": 200006,
        "<|end|>": 200007,
        "<|message|>": 200008,
        "<|reserved_200009|>": 200009,
        "<|reserved_200010|>": 200010,
        "<|reserved_200011|>": 200011,
        "<|call|>": 200012,
        # Placeholder names must follow the exact "<|reserved_N|>" spelling
        # used by the explicit entries above (no whitespace before "|>").
        # On a key clash the right-hand operand of "|" wins.
    } | {f"<|reserved_{i}|>": i for i in range(200013, 201088)}

    return {
        "name": "o200k_harmony",
        "pat_str": base_enc["pat_str"],
        "mergeable_ranks": base_enc["mergeable_ranks"],
        "special_tokens": special_tokens,
    }
152+
153+
# Maps each encoding name to the zero-argument function that builds it.
ENCODING_CONSTRUCTORS = {
    "gpt2": gpt2,
    "r50k_base": r50k_base,
    "p50k_base": p50k_base,
    "p50k_edit": p50k_edit,
    "cl100k_base": cl100k_base,
    "o200k_base": o200k_base,
    "o200k_harmony": o200k_harmony,
}
0 commit comments