Update README.md

EleutherAI · Aug 13, 2024 · 934012b
1 parent f3f8d4d
commit 934012b
Showing 1 changed file with 8 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -78,19 +78,21 @@ index = MemmapIndex(
 ```python
 # Count how often each token in the corpus succeeds "hello world".
 print(index.count_next(tokenizer.encode("hello world")))
-
-# Parallelise over queries
 print(index.batch_count_next(
     [tokenizer.encode("hello world"), tokenizer.encode("hello universe")]
 ))
 
+# Get smoothed probabilities for the continuation of each query.
+print(index.smoothed_probs(tokenizer.encode("hello world")))
+print(index.batch_smoothed_probs(
+    [tokenizer.encode("hello world"), tokenizer.encode("hello universe")]
+))
+
 # Autoregressively sample 10 tokens using 5-gram language statistics. Initial
 # gram statistics are derived from the query, with lower order gram statistics used 
 # until the sequence contains at least 5 tokens.
-print(index.sample(tokenizer.encode("hello world"), n=5, k=10))
-
-# Parallelize over sequence generations
-print(index.batch_sample(tokenizer.encode("hello world"), n=5, k=10, num_samples=20))
+print(index.sample_smoothed(tokenizer.encode("hello world"), n=5, k=10, num_samples=20))
+print(index.sample_unsmoothed(tokenizer.encode("hello world"), n=5, k=10, num_samples=20))
 
 # Query whether the corpus contains "hello world"
 print(index.contains(tokenizer.encode("hello world")))