Skip to content

Commit

Permalink
Update README.md
Browse files Browse the repository at this point in the history
  • Loading branch information
luciaquirke authored Aug 13, 2024
1 parent f3f8d4d commit 934012b
Showing 1 changed file with 8 additions and 6 deletions.
14 changes: 8 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,19 +78,21 @@ index = MemmapIndex(
```python
# Count how often each token in the corpus follows "hello world".
print(index.count_next(tokenizer.encode("hello world")))

# Parallelize over queries
print(index.batch_count_next(
[tokenizer.encode("hello world"), tokenizer.encode("hello universe")]
))

# Get smoothed probabilities for the continuation of each query.
print(index.smoothed_probs(tokenizer.encode("hello world")))
print(index.batch_smoothed_probs(
[tokenizer.encode("hello world"), tokenizer.encode("hello universe")]
))

# Autoregressively sample 10 tokens using 5-gram language statistics. Initial
# n-gram statistics are derived from the query, with lower-order n-gram statistics
# used until the sequence contains at least 5 tokens.
print(index.sample(tokenizer.encode("hello world"), n=5, k=10))

# Parallelize over sequence generations
print(index.batch_sample(tokenizer.encode("hello world"), n=5, k=10, num_samples=20))
print(index.sample_smoothed(tokenizer.encode("hello world"), n=5, k=10, num_samples=20))
print(index.sample_unsmoothed(tokenizer.encode("hello world"), n=5, k=10, num_samples=20))

# Query whether the corpus contains "hello world"
print(index.contains(tokenizer.encode("hello world")))
Expand Down

0 comments on commit 934012b

Please sign in to comment.