
Commit e4b3333

Lore generator (#2)
* setting up the lore generator code
* more intermediate code
* strugglin' to make modal work
* more stuff
* switch to LM Studio
* match new brand guidelines
* finish slides?
* tidy main code
* formatting code
* slides polishing
* more commenting
* update requirements
* cleaning requirements
* update main code for the presentation
* better split of code, use local inference
* polish all the code + add README
* final touches
1 parent 245ebb4 commit e4b3333

File tree

10 files changed: +1027 -2 lines changed


.gitignore

+6
@@ -1 +1,7 @@
 */__pycache__/
+lore-generator/milvusdemo.db
+lore-generator/.env
+lore-generator/slides_files/
+*.db
+*.html
+*.lock

lore-generator/README.md

+72
@@ -0,0 +1,72 @@
# Loremaster 6000

Loremaster 6000 is a tool for generating fictional worlds and the historical events that take place in them.

Loremaster is built to showcase:

- [Outlines](https://github.com/dottxt-ai/outlines)
- [Milvus](https://milvus.io/)
- Agent frameworks
- RAG with vector databases

## Features

- Generates a new fictional world with a setting and description.
- Proposes new lore entries for the world, and refines them based on information requests and search results from the Milvus database.
- Inserts the refined lore entries into the Milvus database for future use.
- Provides a rich text-based interface with panels and markdown formatting for displaying the world, lore entries, and search results.

## Requirements

- Python 3.7 or higher
- Packages:
  - `dotenv`
  - `pydantic`
  - `pymilvus`
  - `requests`
  - `rich`
  - `sentence_transformers`
  - `outlines`

## Usage

1. Clone the repository:

   ```
   git clone https://github.com/dottxt-ai/demos.git
   cd demos/lore-generator
   ```

2. Install the required packages:

   ```
   pip install -r requirements.txt
   ```

3. Run the script:

   ```
   python main.py
   ```

The script will prompt you to provide a seed for the world. It then generates a new world, proposes lore entries, and refines them against the information already stored in the Milvus database.

## How it Works

1. The script sets up the Milvus vector database and the embedding and language models.
2. The user provides a seed, which is used to generate a new fictional world.
3. The script proposes a new lore entry for the world and retrieves relevant information from the Milvus database.
4. The language model refines the lore entry proposal based on the retrieved information and the world description.
5. The refined lore entry is then inserted into the Milvus database.
6. The process repeats, generating new lore entries and refining them based on the existing lore.

The script uses the `outlines` library to generate the world and lore entries, and the `sentence_transformers` library to encode the lore entries for storage in the Milvus database.
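
As a rough illustration of that encode-and-store step, here is a minimal, hypothetical sketch (not code from this repository); the `lore` collection, its field names, and the embedding model are assumptions made for the example.

```python
# Hypothetical sketch of encoding a lore entry and storing it in Milvus.
# The "lore" collection, its fields, and the embedding model are illustrative
# assumptions, not code taken from this repository.
from pymilvus import MilvusClient
from sentence_transformers import SentenceTransformer

client = MilvusClient("milvusdemo.db")             # local Milvus Lite database file
encoder = SentenceTransformer("all-MiniLM-L6-v2")  # assumed 384-dimensional embedding model

# Assumes a "lore" collection with a 384-dimensional vector field already exists.
entry = {"name": "The Sundering", "content": "A cataclysm that split the old empire."}
vector = encoder.encode(entry["content"])          # embed the lore entry's text

client.insert(
    collection_name="lore",
    data=[{"vector": vector.tolist(), "name": entry["name"], "content": entry["content"]}],
)
```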
## Contributing

If you find any issues or have suggestions for improvements, feel free to open an issue or submit a pull request.

The Loremaster 6000 could easily be made more interactive, more detailed, or more interesting with better prompting/structure/etc.

Go play with it!

## License

This project is licensed under the [MIT License](LICENSE).

lore-generator/api.py

+131
@@ -0,0 +1,131 @@
import hashlib
import json
import os
import time

import requests
from dotenv import load_dotenv
from typing import Optional
from requests.exceptions import HTTPError

load_dotenv(override=True)

API_HOST = os.environ.get("DOTTXT_API_HOST", "api.dottxt.co")
API_KEY = os.environ.get("DOTTXT_API_KEY", None)

def check_api_key() -> None:
    if not API_KEY:
        raise ValueError("DOTTXT_API_KEY environment variable is not set")

def get_headers(api_key: Optional[str] = None) -> dict:
    if api_key is None:
        check_api_key()
        api_key = API_KEY
    return {"Authorization": f"Bearer {api_key}"}

# Cache mapping a schema's hash to the completion URL created for it.
SCHEMA_HASH_TO_COMPLETION_URL = {}

def to_hash(pydantic_class):
    schema = pydantic_class.model_json_schema()
    schema_string = json.dumps(schema)
    return hashlib.sha256(schema_string.encode()).hexdigest()

def poll_status(url: str, api_key: Optional[str] = None) -> dict:
    headers = get_headers(api_key)
    while True:
        status_res = requests.get(url, headers=headers)
        status_json = status_res.json()
        if status_res.status_code != 200 or status_json["status"] != "in_progress":
            break
        time.sleep(1)
    return status_json

def get_schema_by_name(name: str, api_key: Optional[str] = None) -> Optional[dict]:
    headers = get_headers(api_key)
    try:
        response = requests.get(f"https://{API_HOST}/v1/json-schemas", headers=headers)
        response.raise_for_status()
        schemas = response.json()['items']

        for schema in schemas:
            if schema['name'] == name:
                return schema
        return None
    except HTTPError as e:
        if e.response.status_code == 403:
            raise ValueError("Authentication failed. Please check your API key.") from e
        raise

def create_schema(schema: str, name: str, api_key: Optional[str] = None) -> dict:
    data = {"name": name, "json_schema": schema}
    headers = get_headers(api_key)
    try:
        response = requests.post(
            f"https://{API_HOST}/v1/json-schemas",
            headers=headers,
            json=data
        )
        response.raise_for_status()
        return response.json()
    except HTTPError as e:
        if e.response.status_code == 403:
            raise ValueError("Authentication failed. Please check your API key.") from e
        raise

def get_completion_endpoint(model_class, api_key: Optional[str] = None):
    schema_hash = to_hash(model_class)

    if schema_hash in SCHEMA_HASH_TO_COMPLETION_URL:
        return SCHEMA_HASH_TO_COMPLETION_URL[schema_hash]

    # Check next whether the schema_hash is already stored by calling
    # GET https://api.dottxt.co/v1/json-schemas
    schema_response = get_schema_by_name(schema_hash, api_key)

    # If the schema exists, poll its status and return the completion URL
    if schema_response:
        status_url = schema_response["status_url"]
        final_status = poll_status(status_url, api_key)
        completion_url = final_status["completion_url"]
        if completion_url:
            SCHEMA_HASH_TO_COMPLETION_URL[schema_hash] = completion_url
            return completion_url

    # Okay, we don't have a completion URL for this schema. Let's create it.
    schema_string = json.dumps(model_class.model_json_schema())
    schema_response = create_schema(schema_string, schema_hash, api_key)

    # If we get here, we need to wait for the schema to be created
    status_url = schema_response["status_url"]
    final_status = poll_status(status_url, api_key)

    completion_url = final_status["completion_url"]
    if not completion_url:
        raise ValueError(f"No completion URL available for schema: {schema_hash}")

    SCHEMA_HASH_TO_COMPLETION_URL[schema_hash] = completion_url
    return completion_url

def create_completion(model_class, prompt: str, max_tokens: int = 30000, api_key: Optional[str] = None):
    completion_url = get_completion_endpoint(model_class, api_key)
    data = {"prompt": prompt, "max_tokens": max_tokens}
    headers = get_headers(api_key)
    completion_response = requests.post(completion_url, headers=headers, json=data)
    completion_response.raise_for_status()

    # Parse the JSON response
    completion_response_json = completion_response.json()

    # Convert to the pydantic model
    model = model_class.model_validate_json(completion_response_json['data'])

    return model
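
For orientation, here is a hypothetical usage sketch of `api.py`; the `World` model and the prompt below are illustrative assumptions, not part of this commit.

```python
# Hypothetical usage of api.py; the World model and prompt are illustrative only.
from pydantic import BaseModel

from api import create_completion

class World(BaseModel):
    name: str
    description: str

# Requires DOTTXT_API_KEY to be set in the environment (or passed via api_key=).
world = create_completion(World, "Invent a fictional world.", max_tokens=500)
print(world.name, world.description)
```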

lore-generator/custom.scss

+54
@@ -0,0 +1,54 @@
/*-- scss:defaults --*/

$body-bg: #1d1d1b;
$body-color: #fff;
$link-color: #42affa;

$font-family-sans-serif: "PP Neue Montreal", sans-serif;
$font-family-monospace: monospace;

/*-- scss:rules --*/

.reveal .slide blockquote {
  border-left: 3px solid $text-muted;
  padding-left: 0.5em;
}

body {
  font-family: "PP Neue Montreal", sans-serif;
}

code, pre, .sourceCode {
  font-family: monospace;
  border: none !important;
  max-height: 100% !important;
}

.reveal pre {
  font-family: monospace;
  display: flex;
  justify-content: center;
  align-items: center;
}

.reveal code {
  width: fit-content;
}

// .sourceCode {
//   // Centers the code blocks in the slides
//   display: flex;
//   justify-content: center;
//   width: 100%;
// }

.quarto-title-affiliation {
  font-size: 1.5em !important;
  margin-bottom: 0 !important;
  margin-top: 0 !important;
}

.title-slide {
  text-align: center;
}

lore-generator/db-explorer.py

+68
@@ -0,0 +1,68 @@
from pymilvus import MilvusClient
from pymilvus import model
from rich import print
from rich.panel import Panel

# Initialize the Milvus client
client = MilvusClient("milvusdemo.db")

# Initialize the embedding function
embedding_fn = model.DefaultEmbeddingFunction()

def semantic_search(query: str, limit: int = 5):
    """
    Perform a semantic search on the 'lore' collection.

    Args:
        query (str): The search query.
        limit (int): The maximum number of results to return.

    Returns:
        list: A list of search results.
    """
    # Embed the query
    query_vector = embedding_fn.encode_documents([query])[0]

    # Perform the search
    results = client.search(
        collection_name="lore",
        data=[query_vector],
        output_fields=["name", "content", "keywords"],
        limit=limit,
    )

    return results[0]  # Return the first (and only) query result

def main():
    # Get collection statistics
    stats = client.get_collection_stats("lore")
    total_records = stats["row_count"]

    print("Welcome to the Lore Explorer!")
    print(f"Total records in the database: {total_records}\n")

    while True:
        # Get user input
        query = input("Enter your search query (or 'quit' to exit): ")

        if query.lower() == 'quit':
            break

        # Perform the search
        results = semantic_search(query)

        # Display results
        print(f"\nSearch results for: '{query}'\n")
        for i, result in enumerate(results, 1):
            entity = result['entity']
            print(Panel.fit(
                f"[bold]Name:[/bold] {entity['name']}\n\n"
                f"[bold]Content:[/bold] {entity['content']}\n\n"
                f"[bold]Keywords:[/bold] {', '.join(entity['keywords'])}\n\n"
                f"[bold]Distance:[/bold] {result['distance']}",
                title=f"Result {i}"
            ))
        print("\n")

if __name__ == "__main__":
    main()
