
Lore generator #2

Merged
merged 17 commits into from
Oct 17, 2024
6 changes: 6 additions & 0 deletions .gitignore
@@ -1 +1,7 @@
*/__pycache__/
lore-generator/milvusdemo.db
lore-generator/.env
lore-generator/slides_files/
*.db
*.html
*.lock
72 changes: 72 additions & 0 deletions lore-generator/README.md
@@ -0,0 +1,72 @@
# Loremaster 6000

Loremaster 6000 is a tool for generating fictional worlds and the historical events that take place within them.

Loremaster is built to showcase

- [Outlines](https://github.com/dottxt-ai/outlines)
- [Milvus](https://milvus.io/)
- Agent frameworks
- RAG with vector databases

## Features

- Generates a new fictional world with a setting and description.
- Proposes new lore entries for the world, and refines them based on information requests and search results from the Milvus database.
- Inserts the refined lore entries into the Milvus database for future use.
- Provides a rich text-based interface with panels and markdown formatting for displaying the world, lore entries, and search results.

## Requirements

- Python 3.8 or higher
- Packages:
- `python-dotenv` (imported as `dotenv`)
- `pydantic`
- `pymilvus`
- `requests`
- `rich`
- `sentence_transformers`
- `outlines`

## Usage

1. Clone the repository:
```
git clone https://github.com/dottxt-ai/demos.git
cd demos/lore-generator
```

2. Install the required packages:
```
pip install -r requirements.txt
```

3. Run the script:
```
python main.py
```

The script will prompt you to provide a seed for the world. After that, it will generate a new world, propose lore entries, and refine them based on the information in the Milvus database.

## How it Works

1. The script sets up the Milvus vector database and the embedding and language models.
2. The user provides a seed for the world, which is used to generate a new fictional world.
3. The script proposes a new lore entry for the world and retrieves relevant information from the Milvus database.
4. The language model refines the lore entry proposal based on the retrieved information and the world description.
5. The refined lore entry is then inserted into the Milvus database.
6. The process repeats, generating new lore entries and refining them based on the existing lore.

The script uses the `outlines` library to generate the world and lore entries, and the `sentence_transformers` library to encode the lore entries for storage in the Milvus database.
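The generate-retrieve-refine loop above can be sketched in a few lines of Python. The helper names here (`generate_world`, `propose_entry`, `retrieve_related`, `refine_entry`) are hypothetical stand-ins for the `outlines` and Milvus calls in `main.py`, and a plain list stands in for the vector database:

```python
# Hypothetical sketch of the lore loop; stubs stand in for the real
# outlines generation calls and Milvus vector searches in main.py.

def generate_world(seed: str) -> str:
    # In main.py this is a structured outlines generation call.
    return f"A world grown from the seed: {seed}"

def propose_entry(world: str, n: int) -> str:
    # Step 3: propose a new lore entry for the world.
    return f"Lore entry {n} set in {world!r}"

def retrieve_related(db: list, proposal: str, limit: int = 3) -> list:
    # Stand-in for a Milvus similarity search over existing lore.
    return db[-limit:]

def refine_entry(proposal: str, context: list) -> str:
    # Step 4: the LLM refines the proposal against retrieved lore.
    return proposal + f" (refined against {len(context)} related entries)"

lore_db = []  # stands in for the Milvus "lore" collection
world = generate_world("a drowned empire")

for i in range(3):
    proposal = propose_entry(world, i)
    context = retrieve_related(lore_db, proposal)
    entry = refine_entry(proposal, context)
    lore_db.append(entry)  # step 5: insert the refined entry
```

Each iteration sees the entries inserted before it, which is how later lore stays consistent with earlier lore.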

## Contributing

If you find any issues or have suggestions for improvements, feel free to open an issue or submit a pull request.

The Loremaster 6000 could easily be made more interactive, more detailed, or more interesting with better prompting, structure, or tooling.

Go play with it!

## License

This project is licensed under the [MIT License](LICENSE).
131 changes: 131 additions & 0 deletions lore-generator/api.py
@@ -0,0 +1,131 @@
import hashlib
import json
import os
import time
from typing import Optional

import requests
from dotenv import load_dotenv
from requests.exceptions import HTTPError

load_dotenv(override=True)

# API configuration; host and key are read from the environment
API_HOST = os.environ.get("DOTTXT_API_HOST", "api.dottxt.co")
API_KEY = os.environ.get("DOTTXT_API_KEY", None)

def check_api_key() -> None:
    if not API_KEY:
        raise ValueError("DOTTXT_API_KEY environment variable is not set")

def get_headers(api_key: Optional[str] = None) -> dict:
    if api_key is None:
        check_api_key()
        api_key = API_KEY
    return {"Authorization": f"Bearer {api_key}"}

# In-memory cache mapping a schema hash to its completion URL
SCHEMA_HASH_TO_COMPLETION_URL = {}

def to_hash(pydantic_class):
    # Hash the model's JSON schema so identical schemas share one endpoint.
    schema = pydantic_class.model_json_schema()
    schema_string = json.dumps(schema)
    return hashlib.sha256(schema_string.encode()).hexdigest()

def poll_status(url: str, api_key: Optional[str] = None) -> dict:
    # Poll the status URL until the schema leaves the "in_progress" state.
    headers = get_headers(api_key)
    while True:
        status_res = requests.get(url, headers=headers)
        status_json = status_res.json()
        if status_res.status_code != 200 or status_json["status"] != "in_progress":
            break
        time.sleep(1)
    return status_json

def get_schema_by_name(name: str, api_key: Optional[str] = None) -> Optional[dict]:
    headers = get_headers(api_key)
    try:
        response = requests.get(f"https://{API_HOST}/v1/json-schemas", headers=headers)
        response.raise_for_status()
        schemas = response.json()['items']

        for schema in schemas:
            if schema['name'] == name:
                return schema
        return None
    except HTTPError as e:
        if e.response.status_code == 403:
            raise ValueError("Authentication failed. Please check your API key.") from e
        raise


def create_schema(schema: str, name: str, api_key: Optional[str] = None) -> dict:
    data = {"name": name, "json_schema": schema}
    headers = get_headers(api_key)
    try:
        response = requests.post(
            f"https://{API_HOST}/v1/json-schemas",
            headers=headers,
            json=data
        )
        response.raise_for_status()
        return response.json()
    except HTTPError as e:
        if e.response.status_code == 403:
            raise ValueError("Authentication failed. Please check your API key.") from e
        raise


def get_completion_endpoint(model_class, api_key: Optional[str] = None):
    schema_hash = to_hash(model_class)

    if schema_hash in SCHEMA_HASH_TO_COMPLETION_URL:
        return SCHEMA_HASH_TO_COMPLETION_URL[schema_hash]

    # Next, check whether this schema_hash is already stored by calling
    # GET https://api.dottxt.co/v1/json-schemas
    schema_response = get_schema_by_name(schema_hash, api_key)

    # If the schema exists, poll its status and return the completion URL
    if schema_response:
        status_url = schema_response["status_url"]
        final_status = poll_status(status_url, api_key)
        completion_url = final_status["completion_url"]
        if completion_url:
            SCHEMA_HASH_TO_COMPLETION_URL[schema_hash] = completion_url
            return completion_url

    # We don't have a completion URL for this schema, so create the schema.
    schema_string = json.dumps(model_class.model_json_schema())
    schema_response = create_schema(schema_string, schema_hash, api_key)

    # Wait for the schema to finish being created
    status_url = schema_response["status_url"]
    final_status = poll_status(status_url, api_key)

    completion_url = final_status["completion_url"]
    if not completion_url:
        raise ValueError(f"No completion URL available for schema: {schema_hash}")

    SCHEMA_HASH_TO_COMPLETION_URL[schema_hash] = completion_url
    return completion_url

def create_completion(model_class, prompt: str, max_tokens: int = 30000, api_key: Optional[str] = None):
    completion_url = get_completion_endpoint(model_class, api_key)
    data = {"prompt": prompt, "max_tokens": max_tokens}
    headers = get_headers(api_key)
    completion_response = requests.post(completion_url, headers=headers, json=data)
    completion_response.raise_for_status()

    # Parse the JSON body and validate it into the pydantic model
    completion_response_json = completion_response.json()
    model = model_class.model_validate_json(completion_response_json['data'])

    return model
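The caching in `get_completion_endpoint` hinges on `to_hash` being deterministic: equal JSON schemas always map to the same key, so a model class only ever triggers one schema-creation round trip. The stdlib-only snippet below reproduces that hashing step on a hand-written schema dict (a hypothetical stand-in for what pydantic's `model_json_schema()` would emit), rather than calling the live API:

```python
import hashlib
import json

# Hypothetical JSON schema, standing in for model_json_schema() output
# from a small lore model; stdlib-only for illustration.
schema = {
    "type": "object",
    "properties": {
        "name": {"type": "string"},
        "content": {"type": "string"},
    },
    "required": ["name", "content"],
}

def to_hash(schema: dict) -> str:
    # Mirrors to_hash() in api.py: serialize the schema, then take a
    # sha256 digest; identical schemas share one cached completion URL.
    schema_string = json.dumps(schema)
    return hashlib.sha256(schema_string.encode()).hexdigest()

key = to_hash(schema)
assert key == to_hash(schema)  # deterministic, so repeat calls hit the cache
assert len(key) == 64          # sha256 hex digest
```

One caveat: `json.dumps` without `sort_keys=True` preserves insertion order, so two schemas that differ only in key order would hash differently; pydantic emits keys in a stable order, so this is not an issue for the cache as used here.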
54 changes: 54 additions & 0 deletions lore-generator/custom.scss
@@ -0,0 +1,54 @@
/*-- scss:defaults --*/

$body-bg: #1d1d1b;
$body-color: #fff;
$link-color: #42affa;

$font-family-sans-serif: "PP Neue Montreal", sans-serif;
$font-family-monospace: monospace;
/*-- scss:rules --*/

.reveal .slide blockquote {
  border-left: 3px solid $text-muted;
  padding-left: 0.5em;
}

body {
  font-family: "PP Neue Montreal", sans-serif;
}

code, pre, .sourceCode {
  font-family: monospace;
  border: none !important;
  max-height: 100% !important;
}

.reveal pre {
  font-family: monospace;
  display: flex;
  justify-content: center;
  align-items: center;
}

.reveal code {
  width: fit-content;
}

// .sourceCode {
//   // Centers the code blocks in the slides
//   display: flex;
//   justify-content: center;
//   width: 100%;
// }

.quarto-title-affiliation {
  font-size: 1.5em !important;
  margin-bottom: 0 !important;
  margin-top: 0 !important;
}

.title-slide {
  text-align: center;
}

68 changes: 68 additions & 0 deletions lore-generator/db-explorer.py
@@ -0,0 +1,68 @@
from pymilvus import MilvusClient
from pymilvus import model
from rich import print
from rich.panel import Panel

# Initialize the Milvus client
client = MilvusClient("milvusdemo.db")

# Initialize the embedding function
embedding_fn = model.DefaultEmbeddingFunction()

def semantic_search(query: str, limit: int = 5):
    """
    Perform a semantic search on the 'lore' collection.

    Args:
        query (str): The search query.
        limit (int): The maximum number of results to return.

    Returns:
        list: A list of search results.
    """
    # Embed the query
    query_vector = embedding_fn.encode_documents([query])[0]

    # Perform the search
    results = client.search(
        collection_name="lore",
        data=[query_vector],
        output_fields=["name", "content", "keywords"],
        limit=limit,
    )

    return results[0]  # Return the first (and only) query result

def main():
    # Get collection statistics
    stats = client.get_collection_stats("lore")
    total_records = stats["row_count"]

    print("Welcome to the Lore Explorer!")
    print(f"Total records in the database: {total_records}\n")

    while True:
        # Get user input
        query = input("Enter your search query (or 'quit' to exit): ")

        if query.lower() == 'quit':
            break

        # Perform the search
        results = semantic_search(query)

        # Display results
        print(f"\nSearch results for: '{query}'\n")
        for i, result in enumerate(results, 1):
            entity = result['entity']
            print(Panel.fit(
                f"[bold]Name:[/bold] {entity['name']}\n\n"
                f"[bold]Content:[/bold] {entity['content']}\n\n"
                f"[bold]Keywords:[/bold] {', '.join(entity['keywords'])}\n\n"
                f"[bold]Distance:[/bold] {result['distance']}",
                title=f"Result {i}"
            ))
        print("\n")

if __name__ == "__main__":
    main()