Skip to content

Commit

Permalink
quicksave before large reformatting
Browse files Browse the repository at this point in the history
  • Loading branch information
latekvo committed Apr 15, 2024
1 parent 56cd69e commit b4800cc
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 80 deletions.
129 changes: 71 additions & 58 deletions core/chainables/web.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,79 +6,92 @@


def web_news_lookup(prompt_text: str):
    """Run a Google lookup for news related to ``prompt_text``.

    Wraps the prompt in a ``WebQuery`` of type ``"news"`` and returns
    whatever ``web_query_google_lookup`` produces for that query.
    """
    query = WebQuery("news", prompt_core=prompt_text)
    return web_query_google_lookup(query)


def web_wiki_lookup(prompt_text: str):
    """Run a Google lookup for encyclopedic content on ``prompt_text``.

    Wraps the prompt in a ``WebQuery`` of type ``"wiki"`` and returns
    whatever ``web_query_google_lookup`` produces for that query.
    """
    query = WebQuery("wiki", prompt_core=prompt_text)
    return web_query_google_lookup(query)


def web_docs_lookup(prompt_text: str):
    """Run a Google lookup for documentation on ``prompt_text``.

    Wraps the prompt in a ``WebQuery`` of type ``"docs"`` and returns
    whatever ``web_query_google_lookup`` produces for that query.
    """
    query = WebQuery("docs", prompt_core=prompt_text)
    return web_query_google_lookup(query)


def web_docs_lookup_prompt():
    """Build the chat prompt used to turn docs search results into a text.

    Returns:
        A ``ChatPromptTemplate`` with a system message and a user message.
        The user message expects the ``search_data`` and ``user_request``
        template variables.
    """
    # The current month/year is baked in when the prompt is built, so the
    # model is told the present date instead of assuming a knowledge cutoff.
    current_month = datetime.date.today().strftime("%B %Y")
    return ChatPromptTemplate.from_messages(
        [
            (
                "system",
                # Adjacent literals are concatenated verbatim, so every
                # sentence must carry its own trailing space.
                "You are a search results interpreter. "
                "Your job is to write a detailed instruction based on the provided context. "
                "Your job is to convert all the search results you were given into a long, "
                "comprehensive and clean output. "
                "Use context data to explain "
                "the topic of user request to the best of your ability. "
                "You don't have a knowledge cutoff. "
                "It is currently " + current_month,
            ),
            (
                "user",
                "Search results data: "
                "```"
                "{search_data}"
                "```"
                'User request: "Write an article on: {user_request}"',
            ),
        ]
    )


def web_wiki_lookup_prompt():
    """Build the chat prompt used to turn wiki search results into an article.

    Returns:
        A ``ChatPromptTemplate`` with a system message and a user message.
        The user message expects the ``search_data`` and ``user_request``
        template variables.
    """
    # The current month/year is baked in when the prompt is built, so the
    # model is told the present date instead of assuming a knowledge cutoff.
    current_month = datetime.date.today().strftime("%B %Y")
    return ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a search results interpreter. "
                "Your job is to write an article based on the provided context. "
                "Your job is to convert all the search results you were given into a long, "
                "comprehensive and clean output. "
                "Use context data to answer "
                "the user request to the best of your ability. "
                "You don't have a knowledge cutoff. "
                "It is currently " + current_month,
            ),
            (
                "user",
                "Search results data: "
                "```"
                "{search_data}"
                "```"
                'User request: "Write an article on: {user_request}"',
            ),
        ]
    )


def web_news_lookup_prompt():
    """Build the chat prompt used to turn news search results into an article.

    Returns:
        A ``ChatPromptTemplate`` with a system message and a user message.
        The user message expects the ``search_data`` and ``user_request``
        template variables.
    """
    # The current month/year is baked in when the prompt is built, so the
    # model is told the present date instead of assuming a knowledge cutoff.
    current_month = datetime.date.today().strftime("%B %Y")
    return ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a search results interpreter. "
                "Your job is to write an article based on the provided context. "
                "Your job is to convert all the search results you were given into a long, "
                "comprehensive and clean output. "
                "Use provided context to answer the user request to the best of your ability. "
                "You don't have a knowledge cutoff. "
                "It is currently " + current_month,
            ),
            (
                "user",
                "Search results data: "
                "```"
                "{search_data}"
                "```"
                'User request: "Write an article on: {user_request}"',
            ),
        ]
    )
49 changes: 27 additions & 22 deletions core/classes/query.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,68 @@
import datetime
from typing import Literal, Union

from core.tools import utils


class WebQuery:
    """Bundle all data required for embedding and search operations.

    The constructor derives the web search string, optional extra search
    parameters and the database settings from ``query_type`` and
    ``prompt_core``.
    """

    # Small chunks make it impossible to deduce full context
    # in presence of millions of other unrelated texts.
    # Small chunks are meaningful only when talking about a single topic.
    _DEFAULT_INFO_CHUNK_LENGTH = 800
    _DEFAULT_STORY_CHUNK_LENGTH = 1200
    _DEFAULT_PRIORITY = 1

    query_type: str

    prompt_core: str = ""

    web_query: str = ""

    web_extra_params: Union[dict, None] = None
    # Defaults to 0; the "news" branch replaces it with a string such as "qdr:m".
    web_tbs: Union[int, str] = 0

    db_search_query: str = ""  # query to search by
    db_embedding_prefix: str = ""  # prefixed to each article saved to faiss db
    db_embedding_postfix: str = ""  # postfixed -||-
    db_save_file_extension: str = ""  # most types will have dedicated db for them
    db_chunk_size: int = 600  # legacy default

    def __init__(
        self,
        query_type: Literal["basic", "wiki", "news", "docs"],
        prompt_core: str,
        priority: int = _DEFAULT_PRIORITY,
    ):
        """Configure the query for the given type.

        Args:
            query_type: One of "basic", "wiki", "news" or "docs". Any other
                value leaves the class-level defaults in place.
            prompt_core: The user-provided topic the query is built around.
            priority: Stored as ``self.priority``; defaults to
                ``_DEFAULT_PRIORITY``.
        """
        self.query_type = query_type
        self.prompt_core = prompt_core
        # NOTE(review): this sets `db_embed_query`, while the class-level
        # attribute is `db_search_query` (only assigned in the "news" branch)
        # - confirm which name consumers read.
        self.db_embed_query = prompt_core  # query to search by
        self.priority = priority

        if query_type == "basic":
            self.web_query = prompt_core
            self.db_chunk_size = 800

        elif query_type == "wiki":
            # deprecated, use 'basic'
            self.web_query = "wikipedia " + prompt_core
            self.db_save_file_extension = "_facts"
            self.db_chunk_size = 600

        elif query_type == "news":
            # this prompt works well for Google News searches
            self.web_query = f"{prompt_core} news comprehensive overview "
            self.web_extra_params = {
                "tbm": "nws",  # news only
            }
            self.web_tbs = "qdr:m"  # last month only
            self.db_search_query = f"{prompt_core} news and innovations"
            self.db_save_file_extension = f"_news_{utils.gen_unix_time()}"
            self.db_chunk_size = 1200

        elif query_type == "docs":
            self.web_query = "documentation for " + prompt_core
            self.db_save_file_extension = "_docs"
            self.db_chunk_size = 600

0 comments on commit b4800cc

Please sign in to comment.