Skip to content

Commit

Permalink
Fixes #9, code clarification
Browse files Browse the repository at this point in the history
  • Loading branch information
JolanThomassin committed Dec 12, 2023
1 parent 5e716ce commit 8d2e67f
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 41 deletions.
23 changes: 1 addition & 22 deletions ailab/db/finesse/test_queries/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,11 @@ def get_random_chunk(cursor):
INNER JOIN
"louis_0.0.6".crawl cr ON hc.md5hash = cr.md5hash
WHERE
dc.score > 0.7
dc.score > 0.01
ORDER BY
RANDOM()
LIMIT
1;
"""
cursor.execute(query)
return cursor.fetchall()


def to_delete_fct(cursor):
    """Fetch a single high-scoring chunk together with its raw HTML content.

    Joins chunk_score to html_content via html_content_to_chunk and returns
    one row whose score exceeds 0.9, as (chunk_score_id, md5hash, html_content).
    """
    sql = """
    SELECT
        ch.id AS chunk_score_id,
        hc.md5hash AS md5hash_content_to_chunk,
        hc.content AS html_content
    FROM
        "louis_0.0.6".chunk_score ch
    LEFT JOIN
        "louis_0.0.6".html_content_to_chunk hctc ON ch.id = hctc.chunk_id
    LEFT JOIN
        "louis_0.0.6".html_content hc ON hctc.md5hash = hc.md5hash
    WHERE
        ch.score > 0.9
    LIMIT
        1;
    """
    cursor.execute(sql)
    return cursor.fetchall()
81 changes: 62 additions & 19 deletions bin/search-function-test-utilizing-llm.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
"""
Script Purpose:
This script generates questions based on provided prompts
and stores the responses as JSON files.
It interacts with the AI model to create questions
and saves the relevant data for each question in a JSON file.
Usage:
./search-function-test-utilizing-llm.sh PROMPT_PATH
Parameters:
- PROMPT_PATH: Directory containing the API prompt files
(qna_system_prompt.txt, qna_user_prompt.txt, and JSON template)
"""

import os
import sys
import json
Expand All @@ -11,7 +26,7 @@

# Constants
TEST_VERSION = date.today()
REQUIRED_QUESTIONS = 1
REQUIRED_QUESTIONS = 50
CHARACTER_LIMIT = 14383
STORAGE_PATH = "/home/vscode/finesse-data-2/qna"

Expand All @@ -26,7 +41,7 @@ def load_prompts_and_template(prompt_path):


def construct_user_prompt(user_prompt, random_chunk_str, json_template):
    """Constructs the user prompt using prompt, chunk and json template"""
    # Assemble the two sections separately, then concatenate: the base prompt
    # followed by the search-result JSON, then the JSON template to fill in.
    search_section = (
        f"{user_prompt}\n\nHere is the JSON containing the search:\n{random_chunk_str}"
    )
    template_section = f"\n\nAnd here is the JSON template:\n{json_template}"
    return search_section + template_section
Expand All @@ -42,26 +57,54 @@ def generate_question(system_prompt, user_prompt, json_template, project_db):
if not random_chunk:
print("No chunk found in the database.")
sys.exit(1) # exit the program if chunk is empty

constructed_user_prompt = construct_user_prompt(
user_prompt, str(random_chunk), json_template
)
total_length = len(system_prompt) + len(constructed_user_prompt)
average_tokens += total_length

if total_length < CHARACTER_LIMIT:
response = openai.get_chat_answer(
system_prompt, constructed_user_prompt, 2000
)
data = json.loads(response.choices[0].message.content)
if isinstance(data, dict):
for chunk in random_chunk:
data["text_content"] = chunk["text_content"]
save_response_to_file(data)

chunk_title = ""
for chunk in random_chunk:
chunk_title = chunk["title"]

### TO REMOVE ###
words_to_check = [
"This page is part",
"Cette page fait partie",
"Archivée",
"archivée",
"Archived",
"archived"
]

found_words = []

for word in words_to_check:
if word.lower() in chunk_title.lower():
found_words.append(word)

if found_words:
print("The following words were found in the string:")
for found_word in found_words:
print("-", found_word)
print("Skipping...")
else:
### TO REMOVE ###

constructed_user_prompt = construct_user_prompt(
user_prompt, str(random_chunk), json_template
)
total_length = len(system_prompt) + len(constructed_user_prompt)
average_tokens += total_length

if total_length < CHARACTER_LIMIT:
response = openai.get_chat_answer(
system_prompt, constructed_user_prompt, 2000
)
data = json.loads(response.choices[0].message.content)
if isinstance(data, dict):
for chunk in random_chunk:
data["text_content"] = chunk["text_content"]
save_response_to_file(data)

return average_tokens / REQUIRED_QUESTIONS


def save_response_to_file(data):
"""Saves the provided data to a new file"""
file_number = 1
Expand Down

0 comments on commit 8d2e67f

Please sign in to comment.