From 6b891bf320228e2f30f2a1b05d78e1378abb4d48 Mon Sep 17 00:00:00 2001 From: Robert Date: Thu, 31 Oct 2024 22:36:17 -0700 Subject: [PATCH] RAG Test works now... --- App_Function_Libraries/RAG/RAG_Library_2.py | 17 +- Config_Files/Backup_Config.txt | 7 +- Config_Files/config.txt | 9 +- Tests/RAG/test_RAG_Library_2.py | 658 +++++++------------- 4 files changed, 235 insertions(+), 456 deletions(-) diff --git a/App_Function_Libraries/RAG/RAG_Library_2.py b/App_Function_Libraries/RAG/RAG_Library_2.py index 3af8b92f8..ad80eef1e 100644 --- a/App_Function_Libraries/RAG/RAG_Library_2.py +++ b/App_Function_Libraries/RAG/RAG_Library_2.py @@ -42,6 +42,15 @@ # Read the configuration file config.read('config.txt') + +search_functions = { + "Media DB": search_media_db, + "RAG Chat": search_rag_chat, + "RAG Notes": search_rag_notes, + "Character Chat": search_character_chat, + "Character Cards": search_character_cards +} + # RAG pipeline function for web scraping # def rag_web_scraping_pipeline(url: str, query: str, api_choice=None) -> Dict[str, Any]: # try: @@ -483,14 +492,6 @@ def perform_full_text_search(query: str, database_type: str, relevant_ids: List[ fts_top_k = 10 # Call appropriate search function based on database type - search_functions = { - "Media DB": search_media_db, - "RAG Chat": search_rag_chat, - "RAG Notes": search_rag_notes, - "Character Chat": search_character_chat, - "Character Cards": search_character_cards - } - if database_type not in search_functions: raise ValueError(f"Unsupported database type: {database_type}") diff --git a/Config_Files/Backup_Config.txt b/Config_Files/Backup_Config.txt index 71413766d..3d497b8b8 100644 --- a/Config_Files/Backup_Config.txt +++ b/Config_Files/Backup_Config.txt @@ -17,6 +17,7 @@ mistral_model = mistral-large-latest mistral_api_key = > custom_openai_api_key = custom_openai_api_ip = +default_api = openai [Local-API] kobold_api_IP = http://127.0.0.1:5001/api/v1/generate @@ -44,6 +45,10 @@ words_per_second = 3 save_character_chats = False save_rag_chats = False +[Auto-Save] +save_character_chats = False +save_rag_chats = False + [Prompts] prompt_sample = "What is the meaning of life?" video_summarize_prompt = "Above is the transcript of a video. Please read through the transcript carefully. Identify the main topics that are discussed over the course of the transcript. Then, summarize the key points about each main topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, but should be much shorter than the full transcript. Please output your bullet point summary inside tags. Do not repeat yourself while writing the summary." @@ -58,7 +63,7 @@ elasticsearch_port = 9200 # Additionally you can use elasticsearch as the database type, just replace `sqlite` with `elasticsearch` for `type` and provide the `elasticsearch_host` and `elasticsearch_port` of your configured ES instance. chroma_db_path = Databases/chroma_db prompts_db_path = Databases/prompts.db -rag_qa_db_path = Databases/rag_qa.db +rag_qa_db_path = Databases/RAG_QA_Chat.db character_db_path = Databases/chatDB.db [Embeddings] diff --git a/Config_Files/config.txt b/Config_Files/config.txt index a0b079af5..45099b81a 100644 --- a/Config_Files/config.txt +++ b/Config_Files/config.txt @@ -17,6 +17,7 @@ mistral_model = mistral-large-latest mistral_api_key = > custom_openai_api_key = custom_openai_api_ip = +default_api = openai [Local-API] kobold_api_IP = http://127.0.0.1:5001/api/v1/generate @@ -42,6 +43,11 @@ processing_choice = cuda chunk_duration = 30 words_per_second = 3 +[Auto-Save] +save_character_chats = False +save_rag_chats = False + + [Prompts] prompt_sample = "What is the meaning of life?" video_summarize_prompt = "Above is the transcript of a video. Please read through the transcript carefully. Identify the main topics that are discussed over the course of the transcript. Then, summarize the key points about each main topic in bullet points. The bullet points should cover the key information conveyed about each topic in the video, but should be much shorter than the full transcript. Please output your bullet point summary inside tags. Do not repeat yourself while writing the summary." @@ -56,7 +62,8 @@ elasticsearch_port = 9200 # Additionally you can use elasticsearch as the database type, just replace `sqlite` with `elasticsearch` for `type` and provide the `elasticsearch_host` and `elasticsearch_port` of your configured ES instance. chroma_db_path = Databases/chroma_db prompts_db_path = Databases/prompts.db -rag_qa_db_path = Databases/rag_qa.db +rag_qa_db_path = Databases/RAG_QA_Chat.db +character_db_path = Databases/chatDB.db [Embeddings] embedding_provider = openai diff --git a/Tests/RAG/test_RAG_Library_2.py b/Tests/RAG/test_RAG_Library_2.py index d0dc7d057..b31353cd1 100644 --- a/Tests/RAG/test_RAG_Library_2.py +++ b/Tests/RAG/test_RAG_Library_2.py @@ -1,9 +1,9 @@ # Tests/RAG/test_rag_functions.py - +import configparser import os import sys -import unittest -from unittest.mock import patch, MagicMock +import pytest +from unittest.mock import MagicMock from typing import List, Dict, Any # Adjust the path to the parent directory of App_Function_Libraries @@ -15,455 +15,221 @@ from App_Function_Libraries.RAG.RAG_Library_2 import ( fetch_relevant_media_ids, perform_vector_search, - perform_full_text_search + perform_full_text_search, + enhanced_rag_pipeline, + enhanced_rag_pipeline_chat, + generate_answer, + fetch_relevant_chat_ids, + fetch_all_chat_ids, + filter_results_by_keywords, + extract_media_id_from_result ) -class TestRAGFunctions(unittest.TestCase): - """ - Unit tests for RAG-related functions. - """ - - @patch('App_Function_Libraries.RAG.RAG_Library_2.fetch_keywords_for_media') - def test_fetch_relevant_media_ids_success(self, mock_fetch_keywords_for_media): - """ - Test fetch_relevant_media_ids with valid keywords. - """ - # Setup mock return values - mock_fetch_keywords_for_media.side_effect = lambda keyword: { +def test_fetch_relevant_media_ids_success(mocker): + """Test fetch_relevant_media_ids with valid keywords.""" + mock_fetch_keywords_for_media = mocker.patch( + 'App_Function_Libraries.RAG.RAG_Library_2.fetch_keywords_for_media', + side_effect=lambda keyword: { 'geography': [1, 2], 'cities': [2, 3, 4] }.get(keyword, []) - - # Input keywords - keywords = ['geography', 'cities'] - - # Call the function - result = fetch_relevant_media_ids(keywords) - - # Expected result is the union of media_ids: [1,2,3,4] - self.assertEqual(sorted(result), [1, 2, 3, 4]) - - # Assert fetch_keywords_for_media was called correctly - mock_fetch_keywords_for_media.assert_any_call('geography') - mock_fetch_keywords_for_media.assert_any_call('cities') - self.assertEqual(mock_fetch_keywords_for_media.call_count, 2) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.fetch_keywords_for_media') - def test_fetch_relevant_media_ids_empty_keywords(self, mock_fetch_keywords_for_media): - """ - Test fetch_relevant_media_ids with an empty keywords list. - """ - keywords = [] - result = fetch_relevant_media_ids(keywords) - self.assertEqual(result, []) - mock_fetch_keywords_for_media.assert_not_called() - - @patch('App_Function_Libraries.RAG.RAG_Library_2.fetch_keywords_for_media') - @patch('App_Function_Libraries.RAG.RAG_Library_2.logging') - def test_fetch_relevant_media_ids_exception(self, mock_logging, mock_fetch_keywords_for_media): - """ - Test fetch_relevant_media_ids when fetch_keywords_for_media raises an exception. - """ - # Configure the mock to raise an exception - mock_fetch_keywords_for_media.side_effect = Exception("Database error") - - keywords = ['geography', 'cities'] - result = fetch_relevant_media_ids(keywords) - - # The function should return an empty list upon exception - self.assertEqual(result, []) - - # Assert that errors were logged for both keywords - mock_logging.error.assert_any_call("Error fetching relevant media IDs for keyword 'geography': Database error") - mock_logging.error.assert_any_call("Error fetching relevant media IDs for keyword 'cities': Database error") - self.assertEqual(mock_logging.error.call_count, 2) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.vector_search') - @patch('App_Function_Libraries.RAG.RAG_Library_2.chroma_client') - def test_perform_vector_search_with_relevant_media_ids(self, mock_chroma_client, mock_vector_search): - """ - Test perform_vector_search with relevant_media_ids provided. - """ - # Setup mock chroma_client to return a list of collections - mock_collection = MagicMock() - mock_collection.name = 'collection1' - mock_chroma_client.list_collections.return_value = [mock_collection] - - # Setup mock vector_search to return search results - mock_vector_search.return_value = [ - {'content': 'Document 1', 'metadata': {'media_id': 1}}, - {'content': 'Document 2', 'metadata': {'media_id': 2}}, - {'content': 'Document 3', 'metadata': {'media_id': 3}}, - ] - - # Input parameters - query = 'sample query' - relevant_media_ids = [1, 3] - - # Call the function - result = perform_vector_search(query, relevant_media_ids) - - # Expected to filter out media_id 2 - expected = [ - {'content': 'Document 1', 'metadata': {'media_id': 1}}, - {'content': 'Document 3', 'metadata': {'media_id': 3}}, - ] - self.assertEqual(result, expected) - - # Assert chroma_client.list_collections was called once - mock_chroma_client.list_collections.assert_called_once() - - # Assert vector_search was called with correct arguments - mock_vector_search.assert_called_once_with('collection1', query, k=10) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.vector_search') - @patch('App_Function_Libraries.RAG.RAG_Library_2.chroma_client') - def test_perform_vector_search_without_relevant_media_ids(self, mock_chroma_client, mock_vector_search): - """ - Test perform_vector_search without relevant_media_ids (None). - """ - # Setup mock chroma_client to return a list of collections - mock_collection = MagicMock() - mock_collection.name = 'collection1' - mock_chroma_client.list_collections.return_value = [mock_collection] - - # Setup mock vector_search to return search results - mock_vector_search.return_value = [ - {'content': 'Document 1', 'metadata': {'media_id': 1}}, - {'content': 'Document 2', 'metadata': {'media_id': 2}}, - ] - - # Input parameters - query = 'sample query' - relevant_media_ids = None - - # Call the function - result = perform_vector_search(query, relevant_media_ids) - - # Expected to return all results - expected = [ - {'content': 'Document 1', 'metadata': {'media_id': 1}}, - {'content': 'Document 2', 'metadata': {'media_id': 2}}, - ] - self.assertEqual(result, expected) - - # Assert chroma_client.list_collections was called once - mock_chroma_client.list_collections.assert_called_once() - - # Assert vector_search was called with correct arguments - mock_vector_search.assert_called_once_with('collection1', query, k=10) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.search_db') - def test_perform_full_text_search_with_relevant_media_ids(self, mock_search_db): - """ - Test perform_full_text_search with relevant_media_ids provided. - """ - # Setup mock search_db to return search results - mock_search_db.return_value = [ - {'content': 'Full text document 1', 'id': 1}, - {'content': 'Full text document 2', 'id': 2}, - {'content': 'Full text document 3', 'id': 3}, - ] - - # Input parameters - query = 'full text query' - relevant_media_ids = [1, 3] - - # Call the function - result = perform_full_text_search(query, relevant_media_ids, fts_top_k=10) - - # Expected to filter out id 2 - expected = [ - {'content': 'Full text document 1', 'metadata': {'media_id': 1}}, - {'content': 'Full text document 3', 'metadata': {'media_id': 3}}, - ] - self.assertEqual(result, expected) - - # Assert search_db was called with correct arguments - mock_search_db.assert_called_once_with( - query, ['content'], '', page=1, results_per_page=10) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.search_db') - def test_perform_full_text_search_without_relevant_media_ids(self, mock_search_db): - """ - Test perform_full_text_search without relevant_media_ids (None). - """ - # Setup mock search_db to return search results - mock_search_db.return_value = [ - {'content': 'Full text document 1', 'id': 1}, - {'content': 'Full text document 2', 'id': 2}, - ] - - # Input parameters - query = 'full text query' - relevant_media_ids = None - - # Call the function - result = perform_full_text_search(query, relevant_media_ids) - - # Expected to return all results - expected = [ - {'content': 'Full text document 1', 'metadata': {'media_id': 1}}, - {'content': 'Full text document 2', 'metadata': {'media_id': 2}}, - ] - self.assertEqual(result, expected) - - # Assert search_db was called with correct arguments - mock_search_db.assert_called_once_with( - query, ['content'], '', page=1, results_per_page=10) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.search_db') - def test_perform_full_text_search_empty_results(self, mock_search_db): - """ - Test perform_full_text_search when search_db returns no results. - """ - # Setup mock search_db to return empty list - mock_search_db.return_value = [] - - # Input parameters - query = 'full text query' - relevant_media_ids = [1, 2] - - # Call the function - result = perform_full_text_search(query, relevant_media_ids) - - # Expected to return an empty list - expected = [] - self.assertEqual(result, expected) - - # Assert search_db was called with correct arguments - mock_search_db.assert_called_once_with( - query, ['content'], '', page=1, results_per_page=10) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.fetch_keywords_for_media') - @patch('App_Function_Libraries.RAG.RAG_Library_2.logging') - def test_fetch_relevant_media_ids_partial_failure(self, mock_logging, mock_fetch_keywords_for_media): - """ - Test fetch_relevant_media_ids when fetch_keywords_for_media partially fails. - """ - - # Configure the mock to raise an exception for one keyword - def side_effect(keyword): - if keyword == 'geography': - return [1, 2] - elif keyword == 'cities': - raise Exception("Database error") - return [] - - mock_fetch_keywords_for_media.side_effect = side_effect - - keywords = ['geography', 'cities'] - result = fetch_relevant_media_ids(keywords) - - # The function should still return media_ids for 'geography' and skip 'cities' - self.assertEqual(sorted(result), [1, 2]) - - # Assert that an error was logged for 'cities' - mock_logging.error.assert_called_once_with( - "Error fetching relevant media IDs for keyword 'cities': Database error") - - @patch('App_Function_Libraries.RAG.RAG_Library_2.chroma_client') - @patch('App_Function_Libraries.RAG.RAG_Library_2.vector_search') - def test_perform_vector_search_no_collections(self, mock_vector_search, mock_chroma_client): - """ - Test perform_vector_search when there are no collections. - """ - # Setup mock chroma_client to return an empty list of collections - mock_chroma_client.list_collections.return_value = [] - - # Input parameters - query = 'sample query' - relevant_media_ids = [1, 2] - - # Call the function - result = perform_vector_search(query, relevant_media_ids) - - # Expected to return an empty list since there are no collections - expected = [] - self.assertEqual(result, expected) - - # Assert chroma_client.list_collections was called once - mock_chroma_client.list_collections.assert_called_once() - - # Assert vector_search was not called since there are no collections - mock_vector_search.assert_not_called() - - @patch('App_Function_Libraries.RAG.RAG_Library_2.fetch_keywords_for_media') - def test_fetch_relevant_media_ids_duplicate_media_ids(self, mock_fetch_keywords_for_media): - """ - Test fetch_relevant_media_ids with duplicate media_ids across keywords. - """ - # Setup mock return values with overlapping media_ids - mock_fetch_keywords_for_media.side_effect = lambda keyword: { - 'science': [1, 2, 3], - 'technology': [3, 4, 5], - 'engineering': [5, 6], - }.get(keyword, []) - - # Input keywords - keywords = ['science', 'technology', 'engineering'] - - # Call the function - result = fetch_relevant_media_ids(keywords) - - # Expected result is the unique union of media_ids: [1,2,3,4,5,6] - self.assertEqual(sorted(result), [1, 2, 3, 4, 5, 6]) - - # Assert fetch_keywords_for_media was called correctly - mock_fetch_keywords_for_media.assert_any_call('science') - mock_fetch_keywords_for_media.assert_any_call('technology') - mock_fetch_keywords_for_media.assert_any_call('engineering') - self.assertEqual(mock_fetch_keywords_for_media.call_count, 3) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.search_db') - def test_perform_full_text_search_case_insensitive_filtering(self, mock_search_db): - """ - Test perform_full_text_search with case-insensitive filtering of media_ids. - """ - # Setup mock search_db to return mixed-case media_ids - mock_search_db.return_value = [ - {'content': 'Full text document 1', 'id': '1'}, - {'content': 'Full text document 2', 'id': '2'}, - {'content': 'Full text document 3', 'id': '3'}, - ] - - # Input parameters with media_ids as strings - query = 'full text query' - relevant_media_ids = ['1', '3'] - - # Call the function - result = perform_full_text_search(query, relevant_media_ids) - - # Expected to filter out id '2' - expected = [ - {'content': 'Full text document 1', 'metadata': {'media_id': '1'}}, - {'content': 'Full text document 3', 'metadata': {'media_id': '3'}}, - ] - self.assertEqual(result, expected) - - # Assert search_db was called with correct arguments - mock_search_db.assert_called_once_with( - query, ['content'], '', page=1, results_per_page=10) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.search_db') - def test_perform_full_text_search_multiple_pages(self, mock_search_db): - """ - Test perform_full_text_search with multiple pages of results. - Note: The current implementation fetches only the first page. - """ - # Setup mock search_db to return results from the first page - mock_search_db.return_value = [ - {'content': 'Full text document 1', 'id': 1}, - {'content': 'Full text document 2', 'id': 2}, - {'content': 'Full text document 3', 'id': 3}, - {'content': 'Full text document 4', 'id': 4}, - {'content': 'Full text document 5', 'id': 5}, - ] - - # Input parameters - query = 'full text query' - relevant_media_ids = [1, 2, 3, 4, 5] - - # Call the function - result = perform_full_text_search(query, relevant_media_ids) - - # Expected to return all results - expected = [ - {'content': 'Full text document 1', 'metadata': {'media_id': 1}}, - {'content': 'Full text document 2', 'metadata': {'media_id': 2}}, - {'content': 'Full text document 3', 'metadata': {'media_id': 3}}, - {'content': 'Full text document 4', 'metadata': {'media_id': 4}}, - {'content': 'Full text document 5', 'metadata': {'media_id': 5}}, - ] - self.assertEqual(result, expected) - - # Assert search_db was called with correct arguments - mock_search_db.assert_called_once_with( - query, ['content'], '', page=1, results_per_page=10) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.chroma_client') - @patch('App_Function_Libraries.RAG.RAG_Library_2.vector_search') - def test_perform_vector_search_multiple_collections(self, mock_vector_search, mock_chroma_client): - """ - Test perform_vector_search with multiple collections. - """ - # Setup mock chroma_client to return multiple collections - mock_collection1 = MagicMock() - mock_collection1.name = 'collection1' - mock_collection2 = MagicMock() - mock_collection2.name = 'collection2' - mock_chroma_client.list_collections.return_value = [mock_collection1, mock_collection2] - - # Setup mock vector_search to return different results for each collection - def vector_search_side_effect(collection_name, query, k): - if collection_name == 'collection1': - return [ - {'content': 'Collection1 Document 1', 'metadata': {'media_id': 1}}, - {'content': 'Collection1 Document 2', 'metadata': {'media_id': 2}}, - ] - elif collection_name == 'collection2': - return [ - {'content': 'Collection2 Document 1', 'metadata': {'media_id': 3}}, - {'content': 'Collection2 Document 2', 'metadata': {'media_id': 4}}, - ] - return [] - - mock_vector_search.side_effect = vector_search_side_effect - - # Input parameters - query = 'sample query' - relevant_media_ids = [2, 3] - - # Call the function - result = perform_vector_search(query, relevant_media_ids) - - # Expected to filter and include media_id 2 and 3 - expected = [ - {'content': 'Collection1 Document 2', 'metadata': {'media_id': 2}}, - {'content': 'Collection2 Document 1', 'metadata': {'media_id': 3}}, - ] - self.assertEqual(result, expected) - - # Assert chroma_client.list_collections was called once - mock_chroma_client.list_collections.assert_called_once() - - # Assert vector_search was called twice with correct arguments - mock_vector_search.assert_any_call('collection1', query, k=10) - mock_vector_search.assert_any_call('collection2', query, k=10) - self.assertEqual(mock_vector_search.call_count, 2) - - @patch('App_Function_Libraries.RAG.RAG_Library_2.search_db') - def test_perform_full_text_search_partial_matches(self, mock_search_db): - """ - Test perform_full_text_search where some media_ids do not match the relevant_media_ids. - """ - # Setup mock search_db to return search results - mock_search_db.return_value = [ - {'content': 'Full text document 1', 'id': 1}, - {'content': 'Full text document 2', 'id': 2}, - {'content': 'Full text document 3', 'id': 3}, - {'content': 'Full text document 4', 'id': 4}, - ] - - # Input parameters - query = 'full text query' - relevant_media_ids = [2, 4] - - # Call the function - result = perform_full_text_search(query, relevant_media_ids) - - # Expected to include only media_id 2 and 4 - expected = [ - {'content': 'Full text document 2', 'metadata': {'media_id': 2}}, - {'content': 'Full text document 4', 'metadata': {'media_id': 4}}, - ] - self.assertEqual(result, expected) - - # Assert search_db was called with correct arguments - mock_search_db.assert_called_once_with( - query, ['content'], '', page=1, results_per_page=10) + ) + + keywords = ['geography', 'cities'] + result = fetch_relevant_media_ids(keywords) + assert sorted(result) == [1, 2, 3, 4] + + mock_fetch_keywords_for_media.assert_any_call('geography') + mock_fetch_keywords_for_media.assert_any_call('cities') + assert mock_fetch_keywords_for_media.call_count == 2 + + +def test_perform_full_text_search_with_relevant_ids(mocker): + """Test perform_full_text_search with relevant_ids provided.""" + # Create a transformed response matching the expected format + transformed_response = [ + {'content': 'Full text document 1', 'metadata': {'media_id': 1}}, + {'content': 'Full text document 3', 'metadata': {'media_id': 3}}, + ] + + # Mock the search functions mapping + search_function_mock = lambda query, fts_top_k, relevant_ids: transformed_response + search_functions_mock = { + "Media DB": search_function_mock + } + mocker.patch('App_Function_Libraries.RAG.RAG_Library_2.search_functions', search_functions_mock) + + query = 'full text query' + database_type = "Media DB" + relevant_ids = "1,3" + + result = perform_full_text_search(query, database_type, relevant_ids) + + expected = [ + {'content': 'Full text document 1', 'metadata': {'media_id': 1}}, + {'content': 'Full text document 3', 'metadata': {'media_id': 3}}, + ] + assert result == expected + + +def test_perform_full_text_search_without_relevant_ids(mocker): + """Test perform_full_text_search without relevant_ids.""" + # Create a transformed response matching the expected format + transformed_response = [ + {'content': 'Full text document 1', 'metadata': {'media_id': 1}}, + {'content': 'Full text document 2', 'metadata': {'media_id': 2}}, + ] + + # Mock the search functions mapping + search_function_mock = lambda query, fts_top_k, relevant_ids: transformed_response + search_functions_mock = { + "Media DB": search_function_mock + } + mocker.patch('App_Function_Libraries.RAG.RAG_Library_2.search_functions', search_functions_mock) + + query = 'full text query' + database_type = "Media DB" + relevant_ids = "" + + result = perform_full_text_search(query, database_type, relevant_ids) + + expected = [ + {'content': 'Full text document 1', 'metadata': {'media_id': 1}}, + {'content': 'Full text document 2', 'metadata': {'media_id': 2}}, + ] + assert result == expected + + +@pytest.mark.parametrize("database_type,search_module_path,mock_response", [ + ( + "Media DB", + 'App_Function_Libraries.DB.SQLite_DB.search_media_db', + [{'content': 'Media DB document 1', 'metadata': {'media_id': '1'}}] + ), + ( + "RAG Chat", + 'App_Function_Libraries.DB.RAG_QA_Chat_DB.search_rag_chat', + [{'content': 'RAG Chat document 1', 'metadata': {'media_id': '1'}}] + ), + ( + "RAG Notes", + 'App_Function_Libraries.DB.RAG_QA_Chat_DB.search_rag_notes', + [{'content': 'RAG Notes document 1', 'metadata': {'media_id': '1'}}] + ), + ( + "Character Chat", + 'App_Function_Libraries.DB.Character_Chat_DB.search_character_chat', + [{'content': 'Character Chat document 1', 'metadata': {'media_id': '1'}}] + ), + ( + "Character Cards", + 'App_Function_Libraries.DB.Character_Chat_DB.search_character_cards', + [{'content': 'Character Cards document 1', 'metadata': {'media_id': '1'}}] + ) +]) +def test_perform_full_text_search_different_db_types(mocker, database_type, search_module_path, mock_response): + """Test perform_full_text_search with different database types.""" + # Mock the search functions mapping with already transformed response + search_functions_mock = { + database_type: lambda query, fts_top_k, relevant_ids: mock_response + } + mocker.patch('App_Function_Libraries.RAG.RAG_Library_2.search_functions', search_functions_mock) + + query = 'test query' + relevant_ids = "1" + + result = perform_full_text_search(query, database_type, relevant_ids) + assert result == mock_response + + +def test_enhanced_rag_pipeline_success(mocker): + """Test enhanced_rag_pipeline with a successful flow.""" + # Mock config + mock_config = configparser.ConfigParser() + mock_config['Embeddings'] = {'provider': 'openai'} + mocker.patch('App_Function_Libraries.RAG.RAG_Library_2.config', mock_config) + + # Mock search functions + fts_result = [{'content': 'FTS result', 'id': 1}] + vector_result = [{'content': 'Vector result'}] + + mock_search = lambda *args, **kwargs: fts_result + search_functions_mock = { + "Media DB": mock_search + } + mocker.patch('App_Function_Libraries.RAG.RAG_Library_2.search_functions', search_functions_mock) + + mocker.patch( + 'App_Function_Libraries.RAG.RAG_Library_2.perform_vector_search', + return_value=vector_result + ) + + mocker.patch( + 'App_Function_Libraries.RAG.RAG_Library_2.generate_answer', + return_value='Generated answer' + ) + + # Mock relevant media IDs + mocker.patch( + 'App_Function_Libraries.RAG.RAG_Library_2.fetch_relevant_media_ids', + return_value=[1, 2, 3] + ) + + result = enhanced_rag_pipeline( + query='test query', + api_choice='OpenAI', + keywords='keyword1,keyword2', + database_types=["Media DB"] + ) + + # Check both vector and FTS results are in context + assert result['answer'] == 'Generated answer' + assert 'Vector result' in result['context'] + assert 'FTS result' in result['context'] + + +def test_enhanced_rag_pipeline_error_handling(mocker): + """Test enhanced_rag_pipeline error handling.""" + mock_config = configparser.ConfigParser() + mock_config['Embeddings'] = {'provider': 'openai'} + mocker.patch('App_Function_Libraries.RAG.RAG_Library_2.config', mock_config) + + mock_fetch_keywords_for_media = mocker.patch( + 'App_Function_Libraries.RAG.RAG_Library_2.fetch_relevant_media_ids', + side_effect=Exception("Fetch error") + ) + + result = enhanced_rag_pipeline( + query='test query', + api_choice='OpenAI', + keywords='keyword1', + database_types=["Media DB"] + ) + + assert "An error occurred" in result['answer'] + assert result['context'] == "" + + +def test_generate_answer_success(mocker): + """Test generate_answer with successful API call.""" + # Mock config + mock_config = configparser.ConfigParser() + mock_config['API'] = {'openai_api_key': 'test_key'} + mocker.patch( + 'App_Function_Libraries.RAG.RAG_Library_2.load_comprehensive_config', + return_value=mock_config + ) + + # Mock the summarization function + mock_summarize = mocker.patch( + 'App_Function_Libraries.Summarization.Summarization_General_Lib.summarize_with_openai', + return_value='API response' + ) + + result = generate_answer('OpenAI', 'Test context', 'Test query') + assert result == 'API response' if __name__ == '__main__': - unittest.main() \ No newline at end of file + pytest.main(['-v']) \ No newline at end of file