From c4240f0f075989bbf2148f724a657dd71b37e1e2 Mon Sep 17 00:00:00 2001 From: Swastika Dutta <166885959+sduttanv@users.noreply.github.com> Date: Mon, 9 Sep 2024 12:34:38 -0700 Subject: [PATCH] Add support for NeMo Retriever Text Reranking NIM in O-RAN chatbot (#187) * Add support for NeMo Retriever Text Reranking NIM in oran chatbot * Add default reranker and NIM reranker configurations for oran chatbot --- .../Multimodal_Assistant.py | 62 ++++++++++++------- community/oran-chatbot-multimodal/config.yaml | 6 +- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/community/oran-chatbot-multimodal/Multimodal_Assistant.py b/community/oran-chatbot-multimodal/Multimodal_Assistant.py index 86ab51e6..6cc26923 100644 --- a/community/oran-chatbot-multimodal/Multimodal_Assistant.py +++ b/community/oran-chatbot-multimodal/Multimodal_Assistant.py @@ -35,7 +35,7 @@ from retriever.retriever import Retriever, get_relevant_docs, get_relevant_docs_mq from utils.feedback import feedback_kwargs -from langchain_nvidia_ai_endpoints import ChatNVIDIA +from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIARerank from langchain_core.messages import HumanMessage from langchain_core.output_parsers import StrOutputParser from langchain_core.prompts import ChatPromptTemplate @@ -400,52 +400,66 @@ def load_config(cfg_arg): if rag_type == 1: augmented_queries = augment_multiple_query(transformed_query["text"]) queries = [transformed_query["text"]] + augmented_queries[2:] - print("Queries are = ", queries) + # print("Queries are = ", queries) retrieved_documents = [] retrieved_metadatas = [] + relevant_docs = [] for query in queries: ret_docs,cons,srcs = get_relevant_docs(CORE_DIR, query) for doc in ret_docs: retrieved_documents.append(doc.page_content) retrieved_metadatas.append(doc.metadata['source']) + relevant_docs.append(doc) print("length of retrieved docs: ", len(retrieved_documents)) #Remove all duplicated documents and retain the original metadata unique_documents = [] 
unique_documents_metadata = [] - for document,source in zip(retrieved_documents,retrieved_metadatas): + unique_relevant_documents = [] + for idx, (document,source) in enumerate(zip(retrieved_documents,retrieved_metadatas)): if document not in unique_documents: unique_documents.append(document) unique_documents_metadata.append(source) + unique_relevant_documents.append(relevant_docs[idx]) if len(retrieved_documents) == 0: context = "" print("not context found context") else: print("length of unique docs: ", len(unique_documents)) - #Instantiate the cross-encoder model and get scores for each retrieved document - cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2') # ('BAAI/bge-reranker-large')('cross-encoder/ms-marco-MiniLM-L-6-v2') - pairs = [[prompt, doc] for doc in unique_documents] - scores = cross_encoder.predict(pairs) - #Sort the scores from highest to least - order_ids = np.argsort(scores)[::-1] - # print(order_ids) + #Instantiate the re-ranker model and get scores for each retrieved document new_updated_documents = [] new_updated_sources = [] - #Get the top 6 scores - if len(order_ids)>=10: - for i in range(10): - new_updated_documents.append(unique_documents[order_ids[i]]) - new_updated_sources.append(unique_documents_metadata[order_ids[i]]) + if not config_yaml['Reranker_NIM']: + print("\n\nReranking with Cross-encoder model: ", config_yaml['reranker_model']) + cross_encoder = CrossEncoder(config_yaml['reranker_model']) + pairs = [[prompt, doc] for doc in unique_documents] + scores = cross_encoder.predict(pairs) + #Sort the scores from highest to least + order_ids = np.argsort(scores)[::-1] + #Get the top 10 scores + if len(order_ids)>=10: + for i in range(10): + new_updated_documents.append(unique_documents[order_ids[i]]) + new_updated_sources.append(unique_documents_metadata[order_ids[i]]) + else: + for i in range(len(order_ids)): + new_updated_documents.append(unique_documents[order_ids[i]]) + 
new_updated_sources.append(unique_documents_metadata[order_ids[i]]) else: - for i in range(len(order_ids)): - new_updated_documents.append(unique_documents[order_ids[i]]) - new_updated_sources.append(unique_documents_metadata[order_ids[i]]) + print("\n\nReranking with Retriever Text Reranking NIM model: ", config_yaml["reranker_model_name"]) + # Initialize and connect to the running NeMo Retriever Text Reranking NIM + reranker = NVIDIARerank(model=config_yaml["reranker_model_name"], + base_url=config_yaml["reranker_api_endpoint_url"], top_n=10) + reranked_chunks = reranker.compress_documents(query=transformed_query["text"], documents=unique_relevant_documents) + for chunks in reranked_chunks: + metadata = chunks.metadata + page_content = chunks.page_content + new_updated_documents.append(page_content) + new_updated_sources.append(metadata['source']) - print(new_updated_sources) - print(len(new_updated_documents)) + print("Reranking of completed for ", len(new_updated_documents), " chunks") context = "" - # sources = "" sources = {} for doc in new_updated_documents: context += doc + "\n\n" @@ -455,7 +469,7 @@ def load_config(cfg_arg): sources[src] = {"doc_content": sources[src]["doc_content"]+"\n\n"+new_updated_documents[i], "doc_metadata": src} else: sources[src] = {"doc_content": new_updated_documents[i], "doc_metadata": src} - print("length of source docs: ", len(sources)) + print("Length of unique source docs: ", len(sources)) #Send the top 10 results along with the query to LLM if rag_type == 2: @@ -486,7 +500,7 @@ def load_config(cfg_arg): print("length of unique docs: ", len(unique_documents)) #Instantiate the cross-encoder model and get scores for each retrieved document - cross_encoder = CrossEncoder('BAAI/bge-reranker-large') #('cross-encoder/ms-marco-MiniLM-L-6-v2') + cross_encoder = CrossEncoder(config_yaml['reranker_model']) pairs = [[prompt, doc] for doc in unique_documents] scores = cross_encoder.predict(pairs) #Sort the scores from highest to least 
@@ -544,7 +558,7 @@ def load_config(cfg_arg): print("length of unique docs: ", len(unique_documents)) #Instantiate the cross-encoder model and get scores for each retrieved document - cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2') #('BAAI/bge-reranker-large') + cross_encoder = CrossEncoder(config_yaml['reranker_model']) pairs = [[prompt, doc] for doc in unique_documents] scores = cross_encoder.predict(pairs) #Sort the scores from highest to least diff --git a/community/oran-chatbot-multimodal/config.yaml b/community/oran-chatbot-multimodal/config.yaml index 8c181869..08fcb163 100644 --- a/community/oran-chatbot-multimodal/config.yaml +++ b/community/oran-chatbot-multimodal/config.yaml @@ -2,8 +2,8 @@ nvidia_api_key: "nvapi--***" ## Set these to required models endpoints from NVIDIA NGC llm_model: "mistralai/mixtral-8x7b-instruct-v0.1" -# Augmentation_model: embedding_model: "nvidia/nv-embedqa-e5-v5" +reranker_model: "cross-encoder/ms-marco-MiniLM-L-6-v2" NIM: false nim_model_name: "meta/llama3-8b-instruct" @@ -17,4 +17,8 @@ nrem_model_name: "nvidia/nv-embedqa-e5-v5" nrem_api_endpoint_url: "http://localhost:8001/v1" nrem_truncate: "END" +Reranker_NIM: false +reranker_model_name: "nvidia/nv-rerankqa-mistral-4b-v3" +reranker_api_endpoint_url: "http://localhost:8000/v1" + file_delete_password: "oranpwd"