diff --git a/intel_extension_for_transformers/neural_chat/prompts/prompt.py b/intel_extension_for_transformers/neural_chat/prompts/prompt.py
index 6a1da02af39..d94d72f59da 100644
--- a/intel_extension_for_transformers/neural_chat/prompts/prompt.py
+++ b/intel_extension_for_transformers/neural_chat/prompts/prompt.py
@@ -321,3 +321,39 @@ def generate_sqlcoder_prompt(qurey, metadata_file):
         qurey=qurey, table_metadata_string=table_metadata_string
     )
     return prompt
+
+QUERYGENERATE_PROMPT = """
+Task: You are asked to act as a human annotator. Your role is to generate 2 specific, open-ended questions based on the provided context.
+Each question should aim to extract or clarify key information from the context, focusing on a single aspect or detail.
+The questions must be directly related to the context to form a query-positive pair, suitable for use in constructing a retrieval dataset.
+---
+Requirements:
+1. Questions should be based on the keywords, such as phrases at the beginning, phrases before a colon, and recurring phrases in the context.
+2. Use the terms in the context instead of pronouns.
+---
+Desired format:
+1.
+2.
+---
+### Context:
+{context}
+---
+Generated questions:
+"""
+
+TRUTHGENERATE_PROMPT = """
+Task: You are asked to act as a human annotator. Your role is to generate the right answer based on the context and question provided.
+Answers should aim to extract or clarify the key information of the question from the context, focusing on a single aspect or detail.
+The answer must be directly related to the context and the question, suitable for use in constructing a synthetic retrieval evaluation dataset.
+---
+Desired format:
+1.
+---
+### Question:
+{question}
+---
+### Context:
+{context}
+---
+Generated ground_truth:
+"""
diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py
new file mode 100644
index 00000000000..f223e0f5fc5
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest, os, shutil
+from unittest.mock import patch
+from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction, llm_generate_truth
+from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval
+from intel_extension_for_transformers.neural_chat.tools.evaluation.framework import ragas_evaluation
+
+class TestEvaluation(unittest.TestCase):
+    def setUp(self) -> None:
+        if os.path.exists("data"):
+            shutil.rmtree("data", ignore_errors=True)
+        if os.path.exists("ground_truth.jsonl"):
+            os.remove("ground_truth.jsonl")
+        if os.path.exists("output"):
+            shutil.rmtree("output", ignore_errors=True)
+        return super().setUp()
+
+    def tearDown(self) -> None:
+        if os.path.exists("data"):
+            shutil.rmtree("data", ignore_errors=True)
+        if os.path.exists("ground_truth.jsonl"):
+            os.remove("ground_truth.jsonl")
+        if os.path.exists("output"):
+            shutil.rmtree("output", ignore_errors=True)
+        return super().tearDown()
+
+    def test_retrieval_dataset_construction(self):
+        path = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/"
+        if os.path.exists(path):
+            input_path = path
+        else:
+            input_path = '../assets/docs/retrieve_multi_doc/'
+        argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \
+                '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \
+                '--input', input_path, \
+                '--output', './data', \
+                '--range_for_sampling', '2-2', \
+                '--negative_number', '1']
+        with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv):
+            retrieval_dataset_construction.main()
+        self.assertTrue(os.path.exists("./data/minedHN_split.jsonl"))
+
+    def test_llm_generate_truth(self):
+        path = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl"
+        if os.path.exists(path):
+            input_path = path
+        else:
+            input_path = '../tools/evaluation/data_augmentation/example.jsonl'
+        argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \
+                '--input', input_path, \
+                '--output', 'ground_truth.jsonl']
+        with patch('sys.argv', ['python llm_generate_truth.py'] + argv):
+            llm_generate_truth.main()
+        self.assertTrue(os.path.exists("ground_truth.jsonl"))
+
+    def test_evaluate_retrieval(self):
+        path1 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl"
+        if os.path.exists(path1):
+            index_file_jsonl_path = path1
+        else:
+            index_file_jsonl_path = '../tools/evaluation/data_augmentation/candidate_context.jsonl'
+        path2 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl"
+        if os.path.exists(path2):
+            query_file_jsonl_path = path2
+        else:
+            query_file_jsonl_path = '../tools/evaluation/data_augmentation/example.jsonl'
+        argv = ['--index_file_jsonl_path', index_file_jsonl_path, \
+                '--query_file_jsonl_path', query_file_jsonl_path, \
+                '--embedding_model', '/tf_dataset2/inc-ut/gte-base']
+        with patch('sys.argv', ['python evaluate_retrieval.py'] + argv):
+            result = evaluate_retrieval.main()
+        self.assertIsNotNone(result)
+
+    def test_ragas_evaluation(self):
+        path1 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl"
+        if os.path.exists(path1):
+            answer_file_path = path1
+        else:
+            answer_file_path = '../tools/evaluation/data_augmentation/answer.jsonl'
+        path2 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl"
+        if os.path.exists(path2):
+            ground_truth_file_path = path2
+        else:
+            ground_truth_file_path = '../tools/evaluation/data_augmentation/ground_truth.jsonl'
+        argv = ['--answer_file', answer_file_path, \
+                '--ground_truth_file', ground_truth_file_path, \
+                '--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \
+                '--embedding_model', '/tf_dataset2/inc-ut/gte-base']
+        with patch('sys.argv', ['python ragas_evaluation.py'] + argv):
+            result = ragas_evaluation.main()
+        self.assertIsNotNone(result)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py
new file mode 100644
index 00000000000..f223e0f5fc5
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py
@@ -0,0 +1,116 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest, os, shutil
+from unittest.mock import patch
+from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction, llm_generate_truth
+from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval
+from intel_extension_for_transformers.neural_chat.tools.evaluation.framework import ragas_evaluation
+
+class TestEvaluation(unittest.TestCase):
+    def setUp(self) -> None:
+        if os.path.exists("data"):
+            shutil.rmtree("data", ignore_errors=True)
+        if os.path.exists("ground_truth.jsonl"):
+            os.remove("ground_truth.jsonl")
+        if os.path.exists("output"):
+            shutil.rmtree("output", ignore_errors=True)
+        return super().setUp()
+
+    def tearDown(self) -> None:
+        if os.path.exists("data"):
+            shutil.rmtree("data", ignore_errors=True)
+        if os.path.exists("ground_truth.jsonl"):
+            os.remove("ground_truth.jsonl")
+        if os.path.exists("output"):
+            shutil.rmtree("output", ignore_errors=True)
+        return super().tearDown()
+
+    def test_retrieval_dataset_construction(self):
+        path = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/"
+        if os.path.exists(path):
+            input_path = path
+        else:
+            input_path = '../assets/docs/retrieve_multi_doc/'
+        argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \
+                '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \
+                '--input', input_path, \
+                '--output', './data', \
+                '--range_for_sampling', '2-2', \
+                '--negative_number', '1']
+        with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv):
+            retrieval_dataset_construction.main()
+        self.assertTrue(os.path.exists("./data/minedHN_split.jsonl"))
+
+    def test_llm_generate_truth(self):
+        path = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl"
+        if os.path.exists(path):
+            input_path = path
+        else:
+            input_path = '../tools/evaluation/data_augmentation/example.jsonl'
+        argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \
+                '--input', input_path, \
+                '--output', 'ground_truth.jsonl']
+        with patch('sys.argv', ['python llm_generate_truth.py'] + argv):
+            llm_generate_truth.main()
+        self.assertTrue(os.path.exists("ground_truth.jsonl"))
+
+    def test_evaluate_retrieval(self):
+        path1 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl"
+        if os.path.exists(path1):
+            index_file_jsonl_path = path1
+        else:
+            index_file_jsonl_path = '../tools/evaluation/data_augmentation/candidate_context.jsonl'
+        path2 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl"
+        if os.path.exists(path2):
+            query_file_jsonl_path = path2
+        else:
+            query_file_jsonl_path = '../tools/evaluation/data_augmentation/example.jsonl'
+        argv = ['--index_file_jsonl_path', index_file_jsonl_path, \
+                '--query_file_jsonl_path', query_file_jsonl_path, \
+                '--embedding_model', '/tf_dataset2/inc-ut/gte-base']
+        with patch('sys.argv', ['python evaluate_retrieval.py'] + argv):
+            result = evaluate_retrieval.main()
+        self.assertIsNotNone(result)
+
+    def test_ragas_evaluation(self):
+        path1 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl"
+        if os.path.exists(path1):
+            answer_file_path = path1
+        else:
+            answer_file_path = '../tools/evaluation/data_augmentation/answer.jsonl'
+        path2 = \
+            "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl"
+        if os.path.exists(path2):
+            ground_truth_file_path = path2
+        else:
+            ground_truth_file_path = '../tools/evaluation/data_augmentation/ground_truth.jsonl'
+        argv = ['--answer_file', answer_file_path, \
+                '--ground_truth_file', ground_truth_file_path, \
+                '--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \
+                '--embedding_model', '/tf_dataset2/inc-ut/gte-base']
+        with patch('sys.argv', ['python ragas_evaluation.py'] + argv):
+            result = ragas_evaluation.main()
+        self.assertIsNotNone(result)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/intel_extension_for_transformers/neural_chat/tests/requirements.txt b/intel_extension_for_transformers/neural_chat/tests/requirements.txt
index a1d17030e6d..60d26bc9beb 100644
--- a/intel_extension_for_transformers/neural_chat/tests/requirements.txt
+++ b/intel_extension_for_transformers/neural_chat/tests/requirements.txt
@@ -40,6 +40,7 @@ langid
 librosa
 lm-eval
 markdown
+modelscope
 neural-compressor
 neural_speed==1.0a0
 num2words
@@ -64,6 +65,7 @@ python-docx
 python-multipart
 pyyaml
 qdrant-client==1.8.2
+ragas==0.1.7
 rank_bm25
 resampy==0.3.1
 rouge_score
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md
new file mode 100644
index 00000000000..84f931f8458
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md
@@ -0,0 +1,444 @@
+# Retrieval and RAG Benchmark
+
+## 1. Introduction
+We provide scripts for benchmarking retrieval and RAG. For data augmentation, please go to [Retrieval Data Augmentation](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation).
+
+## 2. Retrieval Benchmark
+### Installation
+Please install the requirements for NeuralChat and the retrieval plugin with the following commands.
+```
+git clone https://github.com/intel/intel-extension-for-transformers.git
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat
+pip install -r requirements.txt
+cd pipeline/plugins/retrieval
+pip install -r requirements.txt
+```
+
+### Benchmark
+You can run the retrieval benchmark with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever
+bash retrieval_benchmark.sh \
+--index_file_jsonl_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl \
+--query_file_jsonl_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl \
+--vector_database=Chroma \
+--embedding_model= \
+--llm_model= \
+--reranker_model=
+```
+**Some Important Arguments**:
+- `index_file_jsonl_path`: The path of the JSON data containing the candidate contexts, where each line is a dict like this: ```{"context": List[str]}```. See [candidate_context.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl) for an example data file.
+- `query_file_jsonl_path`: The path of the JSON data containing the queries and positives, where each line is a dict like this: ```{"query": str, "pos": List[str]}```. See [example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl) for an example data file.
+- `vector_database`: The vector database for constructing the knowledge base.
+- `embedding_model`: The name or path of the text embedding model. The default value is "BAAI/bge-base-en-v1.5". Other options are "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large".
+- `llm_model`: The name or path of the LLM model.
+- `reranker_model`: The name or path of the reranker model.
+- `retrieval_type`: The type of the retriever. The default value is "default". The other options are "child_parent" and "bm25".
+- `polish`: Whether to polish the input query before processing. The default value is False.
+- `search_type`: The type of search to perform. The default value is "similarity". The other options are "mmr" and "similarity_score_threshold".
+- `k`: The number of most similar documents to return. The default value is 1.
+- `fetch_k`: The number of documents to fetch and pass to the MMR algorithm. The default value is 5.
+- `score_threshold`: The similarity score threshold for the retrieved documents. The default value is 0.3.
+- `top_n`: The number of documents returned by the reranker model. The default value is 1.
+- `enable_rerank`: Whether to enable the retrieve-then-rerank pipeline. The default value is False.
+
+**Result**:
+The result includes all parameter values along with the MRR (mean reciprocal rank) and Hit (hit ratio) values.
+| Parameter & Result | Value |
+| :----: | :----: |
+| 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' |
+| 'query_file_jsonl_path' | '/path/to/example.jsonl' |
+| 'vector_database' | 'Chroma' |
+| 'embedding_model' | '/path/to/bge-large-en-v1.5' |
+| 'retrieval_type' | 'default' |
+| 'polish' | False |
+| 'search_type' | 'similarity' |
+| 'llm_model' | '/path/to/neural-chat-7b-v3-1/' |
+| 'k' | 1 |
+| 'fetch_k' | 5 |
+| 'score_threshold' | 0.3 |
+| 'reranker_model' | '/path/to/bge-reranker-large' |
+| 'top_n' | 1 |
+| 'enable_rerank' | False |
+| 'MRR' | 0.8 |
+| 'Hit' | 0.8 |
+
+### SuperBenchmark
+You can run the retrieval superbenchmark with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever
+python retrieval_superbenchmark.py \
+--index_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl \
+--query_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl \
+--vector_database Chroma \
+--embedding_model \
+--llm_model \
+--reranker_model
+```
+
+This will run the benchmark multiple times over the different parameter values listed below and output the parameter combinations that achieve the maximum MRR and Hit; a minimal sketch of how these two metrics are computed follows.
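+
+The following is an illustrative sketch of how MRR and Hit can be computed from a retrieval run, not the repository's implementation: the JSONL fields follow the `example.jsonl` format (`query`, `pos`), and `retrieve` is a hypothetical callable standing in for the retriever under test.
+``` python
+import json
+
+def compute_mrr_and_hit(query_file_jsonl_path, retrieve, k=1):
+    """Sketch only: `retrieve(query, k)` is assumed to return a ranked list of k contexts."""
+    mrr = hit = total = 0
+    with open(query_file_jsonl_path) as f:
+        for line in f:
+            example = json.loads(line)
+            positives = example["pos"]
+            retrieved = retrieve(example["query"], k)  # ranked candidate contexts
+            total += 1
+            for rank, doc in enumerate(retrieved, start=1):
+                if doc in positives:
+                    mrr += 1.0 / rank  # reciprocal rank of the first relevant document
+                    hit += 1           # this query counts as a hit
+                    break
+    return {"MRR": mrr / total, "Hit": hit / total}
+```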
+
+**Adjustable Parameters**:
+- `retrieval_type`: ['default','child_parent','bm25']
+- `polish`: [True, False]
+- `search_type`: ['similarity','mmr','similarity_score_threshold']
+- `k`: [1, 3, 5]
+- `fetch_k`: [5, 10, 20]
+- `score_threshold`: [0.3, 0.5, 0.7]
+- `top_n`: [1, 3, 5, 10]
+- `enable_rerank`: [True, False]
+
+**Result**:
+***max_MRR***:
+| Parameter & Result | Value |
+| :----: | :----: |
+| 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' |
+| 'query_file_jsonl_path' | '/path/to/example.jsonl' |
+| 'vector_database' | 'Chroma' |
+| 'embedding_model' | '/path/to/bge-large-en-v1.5' |
+| 'retrieval_type' | 'default' |
+| 'polish' | True |
+| 'search_type' | 'similarity' |
+| 'llm_model' | '/path/to/neural-chat-7b-v3-1/' |
+| 'k' | 1 |
+| 'fetch_k' | 5 |
+| 'score_threshold' | 0.3 |
+| 'reranker_model' | '/path/to/bge-reranker-large' |
+| 'top_n' | 1 |
+| 'enable_rerank' | True |
+| 'MRR' | 0.7 |
+| 'Hit' | 0.7 |
+
+...
+
+***max_Hit***:
+| Parameter & Result | Value |
+| :----: | :----: |
+| 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' |
+| 'query_file_jsonl_path' | '/path/to/example.jsonl' |
+| 'vector_database' | 'Chroma' |
+| 'embedding_model' | '/path/to/bge-large-en-v1.5' |
+| 'retrieval_type' | 'default' |
+| 'polish' | True |
+| 'search_type' | 'similarity' |
+| 'llm_model' | '/path/to/neural-chat-7b-v3-1/' |
+| 'k' | 1 |
+| 'fetch_k' | 20 |
+| 'score_threshold' | 0.3 |
+| 'reranker_model' | '/path/to/bge-reranker-large' |
+| 'top_n' | 3 |
+| 'enable_rerank' | True |
+| 'MRR' | 0.7 |
+| 'Hit' | 0.7 |
+
+...
+
+### Config SuperBenchmark
+You can also run the retrieval superbenchmark from a config file with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever
+python ragas_config_benchmark.py --config_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml
+```
+You can set the parameters in config.yaml.
+``` yaml
+index_file_jsonl_path: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl
+query_file_jsonl_path: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl
+vector_database: Chroma
+embedding_model:
+llm_model:
+reranker_model:
+retrieval_type: ['default','child_parent','bm25']
+polish: [True, False]
+search_type: ['similarity','mmr','similarity_score_threshold']
+k: [1, 3, 5]
+fetch_k: [5, 10, 20]
+score_threshold: [0.3, 0.5, 0.7]
+top_n: [1, 3, 5, 10]
+enable_rerank: [True, False]
+```
+
+## 3. RAG Benchmark
+### Installation
+Please install the requirements for NeuralChat and the retrieval plugin first with the following commands.
+```
+git clone https://github.com/intel/intel-extension-for-transformers.git
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat
+pip install -r requirements.txt
+cd pipeline/plugins/retrieval
+pip install -r requirements.txt
+```
+After that, please install the additional dependencies with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework
+pip install -r requirements.txt
+```
+
+### Benchmark
+You can run the RAG benchmark with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework
+bash ragas_benchmark.sh \
+--ground_truth_file=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \
+--input_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt \
+--vector_database=Chroma \
+--embedding_model= \
+--llm_model= \
+--reranker_model=
+```
+
+**Some Important Arguments**:
+- `ground_truth_file`: The path of the JSON data containing question, context, and ground_truth, where each line is a dict like this: ```{"question": str, "context": List[str], "ground_truth": str}```. See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for an example data file. The `"question"` fields of `answer_file` and `ground_truth_file` should correspond one-to-one.
+- `input_path`: The path of the file/folder/link of the content.
+- `use_openai_key`: Whether to use OpenAI for running ragas to compute the score. If you are using OpenAI, ensure your OpenAI key is ready and available in your environment via `export OPENAI_API_KEY=xxx`. The default value is False.
+- `vector_database`: The vector database for constructing the knowledge base.
+- `embedding_model`: The name or path of the text embedding model. The default value is "BAAI/bge-base-en-v1.5". Other options are "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large".
+- `llm_model`: The name or path of the LLM model.
+- `reranker_model`: The name or path of the reranker model.
+- `retrieval_type`: The type of the retriever. The default value is "default". The other options are "child_parent" and "bm25".
+- `polish`: Whether to polish the input query before processing. The default value is False.
+- `search_type`: The type of search to perform. The default value is "similarity". The other options are "mmr" and "similarity_score_threshold".
+- `k`: The number of most similar documents to return. The default value is 1.
+- `fetch_k`: The number of documents to fetch and pass to the MMR algorithm. The default value is 5.
+- `score_threshold`: The similarity score threshold for the retrieved documents. The default value is 0.3.
+- `top_n`: The number of documents returned by the reranker model. The default value is 1.
+- `enable_rerank`: Whether to enable the retrieve-then-rerank pipeline. The default value is False.
+- `max_chuck_size`: The maximum token length for a single chunk in the knowledge base. The default value is 256.
+- `temperature`: This value is used to modulate the next-token probabilities and will influence the distribution of similarity scores. The default value is 0.01.
+- `top_k`: The number of highest-probability vocabulary tokens to keep for top-k filtering. The default value is 1.
+- `top_p`: If set to a float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.1.
+- `repetition_penalty`: The parameter for repetition penalty. 1.0 means no penalty. The default value is 1.0.
+- `num_beams`: The number of beams for beam search. 1 means no beam search. The default value is 1.
+- `do_sample`: Whether or not to use sampling; use greedy decoding otherwise. The default value is False.
+
+**Result**:
+The result includes all parameter values along with the values of average answer relevancy, average faithfulness, average context recall, and average context precision.
+| Parameter & Result | Value |
+| :----: | :----: |
+| "ground_truth_file" | "ground_truth.jsonl" |
+| "input_path" | "data.txt" |
+| "vector_database" | "Chroma" |
+| "embedding_model" | "/path/to/bge-large-en-v1.5" |
+| "retrieval_type" | "default" |
+| "polish" | True |
+| "search_type" | "similarity" |
+| "llm_model" | "/path/to/neural-chat-7b-v3-1/" |
+| "k" | 1 |
+| "fetch_k" | 5 |
+| "score_threshold" | 0.3 |
+| "reranker_model" | "/path/to/bge-reranker-large" |
+| "top_n" | 1 |
+| "enable_rerank" | True |
+| "max_chuck_size" | 256 |
+| "temperature" | 0.01 |
+| "top_k" | 1 |
+| "top_p" | 0.1 |
+| "repetition_penalty" | 1.0 |
+| "num_beams" | 1 |
+| "do_sample" | True |
+| "answer_relevancy_average" | 0.937748267362332 |
+| "faithfulness_average" | 0.5833333333333333 |
+| "context_recall_average" | 1.0 |
+| "context_precision_average" | 0.49999999995 |
+
+### SuperBenchmark
+You can run the RAG superbenchmark with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework
+python ragas_benchmark.py \
+--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \
+--input_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt \
+--vector_database Chroma \
+--embedding_model \
+--llm_model \
+--reranker_model
+```
+
+If you use OpenAI for running ragas, ensure your OpenAI key is ready and available in your environment. This will make multiple calls to the OpenAI API, so please be aware of your costs.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework
+export OPENAI_API_KEY=xxx
+python ragas_benchmark.py \
+--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \
+--input_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt \
+--use_openai_key \
+--vector_database Chroma \
+--embedding_model \
+--llm_model \
+--reranker_model
+```
+
+This will run the benchmark multiple times over the different parameter values listed below and output the parameter combinations that achieve the maximum average answer relevancy, average faithfulness, average context recall, and average context precision; a minimal sketch of how these metrics are obtained from ragas follows.
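+
+The following is an illustrative sketch of how these four metrics can be computed with the pinned `ragas==0.1.7`, assuming the `answer.jsonl` and `ground_truth.jsonl` formats described in the data augmentation README; it is not the repository's implementation, and ragas calls OpenAI models by default, so `OPENAI_API_KEY` must be set in the environment.
+``` python
+import json
+from datasets import Dataset
+from ragas import evaluate
+from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision
+
+def ragas_scores(answer_file, ground_truth_file):
+    # Each line of answer_file: {"question": str, "answer": str}
+    # Each line of ground_truth_file: {"question": str, "context": List[str], "ground_truth": str}
+    with open(answer_file) as f:
+        answers = [json.loads(line) for line in f]
+    with open(ground_truth_file) as f:
+        truths = [json.loads(line) for line in f]
+    dataset = Dataset.from_dict({
+        "question": [a["question"] for a in answers],
+        "answer": [a["answer"] for a in answers],
+        "contexts": [t["context"] for t in truths],
+        "ground_truth": [t["ground_truth"] for t in truths],
+    })
+    # By default ragas evaluates with OpenAI models; OPENAI_API_KEY must be set.
+    return evaluate(
+        dataset,
+        metrics=[answer_relevancy, faithfulness, context_recall, context_precision],
+    )
+```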
+ +**Adjustable Parameters**: +- `retrieval_type`: ['default','child_parent','bm25'] +- `polish`: [True, False] +- `search_type`: ['similarity','mmr','similarity_score_threshold'] +- `k`: [1, 3, 5] +- `fetch_k`: [5, 10, 20] +- `score_threshold`: [0.3, 0.5, 0.7] +- `top_n`: [1, 3, 5, 10] +- `enable_rerank`: [True, False] +- `max_chuck_size`: [256, 512, 768, 1024] +- `temperature`: [0.01, 0.05, 0.1, 0.3, 0.5, 0.7] +- `top_k`: [1, 3, 10, 20] +- `top_p`: [0.1, 0.3, 0.5, 0.7] +- `repetition_penalty`: [1.0, 1.1, 1.3, 1.5, 1.7] +- `num_beams`: [1, 3, 10, 20] +- `do_sample`: [True, False] + +**Result**: +***max_answer_relevancy_average***: +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.0 | +| "num_beams" | 20 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.9533325665270252 | +| "faithfulness_average" | 0.5083333333333333 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | + +... + +***max_faithfulness_average***: +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.0 | +| "num_beams" | 1 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.9354267206448277 | +| "faithfulness_average" | 0.675 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | + +... + +***max_context_recall_average***: +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.0 | +| "num_beams" | 1 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.9354267206448277 | +| "faithfulness_average" | 0.675 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | + +... 
+
+***max_context_precision_average***:
+| Parameter & Result | Value |
+| :----: | :----: |
+| "ground_truth_file" | "ground_truth.jsonl" |
+| "input_path" | "data.txt" |
+| "vector_database" | "Chroma" |
+| "embedding_model" | "/path/to/bge-large-en-v1.5" |
+| "retrieval_type" | "default" |
+| "polish" | True |
+| "search_type" | "similarity" |
+| "llm_model" | "/path/to/neural-chat-7b-v3-1/" |
+| "k" | 1 |
+| "fetch_k" | 5 |
+| "score_threshold" | 0.3 |
+| "reranker_model" | "/path/to/bge-reranker-large" |
+| "top_n" | 1 |
+| "enable_rerank" | True |
+| "max_chuck_size" | 256 |
+| "temperature" | 0.01 |
+| "top_k" | 1 |
+| "top_p" | 0.1 |
+| "repetition_penalty" | 1.1 |
+| "num_beams" | 1 |
+| "do_sample" | True |
+| "answer_relevancy_average" | 0.7429146997306499 |
+| "faithfulness_average" | 0.6666666666666667 |
+| "context_recall_average" | 1.0 |
+| "context_precision_average" | 0.49999999995 |
+
+...
+
+### Config SuperBenchmark
+You can also run the RAG superbenchmark from a config file with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework
+python ragas_config_benchmark.py --config_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml
+```
+You can set the parameters in config.yaml.
+``` yaml
+ground_truth_file: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl
+input_path: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt
+use_openai_key: false
+vector_database: Chroma
+embedding_model:
+llm_model:
+reranker_model:
+retrieval_type: ['default','child_parent','bm25']
+polish: [True, False]
+search_type: ['similarity','mmr','similarity_score_threshold']
+k: [1, 3, 5]
+fetch_k: [5, 10, 20]
+score_threshold: [0.3, 0.5, 0.7]
+top_n: [1, 3, 5, 10]
+enable_rerank: [True, False]
+max_chuck_size: [256, 512, 768, 1024]
+temperature: [0.01, 0.05, 0.1, 0.3, 0.5, 0.7]
+top_k: [1, 3, 10, 20]
+top_p: [0.1, 0.3, 0.5, 0.7]
+repetition_penalty: [1.0, 1.1, 1.3, 1.5, 1.7]
+num_beams: [1, 3, 10, 20]
+do_sample: [True, False]
+```
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/__init__.py
new file mode 100644
index 00000000000..18896e7b549
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/__init__.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md
new file mode 100644
index 00000000000..f5c03008230
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md
@@ -0,0 +1,144 @@
+# Retrieval Data Augmentation
+
+## 1. Introduction
+In this example, we show how to do data augmentation to construct a retrieval dataset. The data files are described below.
+* **[example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl)** can be used in [hard negatives mining](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/mine_hard_neg.py), [embedding model evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/evaluate.py), and [retriever evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py).
+Each line is a dict like this:
+```
+{"query": str, "pos": List[str]}
+```
+`query` is the query text, and `pos` is a positive text.
+
+* **[augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl)** can be used in [embedding finetuning](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/finetune.py).
+Each line is a dict like this:
+```
+{"query": str, "pos": List[str], "neg": List[str]}
+```
+`query` is the query text, `pos` is a positive text, and `neg` is a list of negative texts.
+
+* **[candidate_context.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl)** can be used in [embedding model evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/evaluate.py) and [retriever evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py).
+Each line is a dict like this:
+```
+{"context": List[str]}
+```
+`context` is the candidate context.
+
+* **[answer.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl)** can be used in [RAG evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py).
+Each line is a dict like this:
+```
+{"question": str, "answer": str}
+```
+`question` is the question text, and `answer` is the answer text.
+
+* **[ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl)** can be used in [RAG evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py).
+Each line is a dict like this:
+```
+{"question": str, "context": List[str], "ground_truth": str}
+```
+`question` is the question text, `context` is the candidate context, and `ground_truth` is the ground truth.
+
+## 2. Supported Devices
+CPU, CUDA
+
+## 3. Installation
+Please install the requirements for NeuralChat and the retrieval plugin first with the following commands.
+```
+git clone https://github.com/intel/intel-extension-for-transformers.git
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat
+pip install -r requirements.txt
+cd pipeline/plugins/retrieval
+pip install -r requirements.txt
+```
+After that, install the additional dependencies for your device.
+* **On CPU**
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation
+pip install -r requirements_cpu.txt
+```
+* **On CUDA**
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation
+pip install -r requirements_cuda.txt
+```
+
+## 4. Retrieval Dataset Construction
+### Context to Questions and Mine Hard Negatives
+This step generates several specific, open-ended questions based on the context of the provided input file. The questions are directly related to the context, so each question and its context form a query-positive pair suitable for constructing a retrieval dataset. We then mine hard negatives, sampling negatives for each query from the entire corpus, which is a widely used method to improve the quality of finetuned sentence embedding models.
+* **On CPU**
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation
+python -m data_augmentation.retrieval_dataset_construction \
+--llm_model \
+--embedding_model \
+--input
+```
+
+* **On CUDA**
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation
+python -m data_augmentation.retrieval_dataset_construction \
+--llm_model \
+--embedding_model \
+--input \
+--use_gpu_for_searching True
+```
+
+**Some Important Arguments**:
+- `llm_model`: The name or path of the LLM model.
+- `embedding_model`: The name or path of the text embedding model.
+- `input`: The path of the file/folder/link of the content.
+- `output`: The path of the output files. The default value is './data'. The default output files are './data/raw.jsonl', './data/minedHN.jsonl', and './data/minedHN_split.jsonl'.
+- `temperature`: This value is used to modulate the next-token probabilities and will influence the distribution of similarity scores. The default value is 0.8.
+- `top_p`: If set to a float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.9.
+- `top_k`: The number of highest-probability vocabulary tokens to keep for top-k filtering. The default value is 40.
+- `repetition_penalty`: The parameter for repetition penalty. 1.0 means no penalty. The default value is 2.0.
+- `max_new_tokens`: The maximum number of tokens to generate, ignoring the number of tokens in the prompt. The default value is 48.
+- `do_sample`: Whether or not to use sampling; use greedy decoding otherwise. The default value is True.
+- `num_beams`: The number of beams for beam search. 1 means no beam search. The default value is 2.
+- `num_return_sequences`: The number of independently computed returned sequences for each element in the batch. The default value is 2.
+- `use_cache`: Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. The default value is True.
+- `range_for_sampling`: The range from which to sample negatives. For example, `2-100` means sampling `negative_number` negatives from the top2-top100 documents. You can set a larger value to reduce the difficulty of the negatives (e.g., set it to `60-300` to sample negatives from the top60-top300 passages). The default value is '2-10'.
+- `negative_number`: The number of sampled negatives. The default value is 5.
+- `use_gpu_for_searching`: Whether to use faiss-gpu to retrieve negatives. The default value is False.
+- `similarity_threshold`: The cosine similarity threshold used to filter the generated queries. The default value is 0.6.
+
+**Result**:
+Three files will be generated. The default output files are `./data/raw.jsonl`, `./data/minedHN.jsonl`, and `./data/minedHN_split.jsonl`. The third is the final output dataset, where each line is a dict like this:
+```
+{"query": str, "pos": List[str], "neg": List[str]}
+```
+`query` is the generated query, `pos` is a positive text based on the context of the provided input file, and `neg` is a list of negative texts.
+See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl) for an example data file.
+
+
+### Context, Question to Ground Truth
+This step generates the right answer based on the provided context and question. The answer is directly related to the context and the question, making it suitable for constructing a synthetic retrieval evaluation dataset.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation
+python llm_generate_truth.py \
+--llm_model \
+--input example.jsonl \
+--output ground_truth.jsonl
+```
+
+**Some Important Arguments**:
+- `llm_model`: The name or path of the LLM model.
+- `input`: The path of the JSON data containing the queries and positives, where each line is a dict like this: ```{"query": str, "pos": List[str]}```. See [example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl) for an example data file.
+- `output`: The path of the output JSON data.
+- `temperature`: This value is used to modulate the next-token probabilities and will influence the distribution of similarity scores. The default value is 0.8.
+- `top_p`: If set to a float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.9.
+- `top_k`: The number of highest-probability vocabulary tokens to keep for top-k filtering. The default value is 40.
+- `repetition_penalty`: The parameter for repetition penalty. 1.0 means no penalty. The default value is 2.0.
+- `max_new_tokens`: The maximum number of tokens to generate, ignoring the number of tokens in the prompt. The default value is 48.
+- `do_sample`: Whether or not to use sampling; use greedy decoding otherwise. The default value is True.
+- `num_beams`: The number of beams for beam search. 1 means no beam search. The default value is 2.
+- `num_return_sequences`: The number of independently computed returned sequences for each element in the batch. The default value is 2.
+- `use_cache`: Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. The default value is True.
+
+**Result**:
+Each line of the output JSON data is a dict like this:
+```
+{"question": str, "context": List[str], "ground_truth": str}
+```
+`ground_truth` is the generated ground truth, based on the provided question and context.
+See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for an example data file.
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/__init__.py
new file mode 100644
index 00000000000..18896e7b549
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/__init__.py
@@ -0,0 +1,16 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl
new file mode 100644
index 00000000000..fe8976774e2
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl
@@ -0,0 +1,10 @@
+{"question": "What types of platforms does the organization focus on?", "answer": "The organization focuses on delivering open software and hardware platforms with industry-defining standards, as well as leadership products, open and secure platforms, and resilient manufacturing."}
+{"question": "What are the core values that drive our company's actions?", "answer": "The core values driving the company's actions include focusing on having a positive impact on business, society, and the planet by working together with talented individuals. They also emphasize delivering leadership products, open and secure platforms, and resilient manufacturing to support global digitalization and ensure customer success."}
+{"question": "What types of companies does Intel invest in?", "answer": "Intel invests in public and private companies."}
+{"question": "How has technology been central to our lives in recent years?", "answer": "In recent years, technology has become more essential as it permeates various aspects of our daily lives. This includes advancements in communication, entertainment, transportation, healthcare, and many other sectors. 
All these rely heavily on semiconductors, which play a crucial role in powering and enabling these technologies."} +{"question": "What is Intel's focus in terms of delivering leadership products?", "answer": "Intel's focus in terms of delivering leadership products includes providing open and secure platforms as well as resilient manufacturing for enabling global digitalization and fueling customer success."} +{"question": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "answer": "Intel has not provided specific details on how they have been directly affected by the COVID-19 pandemic. However, it can be inferred that like many other companies, they might have experienced challenges related to supply chain disruptions, workforce adjustments, and potential changes in demand for their products due to the global economic impact of the pandemic."} +{"question": "How does the company protect personal data to prevent unauthorized access or misuse?", "answer": "The text provided doesn't specifically mention how the company protects personal data to prevent unauthorized access or misuse. However, it highlights the potential consequences of such incidents, which might imply that they have measures in place to minimize these risks."} +{"question": "What are the conditions for accessing third-party IP?", "answer": "The conditions for accessing third-party IP can vary depending on the specific agreement between the parties involved. However, generally, it includes ensuring availability on commercially reasonable terms or at all."} +{"question": "How many customers contribute to the majority of our revenue?", "answer": "A limited number of customers contribute to the majority of your revenue."} +{"question": "When does Intel plan to deliver on its goal of five manufacturing technology nodes in four years?", "answer": "Intel remains on track to deliver on this goal within four years."} diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl new file mode 100644 index 00000000000..4ae26cb49b7 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl @@ -0,0 +1,10 @@ +{"query": "What types of platforms does the organization focus on?", "pos": ["We aim to deliver open software and hardware platforms with industry-defining standards."], "neg": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We invest in public and private companies and do not always realize a return on our investments.", "We receive a significant portion of our revenue from a limited number of customers."]} +{"query": "What are the core values that drive our company's actions?", "pos": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet."], "neg": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We aim to deliver open software and hardware platforms with industry-defining standards.", "The COVID-19 pandemic could materially adversely affect our financial condition and results of operations.", "We receive a significant portion of our revenue from a limited number of customers."]} +{"query": "What types of companies does Intel invest in?", "pos": ["We invest in public and private companies and do not always realize a return on our investments."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We aim to deliver open software and hardware platforms with industry-defining standards.", "Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "The COVID-19 pandemic could materially adversely affect our financial condition and results of operations.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."]} +{"query": "How has technology been central to our lives in recent years?", "pos": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."], "neg": ["Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all.", "We receive a significant portion of our revenue from a limited number of customers.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We invest in public and private companies and do not always realize a return on our investments."]} +{"query": "What is Intel's focus in terms of delivering leadership products?", "pos": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all.", "Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet.", "We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"query": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "pos": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We invest in public and private companies and do not always realize a return on our investments.", "Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings.", "With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"query": "How does the company protect personal data to prevent unauthorized access or misuse?", "pos": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."], "neg": ["We aim to deliver open software and hardware platforms with industry-defining standards.", "Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"query": "What are the conditions for accessing third-party IP?", "pos": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success.", "We invest in public and private companies and do not always realize a return on our investments.", "We receive a significant portion of our revenue from a limited number of customers.", "We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"query": "How many customers contribute to the majority of our revenue?", "pos": ["We receive a significant portion of our revenue from a limited number of customers."], "neg": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet.", "The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We invest in public and private companies and do not always realize a return on our investments.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."]} +{"query": "When does Intel plan to deliver on its goal of five manufacturing technology nodes in four years?", "pos": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."], "neg": ["Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We receive a significant portion of our revenue from a limited number of customers.", "We invest in public and private companies and do not always realize a return on our investments.", "We aim to deliver open software and hardware platforms with industry-defining standards."]} diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl new file mode 100644 index 00000000000..1c9589eb633 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl @@ -0,0 +1,50 @@ +{"context": ["Intel is creating a sustainable, resilient, and secure supply chain."]} +{"context": ["We are fueling customer growth through tech innovation."]} +{"context": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."]} +{"context": ["Intel is focused on navigating the challenges of today, while anticipating the needs of the future."]} +{"context": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."]} +{"context": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"context": ["Lead and democratize compute with Intel x86 and xPU."]} +{"context": ["We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"context": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet."]} +{"context": ["Demand for our products is variable and hard to predict."]} +{"context": ["Due to the complexity of our manufacturing operations, we are not always able to timely respond to fluctuations in demand and we may incur significant charges and costs."]} +{"context": ["We face significant competition."]} +{"context": ["We invest significantly in R&D, and to the extent our R&D efforts are unsuccessful, our competitive position can be harmed, and we may not realize a return on our investments."]} +{"context": ["Our investments in new businesses, products, and technologies are inherently risky and do not always succeed."]} +{"context": ["Changes in the mix of products sold can materially impact our financial results."]} +{"context": ["We are subject to risks associated with the development and implementation of new manufacturing technologies."]} +{"context": ["We face supply chain risks."]} +{"context": ["Our disaggregated design strategy introduces additional production risks."]} +{"context": ["We are subject to the risks of product defects, errata, or other product issues."]} +{"context": ["We face risks related to security vulnerabilities in our products."]} +{"context": ["We are subject to risks associated with environmental, health, and safety and product regulations."]} +{"context": ["We have established and report on our initiatives, aspirations, and goals related to corporate responsibility matters, which exposes us to numerous risks."]} +{"context": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."]} +{"context": ["We operate globally and are subject to significant risks in many jurisdictions."]} +{"context": ["Global or regional conditions can harm our financial results."]} +{"context": ["We are subject to risks related to trade policies and regulations."]} +{"context": ["Laws and regulations can have a negative impact on our business."]} +{"context": ["We are affected by fluctuations in currency exchange rates."]} +{"context": ["Changes in our effective tax rate may impact our net income."]} +{"context": ["Catastrophic events can have a material adverse effect on our operations and financial results."]} +{"context": ["Damage to our reputation can damage our business."]} +{"context": ["We are subject to cybersecurity and privacy risks."]} +{"context": ["We face risks related to cybersecurity threats and incidents."]} +{"context": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."]} +{"context": ["We are subject to IP risks and risks associated with litigation and regulatory proceedings."]} +{"context": ["We cannot always protect our IP or enforce our IP rights."]} +{"context": ["Our licenses with other companies and participation in industry initiatives at times allow competitors to use some of our patent rights."]} +{"context": ["Third parties assert claims based on IP rights against us and our products, which could harm our business."]} +{"context": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."]} +{"context": ["We are subject to risks associated with litigation and regulatory matters."]} +{"context": ["We must attract, retain, and motivate key employees."]} +{"context": ["We are subject to risks associated with our 
strategic transactions."]} +{"context": ["Our acquisitions, divestitures, and other strategic transactions could fail to achieve our financial or strategic objectives, disrupt our ongoing business, and adversely impact our results of operations."]} +{"context": ["We invest in public and private companies and do not always realize a return on our investments."]} +{"context": ["We face risks related to our debt obligations."]} +{"context": ["We are subject to sales-related risks."]} +{"context": ["We face risks related to sales through distributors and other third parties."]} +{"context": ["We receive a significant portion of our revenue from a limited number of customers."]} +{"context": ["We face risks related to transactions with government entities."]} +{"context": ["We have fluctuations in our stock price and the amount and frequency of our stock repurchases."]} \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt new file mode 100644 index 00000000000..b468c8b9399 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt @@ -0,0 +1,10 @@ +We aim to deliver open software and hardware platforms with industry-defining standards. +Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet. +We invest in public and private companies and do not always realize a return on our investments. +The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors. +With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success. +The COVID-19 pandemic could materially adversely affect our financial condition and results of operations. +Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings. +We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all. +We receive a significant portion of our revenue from a limited number of customers. +Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years. diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl new file mode 100644 index 00000000000..06bf031621e --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl @@ -0,0 +1,10 @@ +{"query": "What types of platforms does the organization focus on?", "pos": ["We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"query": "What are the core values that drive our company's actions?", "pos": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet."]} +{"query": "What types of companies does Intel invest in?", "pos": ["We invest in public and private companies and do not always realize a return on our investments."]} +{"query": "How has technology been central to our lives in recent years?", "pos": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."]} +{"query": "What is Intel's focus in terms of delivering leadership products?", "pos": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"query": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "pos": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."]} +{"query": "How does the company protect personal data to prevent unauthorized access or misuse?", "pos": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."]} +{"query": "What are the conditions for accessing third-party IP?", "pos": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."]} +{"query": "How many customers contribute to the majority of our revenue?", "pos": ["We receive a significant portion of our revenue from a limited number of customers."]} +{"query": "When does Intel plan to deliver on its goal of five manufacturing technology nodes in four years?", "pos": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."]} \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl new file mode 100644 index 00000000000..cd471552fe6 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl @@ -0,0 +1,10 @@ +{"question": "What types of platforms does the organization focus on?", "context": ["We aim to deliver open software and hardware platforms with industry-defining standards."], "ground_truth": "open software and hardware platforms"} +{"question": "What are the core values that drive our company's actions?", "context": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet."], "ground_truth": "The core values driving the company's actions include focus on talent, positively impacting business, society, and the environment."} +{"question": "What types of companies does Intel invest in?", "context": ["We invest in public and private companies and do not always realize a return on our investments."], "ground_truth": "Intel invests in public and private companies."} +{"question": "How has technology been central to our lives in recent years?", "context": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."], "ground_truth": "Technology has become integral to various aspects of life through its dependence on semiconductors."} +{"question": "What is Intel's focus in terms of delivering leadership products?", "context": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."], "ground_truth": "Intel's focus in terms of delivering leadership products is on open and secure platforms and resilient manufacturing."} +{"question": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "context": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."], "ground_truth": "Intel has been financially impacted by the COVID-19 pandemic."} +{"question": "How does the company protect personal data to prevent unauthorized access or misuse?", "context": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."], "ground_truth": "The company focuses on preventing unauthorized access or misuse of personal data by protecting it from theft, loss, or misuse."} +{"question": "What are the conditions for accessing third-party IP?", "context": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."], "ground_truth": "The conditions for accessing third-party IP depend on factors such as availability, commercial terms, and reasonableness."} +{"question": "How many customers contribute to the majority of our revenue?", "context": ["We receive a significant portion of our revenue from a limited number of customers."], "ground_truth": "a limited number of customers"} +{"question": "When does Intel plan to deliver on its goal of five manufacturing technology nodes in four years?", "context": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."], "ground_truth": "Intel aims to achieve five manufacturing technology nodes within four years, with the goal being fulfilled by 2025."} diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py new file mode 100644 index 00000000000..8aa51306950 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under 
the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import random +import numpy as np +import faiss +from tqdm import tqdm + +def create_index(embeddings, use_gpu): + index = faiss.IndexFlatIP(len(embeddings[0])) + embeddings = np.asarray(embeddings, dtype=np.float32) + if use_gpu: + co = faiss.GpuMultipleClonerOptions() # pylint: disable=E1101 + co.shard = True + co.useFloat16 = True + index = faiss.index_cpu_to_all_gpus(index, co=co) + else: + pass + index.add(embeddings) + return index + +def batch_search(index, + query, + topk: int = 200, + batch_size: int = 64): + all_scores, all_inxs = [], [] + for start_index in tqdm(range(0, len(query), batch_size), desc="Batches", disable=len(query) < 256): + batch_query = query[start_index:start_index + batch_size] + batch_scores, batch_inxs = index.search(np.asarray(batch_query, dtype=np.float32), k=topk) + all_scores.extend(batch_scores.tolist()) + all_inxs.extend(batch_inxs.tolist()) + return all_scores, all_inxs + +def get_corpus(candidate_pool): + corpus = [] + for line in open(candidate_pool): + line = json.loads(line.strip()) + corpus.append(line['text']) + return corpus + +def find_knn_neg(model, input_file, candidate_pool, output_file, sample_range, negative_number, use_gpu): + corpus = [] + queries = [] + train_data = [] + for line in open(input_file): + line = json.loads(line.strip()) + train_data.append(line) + corpus.extend(line['pos']) + if 'neg' in line: + corpus.extend(line['neg']) + queries.append(line['query']) + + if candidate_pool is not None: + if not isinstance(candidate_pool, list): + candidate_pool = get_corpus(candidate_pool) + corpus = list(set(candidate_pool)) + else: + corpus = list(set(corpus)) + + p_vecs = model.encode(corpus, batch_size=256) + q_vecs = model.encode(queries, batch_size=256) + + index = create_index(p_vecs, use_gpu=use_gpu) + _, all_inxs = batch_search(index, q_vecs, topk=sample_range[-1]) + assert len(all_inxs) == len(train_data) + + for i, data in enumerate(train_data): + query = data['query'] + inxs = all_inxs[i][sample_range[0]:sample_range[1]] + filtered_inx = [] + for inx in inxs: + if inx == -1: break + if corpus[inx] not in data['pos'] and corpus[inx] != query: + filtered_inx.append(inx) + + if len(filtered_inx) > negative_number: + filtered_inx = random.sample(filtered_inx, negative_number) + data['neg'] = [corpus[inx] for inx in filtered_inx] + + with open(output_file, 'w') as f: + for data in train_data: + if len(data['neg']) < negative_number: + data['neg'].extend(random.sample(corpus, negative_number - len(data['neg']))) + f.write(json.dumps(data, ensure_ascii=False) + '\n') diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py new file mode 100644 index 00000000000..c0a613577ea --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -0,0 +1,115 @@ 
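For orientation, here is a minimal usage sketch of the hn_mine.py helpers above. It is illustrative only: the model path and file names are placeholders, and the SentenceTransformer encoder mirrors what the calling code later in this patch passes in.
```
# Hypothetical driver for the hard-negative mining helpers in hn_mine.py.
from sentence_transformers import SentenceTransformer
from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation.hn_mine import find_knn_neg

model = SentenceTransformer("/path/to/embedding_model")  # placeholder path
find_knn_neg(model,
             input_file="raw.jsonl",       # lines of {"query": str, "pos": [str]}
             candidate_pool=None,          # None: mine negatives from the pos texts themselves
             output_file="minedHN.jsonl",  # each line gains a "neg" list
             sample_range=[2, 10],         # rank window of candidates to sample from
             negative_number=5,            # keep at most 5 negatives per query
             use_gpu=False)
```
Passing a JSONL corpus file (lines of `{"text": str}`) as candidate_pool mines negatives from that pool instead of from the training pairs themselves.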
+#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import re +import torch +from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401 +import jsonlines +from typing import List +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser +import logging +from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT +from transformers import GenerationConfig + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%m-%Y %H:%M:%S", + level=logging.INFO +) + +device = "cuda" if torch.cuda.is_available() else "cpu" + +def document_append(data_collection): + documents = [] + for data, metadata in data_collection: + if len(data) < 5: + continue + documents.append(data) + return documents + +def raw_data_generate(model_id, + input_path, + file_json_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache): + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) + data_collection = DocumentParser().load(input=input_path) + documents = document_append(data_collection) + + generation_config = GenerationConfig( + temperature = temperature, + top_p = top_p, + top_k = top_k, + repetition_penalty = repetition_penalty, + max_new_tokens = max_new_tokens, + do_sample = do_sample, + num_beams = num_beams, + num_return_sequences = num_return_sequences, + use_cache = use_cache, + pad_token_id=tokenizer.eos_token_id + ) + + for i in range(len(documents)): + context = documents[i] + + if context: + input = QUERYGENERATE_PROMPT.format(context=context) + if device=="cpu": + model_input = tokenizer(input, return_tensors="pt") + elif device=="cuda": + model_input = tokenizer(input, return_tensors="pt").to("cuda") + model.eval() + result = [] + + for j in range(5): + with torch.no_grad(): + res = model.generate(**model_input, generation_config=generation_config)[0] + res = tokenizer.decode(res, skip_special_tokens=True) + + res = res[res.find('Generated questions:') :] + res = re.sub('Generated questions:', '', res) + res = re.sub('---', '', res) + + res = res.split("?")[0:2] + for r in res: + r = r.replace('1.', "").replace('2.', "") + r = r.replace('Evaluation:', "") + r = r.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() + r = r + '?' + result.append(r) + + result_str='' + result_set = list(set(result)) + for k in range(len(result_set)): + result_str = result_str + str(k) + '. '+ result_set[k] + + if result_str and not result_str.isspace(): + data = { + "query": result_str, + "pos": [context], + } + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data)
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py new file mode 100644 index 00000000000..6edd4942796 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401 +import jsonlines +import re +import logging +from intel_extension_for_transformers.neural_chat.prompts.prompt import TRUTHGENERATE_PROMPT +from transformers import GenerationConfig +import argparse + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%m-%Y %H:%M:%S", + level=logging.INFO +) + +device = "cuda" if torch.cuda.is_available() else "cpu" + +def document_set(document_file_jsonl_path): + document_list = [] + with open(document_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=[stu["query"],stu["pos"][0]] + document_list.append(passages) + return document_list + +def raw_data_generate(model_id, + base_dir, + file_json_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache): + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) + documents = document_set(base_dir) + generation_config = GenerationConfig( + temperature = temperature, + top_p = top_p, + top_k = top_k, + repetition_penalty = repetition_penalty, + max_new_tokens = max_new_tokens, + do_sample = do_sample, + num_beams = num_beams, + num_return_sequences = num_return_sequences, + use_cache = use_cache, + pad_token_id=tokenizer.eos_token_id + ) + + for i in range(len(documents)): + [question, context] = documents[i] + + if context: + input = TRUTHGENERATE_PROMPT.format(question=question,context=context) + if device=="cpu": + model_input = tokenizer(input, return_tensors="pt") + elif device=="cuda": + model_input = tokenizer(input, return_tensors="pt").to("cuda") + model.eval() + + with torch.no_grad(): + res = model.generate(**model_input, generation_config=generation_config)[0] + res = tokenizer.decode(res, skip_special_tokens=True) + + res = res[res.find('Generated ground_truth:') :] + res = re.sub('Generated ground_truth:', '', res) + res = re.sub('---', '', res) + + result_str=res.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() + + if result_str and not result_str.isspace(): + data = { +
"question": question, + "context": [context], + "ground_truth": result_str, + } + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--llm_model", type=str) + parser.add_argument("--input", type=str) + parser.add_argument("--output", type=str) + + parser.add_argument("--temperature", type=float, default=0.8) + parser.add_argument("--top_p", type=float, default=0.9) + parser.add_argument("--top_k", type=int, default=40) + parser.add_argument("--repetition_penalty", type=float, default=2.0) + parser.add_argument("--max_new_tokens", type=int, default=48) + parser.add_argument("--do_sample", type=bool, default=True) + parser.add_argument("--num_beams", type=int, default=2) + parser.add_argument("--num_return_sequences", type=int, default=2) + parser.add_argument("--use_cache", type=bool, default=True) + + args = parser.parse_args() + + llm_model = args.llm_model + input = args.input + output = args.output + + temperature = args.temperature + top_p = args.top_p + top_k = args.top_k + repetition_penalty = args.repetition_penalty + max_new_tokens = args.max_new_tokens + do_sample = args.do_sample + num_beams = args.num_beams + num_return_sequences = args.num_return_sequences + use_cache = args.use_cache + + raw_data_generate(llm_model, + input, + output, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache) + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py new file mode 100644 index 00000000000..eaba94528a2 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import jsonlines +from .hn_mine import find_knn_neg +from sentence_transformers import SentenceTransformer + +def mine_hard_negatives(model_name_or_path, + input_file, + output_file, + range_for_sampling, + negative_number, + use_gpu_for_searching): + candidate_pool=None + + sample_range = range_for_sampling.split('-') + sample_range = [int(x) for x in sample_range] + + model = SentenceTransformer(model_name_or_path) + + find_knn_neg(model, + input_file=input_file, + candidate_pool=candidate_pool, + output_file=output_file, + sample_range=sample_range, + negative_number=negative_number, + use_gpu=use_gpu_for_searching) + +def similarity_score(queries,passages,model_name_or_path): + queries = [queries] + passages = passages + instruction = "" + model = SentenceTransformer(model_name_or_path) + q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True) + p_embeddings = model.encode(passages, normalize_embeddings=True) + similarity_score = q_embeddings @ p_embeddings.T + return similarity_score + +def similarity_check(file_jsonl_path,file_json_split_path,model_name_or_path, similarity_threshold): + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + stu["query"]=stu["query"].split("?")[:-1] + for i in range(len(stu["query"])): + stu["query"][i]=stu["query"][i].lstrip('0123456789-. ')+ '?' + if similarity_score(stu["query"][i],stu["pos"],model_name_or_path) >= similarity_threshold: + data = { + "query": stu["query"][i], + "pos": stu["pos"], + "neg": stu["neg"], + } + with jsonlines.open(file_json_split_path,"a") as file_json: + file_json.write(data) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt new file mode 100644 index 00000000000..85afbe99ffa --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt @@ -0,0 +1,2 @@ +faiss-cpu +modelscope diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cuda.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cuda.txt new file mode 100644 index 00000000000..9e91dfbd37d --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cuda.txt @@ -0,0 +1,2 @@ +faiss-gpu +modelscope diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py new file mode 100644 index 00000000000..095a5aff944 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from .llm_generate_raw_data import raw_data_generate +from .mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check +import argparse +import os + +def construct_retrieval_dataset( + llm_model, + embedding_model, + input, + output, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache, + range_for_sampling, + negative_number, + use_gpu_for_searching, + similarity_threshold): + + output_path=output+'/raw.jsonl' + raw_data_generate(llm_model, + input, + output_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache) + + output_hn_path=output+'/minedHN.jsonl' + mine_hard_negatives(embedding_model, + output_path, + output_hn_path, + range_for_sampling, + negative_number, + use_gpu_for_searching) + + output_json_split_path = output+"/minedHN_split.jsonl" + similarity_check(output_hn_path, + output_json_split_path, + embedding_model, + similarity_threshold) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--llm_model", type=str) + parser.add_argument("--embedding_model", type=str) + parser.add_argument("--input", type=str) + parser.add_argument("--output", type=str, default='./data') + + parser.add_argument("--temperature", type=float, default=0.8) + parser.add_argument("--top_p", type=float, default=0.9) + parser.add_argument("--top_k", type=int, default=40) + parser.add_argument("--repetition_penalty", type=float, default=2.0) + parser.add_argument("--max_new_tokens", type=int, default=48) + parser.add_argument("--do_sample", type=bool, default=True) + parser.add_argument("--num_beams", type=int, default=2) + parser.add_argument("--num_return_sequences", type=int, default=2) + parser.add_argument("--use_cache", type=bool, default=True) + + parser.add_argument("--range_for_sampling", type=str, default='2-10') + parser.add_argument("--negative_number", type=int, default=5) + parser.add_argument("--use_gpu_for_searching", type=bool, default=False) + + parser.add_argument("--similarity_threshold", type=float, default=0.6) + + args = parser.parse_args() + + llm_model = args.llm_model + embedding_model = args.embedding_model + input = args.input + output = args.output + + temperature = args.temperature + top_p = args.top_p + top_k = args.top_k + repetition_penalty = args.repetition_penalty + max_new_tokens = args.max_new_tokens + do_sample = args.do_sample + num_beams = args.num_beams + num_return_sequences = args.num_return_sequences + use_cache = args.use_cache + + range_for_sampling=args.range_for_sampling + negative_number=args.negative_number + use_gpu_for_searching=args.use_gpu_for_searching + + similarity_threshold=args.similarity_threshold + + try: + if os.path.exists(output) == False: + os.mkdir(output) + else: + if os.path.exists(output+'/raw.jsonl'): + os.remove(output+'/raw.jsonl') + if os.path.exists(output+'/minedHN.jsonl'): + os.remove(output+'/minedHN.jsonl') + if os.path.exists(output+'/minedHN_split.jsonl'): + os.remove(output+'/minedHN_split.jsonl') + except: + pass + + construct_retrieval_dataset( + llm_model, + embedding_model, + input, + output, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache, + range_for_sampling, + negative_number, + use_gpu_for_searching, + 
similarity_threshold) + +if __name__ == '__main__': main()
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md new file mode 100644 index 00000000000..3b95a39310c --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -0,0 +1,59 @@ +# Ragas Evaluation + +## 1. Introduction +[Ragas](https://github.com/explodinggradients/ragas) is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to run Ragas on data files. We use four metrics: answer relevancy, faithfulness, context recall, and context precision. +* **Answer relevancy** focuses on assessing how pertinent the generated answer is to the given prompt. A lower score is assigned to answers that are incomplete or contain redundant information, and higher scores indicate better relevancy. +* **Faithfulness** measures the factual consistency of the generated answer against the given context. It is calculated from the answer and the retrieved context, and the score is scaled to the (0,1) range; higher is better. +* **Context recall** measures the extent to which the retrieved context aligns with the annotated answer, treated as the ground truth. It is computed based on the ground truth and the retrieved context, and the values range between 0 and 1, with higher values indicating better performance. +* **Context precision** is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked highly. Ideally, all the relevant chunks should appear at the top ranks. This metric is computed using the question, ground_truth, and the contexts, with values ranging between 0 and 1, where higher scores indicate better precision. + +## 2. Installation +Please install the dependencies using the following commands. +``` +git clone https://github.com/intel/intel-extension-for-transformers.git +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +pip install -r requirements.txt +``` + +## 3. Evaluate RAG +* **OpenAI** +By default, Ragas uses OpenAI’s API to compute the scores. If you use this mode, ensure that the environment variable OPENAI_API_KEY is set to your API key. +``` +export OPENAI_API_KEY=xxx +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +python ragas_evaluation.py \ +--answer_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl \ +--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl +``` +* **Langchain** +You can also use other LLMs for evaluation via Langchain.
+``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +python ragas_evaluation.py \ +--answer_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl \ +--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ +--llm_model \ +--embedding_model +``` + +**Some Important Arguments**: +- `answer_file`: The path of the JSON data containing questions and answers, where each line is a dict like this:```{"question": str, "answer": str}```. See [answer.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl) for an example data file. +- `ground_truth_file`: The path of the JSON data containing questions, contexts, and ground truths, where each line is a dict like this:```{"question": str, "context": List[str], "ground_truth": str}```. See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for an example data file. The `"question"` entries of `answer_file` and `ground_truth_file` should correspond one-to-one. +- `llm_model`: If you use Langchain to run Ragas, provide the name or path of the LLM model. +- `embedding_model`: If you use Langchain to run Ragas, provide the name or path of the text embedding model. You can use "BAAI/bge-base-en-v1.5", "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". + +## 4. Result +The results include your input question, answer, contexts, and ground_truth, as well as the output answer relevancy, faithfulness, context recall, and context precision scores. +``` + question answer contexts ground_truth answer_relevancy faithfulness context_recall context_precision +0 What t... The or... [We ai... open s... 0.900788 0.500000 1.0 1.0 +1 What a... The co... [Our w... The co... 0.985826 0.250000 1.0 0.0 +...... +``` +where the input question, answer, contexts, and ground_truth of the first line are +``` +question: What types of platforms does the organization focus on? +answer: The organization focuses on delivering open software and hardware platforms with industry-defining standards, as well as leadership products, open and secure platforms, and resilient manufacturing. +contexts: [We aim to deliver open software and hardware platforms with industry-defining standards.] +ground_truth: open software and hardware platforms +```
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/__init__.py new file mode 100644 index 00000000000..18896e7b549 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml new file mode 100644 index 00000000000..a97ec88bfe4 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml @@ -0,0 +1,36 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ground_truth_file: /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl +input_path: /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt +use_openai_key: false +vector_database: Chroma +embedding_model: facebook/opt-125m +llm_model: facebook/opt-125m +reranker_model: facebook/opt-125m +retrieval_type: [default] +polish: [true] +search_type: [similarity, mmr] +k: [1] +fetch_k: [5] +score_threshold: [0.3] +top_n: [1] +enable_rerank: [true] +max_chuck_size: [256] +temperature: [0.01] +top_k: [1, 3, 5] +top_p: [0.1] +repetition_penalty: [1.0] +num_beams: [1] +do_sample: [true]
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh new file mode 100644 index 00000000000..a717f23cfdf --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh @@ -0,0 +1,164 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
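One note on config.yaml above: each list-valued field defines one axis of a benchmark grid, and the driver runs a full evaluation per combination. The sketch below is a hypothetical, compact rendering of that expansion; the shipped ragas_config_superbenchmark.py achieves the same effect with explicit nested loops.
```
# Expand the list-valued fields of config.yaml into a parameter sweep.
import itertools
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

sweep_keys = ["retrieval_type", "polish", "search_type", "k", "fetch_k",
              "score_threshold", "top_n", "enable_rerank", "max_chuck_size",
              "temperature", "top_k", "top_p", "repetition_penalty",
              "num_beams", "do_sample"]
for combo in itertools.product(*(cfg[key] for key in sweep_keys)):
    run_args = dict(zip(sweep_keys, combo))
    # Each run_args dict corresponds to one ragas_benchmark.sh invocation,
    # e.g. {'retrieval_type': 'default', 'search_type': 'similarity', 'top_k': 1, ...}
    print(run_args)
```
With the example values above (two search_type entries and three top_k entries), this expands to six runs.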
+ +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + retrieval_type='default' + search_type="similarity" + k=1 + fetch_k=5 + score_threshold=0.3 + top_n=1 + max_chuck_size=256 + temperature=0.01 + top_k=1 + top_p=0.1 + repetition_penalty=1.0 + num_beams=1 + + for var in "$@" + do + case $var in + --ground_truth_file=*) + ground_truth_file=$(echo $var |cut -f2 -d=) + ;; + --input_path=*) + input_path=$(echo $var |cut -f2 -d=) + ;; + --use_openai_key=*) + use_openai_key=$(echo $var |cut -f2 -d=) + ;; + --vector_database=*) + vector_database=$(echo $var |cut -f2 -d=) + ;; + --embedding_model=*) + embedding_model=$(echo $var |cut -f2 -d=) + ;; + --llm_model=*) + llm_model=$(echo $var |cut -f2 -d=) + ;; + --reranker_model=*) + reranker_model=$(echo ${var} |cut -f2 -d=) + ;; + --retrieval_type=*) + retrieval_type=$(echo $var |cut -f2 -d=) + ;; + --polish=*) + polish=$(echo $var |cut -f2 -d=) + ;; + --search_type=*) + search_type=$(echo $var |cut -f2 -d=) + ;; + --k=*) + k=$(echo $var |cut -f2 -d=) + ;; + --fetch_k=*) + fetch_k=$(echo $var |cut -f2 -d=) + ;; + --score_threshold=*) + score_threshold=$(echo ${var} |cut -f2 -d=) + ;; + --top_n=*) + top_n=$(echo ${var} |cut -f2 -d=) + ;; + --enable_rerank=*) + enable_rerank=$(echo $var |cut -f2 -d=) + ;; + --max_chuck_size=*) + max_chuck_size=$(echo $var |cut -f2 -d=) + ;; + --temperature=*) + temperature=$(echo $var |cut -f2 -d=) + ;; + --top_k=*) + top_k=$(echo $var |cut -f2 -d=) + ;; + --top_p=*) + top_p=$(echo $var |cut -f2 -d=) + ;; + --repetition_penalty=*) + repetition_penalty=$(echo ${var} |cut -f2 -d=) + ;; + --num_beams=*) + num_beams=$(echo ${var} |cut -f2 -d=) + ;; + --do_sample=*) + do_sample=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + + +# run_benchmark +function run_benchmark { + + if [[ ${use_openai_key} == True ]]; then + use_openai_key="--use_openai_key" + else + use_openai_key="" + fi + if [[ ${polish} == True ]]; then + polish="--polish" + else + polish="" + fi + if [[ ${enable_rerank} == True ]]; then + enable_rerank="--enable_rerank" + else + enable_rerank="" + fi + if [[ ${do_sample} == True ]]; then + do_sample="--do_sample" + else + do_sample="" + fi + + python -u ./ragas_evaluation_benchmark.py \ + --ground_truth_file ${ground_truth_file} \ + --input_path ${input_path} \ + ${use_openai_key} \ + --vector_database ${vector_database} \ + --embedding_model ${embedding_model} \ + --llm_model ${llm_model} \ + --reranker_model ${reranker_model} \ + --retrieval_type ${retrieval_type} \ + ${polish} \ + --search_type ${search_type} \ + --k ${k} \ + --fetch_k ${fetch_k} \ + --score_threshold ${score_threshold} \ + --top_n ${top_n} \ + ${enable_rerank} \ + --max_chuck_size ${max_chuck_size} \ + --temperature ${temperature} \ + --top_k ${top_k} \ + --top_p ${top_p} \ + --repetition_penalty ${repetition_penalty} \ + --num_beams ${num_beams} \ + ${do_sample} +} + +main "$@" diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py new file mode 100644 index 00000000000..5d7a72cfa18 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py @@ -0,0 +1,176 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess +import jsonlines +import yaml + +def main(): + if os.path.exists("result_ragas.jsonl"): + os.remove("result_ragas.jsonl") + script_path = 'ragas_benchmark.sh' + + parser = argparse.ArgumentParser() + parser.add_argument("--config_path", type=str, default="config.yaml") + args = parser.parse_args() + + data = read_yaml_file(args.config_path) + data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()} + arg1 = data['ground_truth_file'] + arg2 = data['input_path'] + arg3 = data['use_openai_key'] + arg4 = data['vector_database'] + arg5 = data['embedding_model'] + arg6 = data['llm_model'] + arg7 = data['reranker_model'] + arg8_list = data['retrieval_type'] + arg9_list = data['polish'] + arg10_list = data['search_type'] + arg11_list = data['k'] + arg12_list = data['fetch_k'] + arg13_list = data['score_threshold'] + arg14_list = data['top_n'] + arg15_list = data['enable_rerank'] + arg16_list = data['max_chuck_size'] + arg17_list = data['temperature'] + arg18_list = data['top_k'] + arg19_list = data['top_p'] + arg20_list = data['repetition_penalty'] + arg21_list = data['num_beams'] + arg22_list = data['do_sample'] + + for arg8 in arg8_list: + print('--'*1 +'retrieval_type',arg8) + for arg9 in arg9_list: + print('--'*2 +'polish',arg9) + for arg10 in arg10_list: + print('--'*3 +'search_type',arg10) + for arg11 in arg11_list: + print('--'*4 +'k',arg11) + for arg12 in arg12_list: + print('--'*5 +'fetch_k',arg12) + for arg13 in arg13_list: + print('--'*6 +'score_threshold',arg13) + for arg14 in arg14_list: + print('--'*7 +'top_n',arg14) + for arg15 in arg15_list: + print('--'*8 +'enable_rerank',arg15) + for arg16 in arg16_list: + print('--'*9 +'max_chuck_size',arg16) + for arg17 in arg17_list: + print('--'*10 +'temperature',arg17) + for arg18 in arg18_list: + print('--'*11 +'top_k',arg18) + for arg19 in arg19_list: + print('--'*12 +'top_p',arg19) + for arg20 in arg20_list: + print('--'*13 +'repetition_penalty',arg20) + for arg21 in arg21_list: + print('--'*14 +'num_beams',arg21) + for arg22 in arg22_list: + print('--'*15 +'do_sample',arg22) + subprocess.run(['bash', + script_path, + '--ground_truth_file='+arg1, + '--input_path='+arg2, + '--use_openai_key='+arg3, + '--vector_database='+arg4, + '--embedding_model='+arg5, + '--llm_model='+arg6, + '--reranker_model='+arg7, + '--retrieval_type='+arg8, + '--polish='+arg9, + '--search_type='+arg10, + '--k='+arg11, + '--fetch_k='+arg12, + '--score_threshold='+arg13, + '--top_n='+arg14, + '--enable_rerank='+arg15, + '--max_chuck_size='+arg16, + '--temperature='+arg17, + '--top_k='+arg18, + '--top_p='+arg19, + '--repetition_penalty='+arg20, + '--num_beams='+arg21, + '--do_sample='+arg22], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + + file_jsonl_path='result_ragas.jsonl' + + answer_relevancy_average_list = [] + faithfulness_average_list = [] + context_recall_average_list = [] + context_precision_average_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + answer_relevancy_average=stu["answer_relevancy_average"] + 
faithfulness_average=stu["faithfulness_average"] + context_recall_average=stu["context_recall_average"] + context_precision_average=stu["context_precision_average"] + + answer_relevancy_average_list.append(answer_relevancy_average) + faithfulness_average_list.append(faithfulness_average) + context_recall_average_list.append(context_recall_average) + context_precision_average_list.append(context_precision_average) + + answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) \ + if v == max(answer_relevancy_average_list)] + faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) \ + if v == max(faithfulness_average_list)] + context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) \ + if v == max(context_recall_average_list)] + context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) \ + if v == max(context_precision_average_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in answer_relevancy_average_line_number_list: + print('max_answer_relevancy_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in faithfulness_average_line_number_list: + print('max_faithfulness_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_recall_average_line_number_list: + print('max_context_recall_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_precision_average_line_number_list: + print('max_context_precision_average',stu) + line+=1 + +def read_yaml_file(file_path): + with open(file_path, 'r') as stream: + try: + return yaml.safe_load(stream) + except yaml.YAMLError as exc: + print(exc) + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py new file mode 100644 index 00000000000..b4bccae2c14 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -0,0 +1,116 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
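For reference, the superbenchmark driver above needs only the config path; it shells out to ragas_benchmark.sh once per parameter combination and then prints the best-scoring settings recorded in result_ragas.jsonl.
```
cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework
python ragas_config_superbenchmark.py --config_path config.yaml
```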
+ +from datasets import Dataset +import os +from ragas import evaluate # pylint: disable=E0401 +from ragas.metrics import ( # pylint: disable=E0401 + answer_relevancy, + faithfulness, + context_recall, + context_precision, +) +from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings +from ragas.llms import LangchainLLMWrapper # pylint: disable=E0611 +from ragas.embeddings import LangchainEmbeddingsWrapper # pylint: disable=E0611 +import pandas as pd +import jsonlines +import argparse + + +pd.set_option("display.max_rows", None) +pd.set_option("display.max_columns", None) +pd.set_option("display.width", None) +pd.set_option("display.max_colwidth", 10) + +def load_set(file_jsonl_path, item): + list = [] + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=stu[item] + list.append(passages) + return list + +def ragas(answer_file, ground_truth_file, llm_model, embedding_model): + + question_list=load_set(answer_file, "question") + answer_list=load_set(answer_file, "answer") + contexts_list=load_set(ground_truth_file, "context") + ground_truth_list=load_set(ground_truth_file, "ground_truth") + + data_samples = { + 'question': question_list, + 'answer': answer_list, + 'contexts' : contexts_list, + 'ground_truth': ground_truth_list + } + + dataset = Dataset.from_dict(data_samples) + + if llm_model and embedding_model: + langchain_llm = HuggingFacePipeline.from_model_id( + model_id=llm_model, + task="text-generation", + pipeline_kwargs={"max_new_tokens": 128}, + ) + if "instruct" in embedding_model: + langchain_embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) + elif "bge" in embedding_model: + langchain_embeddings = HuggingFaceBgeEmbeddings( + model_name=embedding_model, + encode_kwargs={'normalize_embeddings': True}, + query_instruction="Represent this sentence for searching relevant passages:") + elif "Google" == embedding_model: + langchain_embeddings = GooglePalmEmbeddings() + else: + langchain_embeddings = HuggingFaceEmbeddings( + model_name=embedding_model, + encode_kwargs={"normalize_embeddings": True}, + ) + + langchain_llm = LangchainLLMWrapper(langchain_llm) + langchain_embedding = LangchainEmbeddingsWrapper(langchain_embeddings) + score = evaluate(dataset, # pylint: disable=E1123 + metrics=[answer_relevancy, faithfulness, context_recall, context_precision], + llm = langchain_llm, # pylint: disable=E1123 + embeddings=langchain_embedding) # pylint: disable=E1123 + else: + os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") + score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + + df=score.to_pandas() + print(df) + return df + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--answer_file", type=str) + parser.add_argument("--ground_truth_file", type=str) + parser.add_argument("--llm_model", type=str) + parser.add_argument("--embedding_model", type=str) + args = parser.parse_args() + + answer_file = args.answer_file + ground_truth_file = args.ground_truth_file + llm_model = args.llm_model + embedding_model = args.embedding_model + + metrics=ragas(answer_file, ground_truth_file, llm_model, embedding_model) + return metrics + +if __name__ == '__main__': + main() diff --git 
a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py new file mode 100644 index 00000000000..7493ec06032 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -0,0 +1,299 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datasets import Dataset +import os, shutil +from ragas import evaluate # pylint: disable=E0401 +from ragas.metrics import ( # pylint: disable=E0401 + answer_relevancy, + faithfulness, + context_recall, + context_precision, +) +from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings +from ragas.llms import LangchainLLMWrapper # pylint: disable=E0611 +from ragas.embeddings import LangchainEmbeddingsWrapper # pylint: disable=E0611 +import pandas as pd +import jsonlines +import argparse +from intel_extension_for_transformers.neural_chat import PipelineConfig +from intel_extension_for_transformers.neural_chat import build_chatbot +from intel_extension_for_transformers.neural_chat import plugins +from intel_extension_for_transformers.neural_chat.config import GenerationConfig + +pd.set_option("display.max_rows", None) +pd.set_option("display.max_columns", None) +pd.set_option("display.width", None) +pd.set_option("display.max_colwidth", 10) + +def load_set(file_jsonl_path, item): + list = [] + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=stu[item] + list.append(passages) + return list + +def ragas(answer_file, ground_truth_file, llm_model, embedding_model, use_openai_key): + + question_list=load_set(answer_file, "question") + answer_list=load_set(answer_file, "answer") + contexts_list=load_set(ground_truth_file, "context") + ground_truth_list=load_set(ground_truth_file, "ground_truth") + + data_samples = { + 'question': question_list, + 'answer': answer_list, + 'contexts' : contexts_list, + 'ground_truth': ground_truth_list + } + + dataset = Dataset.from_dict(data_samples) + + if use_openai_key: + os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") + score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + else: + langchain_llm = HuggingFacePipeline.from_model_id( + model_id=llm_model, + task="text-generation", + pipeline_kwargs={"max_new_tokens": 128}, + ) + if "instruct" in embedding_model: + langchain_embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) + elif "bge" in embedding_model: + langchain_embeddings = HuggingFaceBgeEmbeddings( + model_name=embedding_model, + encode_kwargs={'normalize_embeddings': True}, + 
query_instruction="Represent this sentence for searching relevant passages:") + elif "Google" == embedding_model: + langchain_embeddings = GooglePalmEmbeddings() + else: + langchain_embeddings = HuggingFaceEmbeddings( + model_name=embedding_model, + encode_kwargs={"normalize_embeddings": True}, + ) + + langchain_llm = LangchainLLMWrapper(langchain_llm) + langchain_embedding = LangchainEmbeddingsWrapper(langchain_embeddings) + score = evaluate(dataset, # pylint: disable=E1123 + metrics=[answer_relevancy, faithfulness, context_recall, context_precision], + llm = langchain_llm, # pylint: disable=E1123 + embeddings=langchain_embedding) # pylint: disable=E1123 + + df=score.to_pandas() + answer_relevancy_average=df['answer_relevancy'][:].mean() + faithfulness_average=df['faithfulness'][:].mean() + context_recall_average=df['context_recall'][:].mean() + context_precision_average=df['context_precision'][:].mean() + return answer_relevancy_average, faithfulness_average, context_recall_average, context_precision_average + + +def rag(text, chatbot, generation_config): + response = chatbot.predict(text, config=generation_config) + return response + +def result_data(ground_truth_file, + input_path, + vector_database="Chroma", + embedding_model="BAAI/bge-large-en-v1.5", + retrieval_type='default', + max_chuck_size=256, + search_type="similarity", + k=1, + fetch_k=5, + score_threshold=0.3, + polish=False, + top_n=1, + enable_rerank=False, + reranker_model="BAAI/bge-reranker-large", + llm_model='intel/neural-chat-7b-v3-1', + temperature=0.01, + top_k=1, + top_p=0.1, + repetition_penalty=1.0, + num_beams=1, + do_sample=True + ): + question_list = load_set(ground_truth_file, "question") + + result_answer_path='result_answer.jsonl' + if os.path.exists("result_answer.jsonl"): + os.remove("result_answer.jsonl") + + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) + + plugins.retrieval.enable=True + plugins.retrieval.args["input_path"]=input_path + plugins.retrieval.args["vector_database"]=vector_database + plugins.retrieval.args["embedding_model"]=embedding_model + plugins.retrieval.args["retrieval_type"]=retrieval_type + plugins.retrieval.args["max_chuck_size"]=max_chuck_size + plugins.retrieval.args["search_type"]=search_type + if search_type=="similarity": + plugins.retrieval.args["search_kwargs"]={"k":k} + elif search_type=="mmr": + plugins.retrieval.args["search_kwargs"]={"k":k, "fetch_k":fetch_k} + elif search_type=="similarity_score_threshold": + plugins.retrieval.args["search_kwargs"]={"k":k, "score_threshold":score_threshold} + plugins.retrieval.args["polish"]=polish + plugins.retrieval.args["top_n"]=top_n + plugins.retrieval.args["enable_rerank"]=enable_rerank + plugins.retrieval.args["reranker_model"]=reranker_model + config = PipelineConfig(plugins=plugins, model_name_or_path=llm_model, device="cuda") + chatbot = build_chatbot(config) + generation_config=GenerationConfig(temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, + do_sample=do_sample) + + for question in question_list: + response = rag(question, chatbot, generation_config) + data = { + "question": question, + "answer": response, + } + with jsonlines.open(result_answer_path,"a") as file_json: + file_json.write(data) + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument("--ground_truth_file", type=str) + parser.add_argument("--input_path", type=str) + parser.add_argument("--use_openai_key", default=False, action='store_true') + 
+ parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + parser.add_argument("--retrieval_type", type=str, default='default') + parser.add_argument("--polish", default=False, action='store_true') + parser.add_argument("--search_type", type=str, default="similarity") + parser.add_argument("--k", type=int, default=1) + parser.add_argument("--fetch_k", type=int, default=5) + parser.add_argument("--score_threshold", type=float, default=0.3) + parser.add_argument("--top_n", type=int, default=1) + parser.add_argument("--enable_rerank", default=False, action='store_true') + + parser.add_argument("--max_chuck_size", type=int, default=256) + parser.add_argument("--temperature", type=float, default=0.01) + parser.add_argument("--top_k", type=int, default=1) + parser.add_argument("--top_p", type=float, default=0.1) + parser.add_argument("--repetition_penalty", type=float, default=1.0) + parser.add_argument("--num_beams", type=int, default=1) + parser.add_argument("--do_sample", default=False, action='store_true') + + args = parser.parse_args() + + ground_truth_file = args.ground_truth_file + input_path = args.input_path + use_openai_key = args.use_openai_key + + vector_database = args.vector_database + embedding_model = args.embedding_model + retrieval_type = args.retrieval_type + polish = args.polish + search_type = args.search_type + llm_model = args.llm_model + k = args.k + fetch_k = args.fetch_k + score_threshold = args.score_threshold + reranker_model = args.reranker_model + top_n = args.top_n + enable_rerank = args.enable_rerank + + max_chuck_size = args.max_chuck_size + temperature = args.temperature + top_k = args.top_k + top_p = args.top_p + repetition_penalty = args.repetition_penalty + num_beams = args.num_beams + do_sample = args.do_sample + + + result_data(ground_truth_file, + input_path, + vector_database=vector_database, + embedding_model=embedding_model, + retrieval_type=retrieval_type, + max_chuck_size=max_chuck_size, + search_type=search_type, + k=k, + fetch_k=fetch_k, + score_threshold=score_threshold, + polish=polish, + top_n=top_n, + enable_rerank=enable_rerank, + reranker_model=reranker_model, + llm_model=llm_model, + temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, + do_sample=do_sample) + + answer_file = 'result_answer.jsonl' + answer_relevancy_average,faithfulness_average,context_recall_average,context_precision_average=ragas( + answer_file, + ground_truth_file, + llm_model, + embedding_model, + use_openai_key) + + file_json_path='result_ragas.jsonl' + + if answer_relevancy_average and faithfulness_average and context_recall_average and context_precision_average: + data = { + "ground_truth_file": args.ground_truth_file, + "input_path": args.input_path, + "use_openai_key": args.use_openai_key, + "vector_database": args.vector_database, + "embedding_model": args.embedding_model, + "retrieval_type": args.retrieval_type, + "polish": args.polish, + "search_type": args.search_type, + "llm_model": args.llm_model, + "k": args.k, + "fetch_k": args.fetch_k, + "score_threshold": args.score_threshold, + "reranker_model": args.reranker_model, + "top_n": args.top_n, + "enable_rerank": args.enable_rerank, + "max_chuck_size": args.max_chuck_size, + "temperature": args.temperature, + 
"top_k": args.top_k, + "top_p": args.top_p, + "repetition_penalty": args.repetition_penalty, + "num_beams": args.num_beams, + "do_sample": args.do_sample, + "answer_relevancy_average": answer_relevancy_average, + "faithfulness_average": faithfulness_average, + "context_recall_average": context_recall_average, + "context_precision_average": context_precision_average, + } + print(data) + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py new file mode 100644 index 00000000000..999e712871d --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py @@ -0,0 +1,175 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess +import jsonlines + +def main(): + if os.path.exists("result_ragas.jsonl"): + os.remove("result_ragas.jsonl") + script_path = 'ragas_benchmark.sh' + + parser = argparse.ArgumentParser() + parser.add_argument("--ground_truth_file", type=str) + parser.add_argument("--input_path", type=str) + parser.add_argument("--use_openai_key", default=False, action='store_true') + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + args = parser.parse_args() + + arg1 = args.ground_truth_file + arg2 = args.input_path + arg3 = str(args.use_openai_key) + arg4 = args.vector_database + arg5 = args.embedding_model + arg6 = args.llm_model + arg7 = args.reranker_model + + arg8_list = ['default','child_parent','bm25'] + arg9_list = ['True','False'] + arg10_list = ['similarity','mmr','similarity_score_threshold'] + arg11_list = ['1', '3', '5'] + arg12_list = ['5', '10', '20'] + arg13_list = ['0.3','0.5','0.7'] + arg14_list = ['1','3', '5','10'] + arg15_list = ['True','False'] + arg16_list = ['256','512', '768','1024'] + arg17_list = ['0.01','0.05', '0.1','0.3','0.5','0.7'] + arg18_list = ['1','3', '10','20'] + arg19_list = ['0.1','0.3', '0.5','0.7'] + arg20_list = ['1.0','1.1', '1.3','1.5','1.7'] + arg21_list = ['1','3', '10','20'] + arg22_list = ['True','False'] + + for arg8 in arg8_list: + print('--'*1 +'retrieval_type',arg8) + for arg9 in arg9_list: + print('--'*2 +'polish',arg9) + for arg10 in arg10_list: + print('--'*3 +'search_type',arg10) + for arg11 in arg11_list: + print('--'*4 +'k',arg11) + for arg12 in arg12_list: + print('--'*5 +'fetch_k',arg12) + for arg13 in arg13_list: + print('--'*6 +'score_threshold',arg13) + for arg14 in arg14_list: + print('--'*7 +'top_n',arg14) + for arg15 in arg15_list: + print('--'*8 +'enable_rerank',arg15) + for arg16 in 
arg16_list: + print('--'*9 +'max_chuck_size',arg16) + for arg17 in arg17_list: + print('--'*10 +'temperature',arg17) + for arg18 in arg18_list: + print('--'*11 +'top_k',arg18) + for arg19 in arg19_list: + print('--'*12 +'top_p',arg19) + for arg20 in arg20_list: + print('--'*13 +'repetition_penalty',arg20) + for arg21 in arg21_list: + print('--'*14 +'num_beams',arg21) + for arg22 in arg22_list: + print('--'*15 +'do_sample',arg22) + subprocess.run(['bash', + script_path, + '--ground_truth_file='+arg1, + '--input_path='+arg2, + '--use_openai_key='+arg3, + '--vector_database='+arg4, + '--embedding_model='+arg5, + '--llm_model='+arg6, + '--reranker_model='+arg7, + '--retrieval_type='+arg8, + '--polish='+arg9, + '--search_type='+arg10, + '--k='+arg11, + '--fetch_k='+arg12, + '--score_threshold='+arg13, + '--top_n='+arg14, + '--enable_rerank='+arg15, + '--max_chuck_size='+arg16, + '--temperature='+arg17, + '--top_k='+arg18, + '--top_p='+arg19, + '--repetition_penalty='+arg20, + '--num_beams='+arg21, + '--do_sample='+arg22], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + + file_jsonl_path='result_ragas.jsonl' + + answer_relevancy_average_list = [] + faithfulness_average_list = [] + context_recall_average_list = [] + context_precision_average_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + answer_relevancy_average=stu["answer_relevancy_average"] + faithfulness_average=stu["faithfulness_average"] + context_recall_average=stu["context_recall_average"] + context_precision_average=stu["context_precision_average"] + + answer_relevancy_average_list.append(answer_relevancy_average) + faithfulness_average_list.append(faithfulness_average) + context_recall_average_list.append(context_recall_average) + context_precision_average_list.append(context_precision_average) + + answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) \ + if v == max(answer_relevancy_average_list)] + faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) \ + if v == max(faithfulness_average_list)] + context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) \ + if v == max(context_recall_average_list)] + context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) \ + if v == max(context_precision_average_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in answer_relevancy_average_line_number_list: + print('max_answer_relevancy_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in faithfulness_average_line_number_list: + print('max_faithfulness_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_recall_average_line_number_list: + print('max_context_recall_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_precision_average_line_number_list: + print('max_context_precision_average',stu) + line+=1 + + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt new file mode 100644 index 00000000000..e8bf3fd1350 --- /dev/null +++ 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt @@ -0,0 +1,6 @@
+InstructorEmbedding
+intel-extension-for-transformers
+jsonlines
+pyyaml
+ragas
+sentence-transformers==2.3.1
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md new file mode 100644 index 00000000000..6280ffe76a3 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md @@ -0,0 +1,39 @@
+# Retrieval Evaluation
+
+## 1. Introduction
+We provide a script to evaluate retrieval performance using two metrics: MRR (Mean Reciprocal Rank) and Hit (Hit Ratio).
+* **MRR** is a widely used metric for evaluating search algorithms. It measures the rank of the ground truth in the retrieval list; the higher, the better.
+* **Hit** measures the accuracy of retrieval, that is, whether the ground truth is included in the retrieved items; the higher, the better.
+
+## 2. Installation
+Please install the requirements for NeuralChat and the retrieval plugin with the following commands.
+```
+git clone https://github.com/intel/intel-extension-for-transformers.git
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat
+pip install -r requirements.txt
+cd pipeline/plugins/retrieval
+pip install -r requirements.txt
+```
+
+## 3. Evaluate Retrieval
+You can evaluate retrieval performance with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever
+python evaluate_retrieval.py \
+--index_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl \
+--query_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl
+```
+
+**Some Important Arguments**:
+- `index_file_jsonl_path`: path of the JSON data containing the candidate contexts, where each line is a dict like this: ```{"context": List[str]}```. See [candidate_context.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl) for a sample data file.
+- `query_file_jsonl_path`: path of the JSON data containing the queries and positives, where each line is a dict like this: ```{"query": str, "pos": List[str]}```. See [example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl) for a sample data file.
+- `vector_database`: The vector database for constructing the knowledge base. The default value is "Chroma". The other option is "Qdrant".
+- `embedding_model`: The name or path of the text embedding model. The default value is "BAAI/bge-base-en-v1.5". Other options are "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large".
+- `retrieval_type`: The type of the retriever. The default value is "default". The other options are "child_parent" and "bm25".
+- `search_type`: The type of search to perform. The default value is "similarity". The other options are "mmr" and "similarity_score_threshold".
+
+## 4. Result
+The results report MRR and Hit at cutoffs 1 and 5, respectively.
+```
+{'MRR@1': 0.7, 'MRR@5': 0.72, 'Hit@1': 0.7, 'Hit@5': 0.8}
+```
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/__init__.py new file mode 100644 index 00000000000..18896e7b549 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/__init__.py @@ -0,0 +1,16 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml new file mode 100644 index 00000000000..7b293c3e442 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml @@ -0,0 +1,28 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+index_file_jsonl_path: /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl
+query_file_jsonl_path: /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl
+vector_database: Chroma
+embedding_model: facebook/opt-125m
+llm_model: facebook/opt-125m
+reranker_model: facebook/opt-125m
+retrieval_type: [default]
+polish: [true]
+search_type: [similarity, mmr]
+k: [1]
+fetch_k: [5]
+score_threshold: [0.3]
+top_n: [1]
+enable_rerank: [true]
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py new file mode 100644 index 00000000000..010add7914d --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py @@ -0,0 +1,284 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import ClassVar, Collection +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter +from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant # pylint: disable=E0401, E0611 +import uuid +from langchain_core.documents import Document +from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever # pylint: disable=E0401, E0611 +from langchain_core.vectorstores import VectorStoreRetriever +from langchain_community.retrievers import BM25Retriever +import jsonlines +import numpy as np +import logging +import argparse + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) + +def document_transfer(data_collection): + "Transfer the raw document into langchain supported format." + documents = [] + for data, meta in data_collection: + doc_id = str(uuid.uuid4()) + metadata = {"source": meta, "identify_id":doc_id} + doc = Document(page_content=data, metadata=metadata) + documents.append(doc) + return documents + +def document_append_id(documents): + for _doc in documents: + _doc.metadata["doc_id"] = _doc.metadata["identify_id"] + return documents + +def index_library(index_file_jsonl_path): + index_list = [] + with open(index_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=[stu["context"][0],index_file_jsonl_path] + index_list.append(passages) + return index_list + +def query_set(query_file_jsonl_path): + query_list = [] + with open(query_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=stu["query"] + query_list.append(passages) + return query_list + +def load_list(file_jsonl_path, item): + with open(file_jsonl_path) as file: + data = [] + for stu in jsonlines.Reader(file): + content = ",".join(stu[item]) + data.append(content) + return data + +def evaluate(preds, labels, cutoffs=[1,5]): + """Evaluate MRR and Hit at cutoffs.""" + metrics = {} + + # MRR + mrrs = np.zeros(len(cutoffs)) + for pred, label in zip(preds, labels): + jump = False + for i, x in enumerate(pred, 1): + if x in label: + for k, cutoff in enumerate(cutoffs): + if i <= cutoff: + mrrs[k] += 1 / i + jump = True + if jump: + break + mrrs /= len(preds) + for i, cutoff in enumerate(cutoffs): + mrr = mrrs[i] + metrics[f"MRR@{cutoff}"] = mrr + + # Hit + hit_rate_list=[] + for cutoff in cutoffs: + hit_num = 0 + for pred, label in zip(preds, labels): + hit_list=np.intersect1d(label, pred[:cutoff]) + hit_num = hit_num+len(hit_list) + hit_rate = hit_num/len(labels) + hit_rate_list.append(hit_rate) + for i, cutoff in enumerate(cutoffs): + hit_rate = hit_rate_list[i] + metrics[f"Hit@{cutoff}"] = hit_rate + + return metrics + +class Retrieval(): + def __init__(self, + vector_database="Chroma", + embedding_model="BAAI/bge-base-en-v1.5", + input_path = None, + 
retrieval_type = 'default',
+                 append=True,
+                 **kwargs):
+
+        self.vector_database = vector_database
+        self.input_path = None
+        self.retrieval_type = retrieval_type
+        self.retriever = None
+        self.splitter = RecursiveCharacterTextSplitter(chunk_size=kwargs['child_size'] \
+            if 'child_size' in kwargs else 512)
+        allowed_retrieval_type: ClassVar[Collection[str]] = (
+            "default",
+            "child_parent",
+            'bm25',
+        )
+
+        assert self.retrieval_type in allowed_retrieval_type, "retrieval_type of {} not allowed.".format( \
+            self.retrieval_type)
+
+        self.input_path = input_path
+        assert self.input_path is not None, "Should give an input path!"
+
+        try:
+            if "instruct" in embedding_model:
+                self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model)
+            elif "bge" in embedding_model:
+                self.embeddings = HuggingFaceBgeEmbeddings(
+                    model_name=embedding_model,
+                    encode_kwargs={'normalize_embeddings': True},
+                    query_instruction="Represent this sentence for searching relevant passages:")
+            elif "Google" == embedding_model:
+                self.embeddings = GooglePalmEmbeddings()
+            else:
+                self.embeddings = HuggingFaceEmbeddings(
+                    model_name=embedding_model,
+                    encode_kwargs={"normalize_embeddings": True},
+                )
+        except Exception as e:
+            logging.error("Please select a proper embedding model.")
+            logging.error(e)
+
+        data_collection = index_library(self.input_path)
+        logging.info("The parsing for the uploaded files is finished.")
+
+        langchain_documents = document_transfer(data_collection)
+        logging.info("The format of parsed documents is transferred.")
+
+        if kwargs['search_type']=="similarity":
+            kwargs['search_kwargs']={"k":5}
+        elif kwargs['search_type']=="mmr":
+            kwargs['search_kwargs']={"k":5}
+        elif kwargs['search_type']=="similarity_score_threshold":
+            kwargs['search_kwargs']={"k":5, "score_threshold":0.6}
+
+        if self.vector_database == "Chroma":
+            self.database = Chroma
+        elif self.vector_database == "Qdrant":
+            self.database = Qdrant
+        if self.retrieval_type == 'default':  # Using vector store retriever
+            if append:
+                knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings,
+                                                              **kwargs)
+            else:
+                knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \
+                                              **kwargs)
+            if self.vector_database == "Qdrant" and knowledge_base.is_local():
+                # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously.
+ knowledge_base.client.close() + elif self.retrieval_type == "child_parent": # Using child-parent store retriever + child_documents = self.splitter.split_documents(langchain_documents) + langchain_documents = document_append_id(langchain_documents) + if append: + knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, + **kwargs) + child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \ + embedding=self.embeddings, **kwargs) + else: + knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) + child_knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, \ + sign='child', **kwargs) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ + child_document_store=child_knowledge_base, **kwargs) + if self.vector_database == "Qdrant" : + # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. + if knowledge_base.is_local(): + knowledge_base.client.close() + if child_knowledge_base.is_local(): + child_knowledge_base.client.close() + elif self.retrieval_type == "bm25": + self.docs = document_append_id(langchain_documents) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, docs=self.docs, **kwargs) + logging.info("The retriever is successfully built.") + + def pre_llm_inference_actions(self, query): + assert self.retriever is not None, logging.info("Please check the status of retriever") + context = self.retriever.get_context(query) + return context + + +class RetrieverAdapter(): + def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \ + reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs): + self.retrieval_type = retrieval_type + if enable_rerank: + from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker # pylint: disable=E0401, E0611 + from FlagEmbedding import FlagReranker + reranker = FlagReranker(reranker_model) + self.reranker = BgeReranker(model = reranker, top_n=top_n) + else: + self.reranker = None + + if self.retrieval_type == "default": + self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs) + elif self.retrieval_type == "bm25": + self.retriever = BM25Retriever.from_documents(docs, **kwargs) + elif self.retrieval_type == "child_parent": + self.retriever = ChildParentRetriever(parentstore=document_store, \ + vectorstore=child_document_store, + **kwargs) # pylint: disable=abstract-class-instantiated + else: + logging.error('The chosen retrieval type remains outside the supported scope.') + + def get_context(self, query): + context = [] + retrieved_documents = self.retriever.get_relevant_documents(query) + if self.reranker is not None: + retrieved_documents = self.reranker.compress_documents(documents = retrieved_documents, query = query) + for doc in retrieved_documents: + context.append(doc.page_content) + return context + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--index_file_jsonl_path", type=str) + parser.add_argument("--query_file_jsonl_path", type=str) + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-base-en-v1.5") + parser.add_argument("--retrieval_type", type=str, default='default') + parser.add_argument("--search_type", 
type=str, default="similarity") + args = parser.parse_args() + + index_file_jsonl_path = args.index_file_jsonl_path + query_file_jsonl_path = args.query_file_jsonl_path + vector_database = args.vector_database + embedding_model = args.embedding_model + retrieval_type = args.retrieval_type + search_type = args.search_type + + query_list = query_set(query_file_jsonl_path) + retrieval_results=[] + for query in query_list: + context=Retrieval(input_path=index_file_jsonl_path, + vector_database=vector_database, + embedding_model=embedding_model, + retrieval_type = retrieval_type, + search_type=search_type).pre_llm_inference_actions(query=query) + retrieval_results.append(context) + ground_truths=load_list(query_file_jsonl_path, "pos") + metrics = evaluate(retrieval_results, ground_truths) + print(metrics) + return metrics + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py new file mode 100644 index 00000000000..080b3434f2a --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py @@ -0,0 +1,375 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import ClassVar, Collection +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter +from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant # pylint: disable=E0401, E0611 +import uuid +from langchain_core.documents import Document +from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever # pylint: disable=E0401, E0611 +from langchain_core.vectorstores import VectorStoreRetriever +from langchain_community.retrievers import BM25Retriever +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.detector.query_explainer \ + import QueryPolisher +from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig +import jsonlines +import numpy as np +import logging +import argparse + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) + +def document_transfer(data_collection): + "Transfer the raw document into langchain supported format." 
+    documents = []
+    for data, meta in data_collection:
+        doc_id = str(uuid.uuid4())
+        metadata = {"source": meta, "identify_id":doc_id}
+        doc = Document(page_content=data, metadata=metadata)
+        documents.append(doc)
+    return documents
+
+def document_append_id(documents):
+    for _doc in documents:
+        _doc.metadata["doc_id"] = _doc.metadata["identify_id"]
+    return documents
+
+def index_library(index_file_jsonl_path):
+    index_list = []
+    with open(index_file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            passages=[stu["context"][0],index_file_jsonl_path]
+            index_list.append(passages)
+    return index_list
+
+def query_set(query_file_jsonl_path):
+    query_list = []
+    with open(query_file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            passages=stu["query"]
+            query_list.append(passages)
+    return query_list
+
+def load_list(file_jsonl_path, item):
+    with open(file_jsonl_path) as file:
+        data = []
+        for stu in jsonlines.Reader(file):
+            content = ",".join(stu[item])
+            data.append(content)
+    return data
+
+def evaluate(preds, labels, cutoffs=[1]):
+    """Evaluate MRR and Hit at cutoffs."""
+    metrics = {}
+
+    # MRR
+    mrrs = np.zeros(len(cutoffs))
+    for pred, label in zip(preds, labels):
+        jump = False
+        for i, x in enumerate(pred, 1):
+            if x in label:
+                for k, cutoff in enumerate(cutoffs):
+                    if i <= cutoff:
+                        mrrs[k] += 1 / i
+                jump = True
+            if jump:
+                break
+    mrrs /= len(preds)
+    for i, cutoff in enumerate(cutoffs):
+        mrr = mrrs[i]
+        metrics[f"MRR@{cutoff}"] = mrr
+
+    # Hit
+    hit_rate_list=[]
+    for cutoff in cutoffs:
+        hit_num = 0
+        for pred, label in zip(preds, labels):
+            hit_list=np.intersect1d(label, pred[:cutoff])
+            hit_num = hit_num+len(hit_list)
+        hit_rate = hit_num/len(labels)
+        hit_rate_list.append(hit_rate)
+    for i, cutoff in enumerate(cutoffs):
+        hit_rate = hit_rate_list[i]
+        metrics[f"Hit@{cutoff}"] = hit_rate
+
+    return metrics["MRR@1"], metrics["Hit@1"]
+
+class Retrieval():
+    def __init__(self,
+                 vector_database="Chroma",
+                 embedding_model="BAAI/bge-large-en-v1.5",
+                 input_path = None,
+                 retrieval_type = 'default',
+                 append=True,
+                 polish=False,
+                 k=1,
+                 fetch_k=1,
+                 score_threshold=0.3,
+                 reranker_model= "BAAI/bge-reranker-large",
+                 top_n = 1,
+                 enable_rerank = False,
+                 **kwargs):
+
+        self.vector_database = vector_database
+        self.input_path = None
+        self.retrieval_type = retrieval_type
+        self.retriever = None
+        self.k = k
+        self.fetch_k = fetch_k
+        self.score_threshold = score_threshold
+        self.reranker_model = reranker_model
+        self.top_n = top_n
+        self.enable_rerank = enable_rerank
+
+        self.splitter = RecursiveCharacterTextSplitter(chunk_size=kwargs['child_size'] \
+            if 'child_size' in kwargs else 512)
+        allowed_retrieval_type: ClassVar[Collection[str]] = (
+            "default",
+            "child_parent",
+            'bm25',
+        )
+
+        if polish:
+            self.polisher = QueryPolisher()
+        else:
+            self.polisher = None
+
+        assert self.retrieval_type in allowed_retrieval_type, "retrieval_type of {} not allowed.".format( \
+            self.retrieval_type)
+
+        self.input_path = input_path
+        assert self.input_path is not None, "Should give an input path!"
+ + try: + if "instruct" in embedding_model: + self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) + elif "bge" in embedding_model: + self.embeddings = HuggingFaceBgeEmbeddings( + model_name=embedding_model, + encode_kwargs={'normalize_embeddings': True}, + query_instruction="Represent this sentence for searching relevant passages:") + elif "Google" == embedding_model: + self.embeddings = GooglePalmEmbeddings() + else: + self.embeddings = HuggingFaceEmbeddings( + model_name=embedding_model, + encode_kwargs={"normalize_embeddings": True}, + ) + except Exception as e: + logging.error("Please select a proper embedding model.") + logging.error(e) + + data_collection = index_library(self.input_path) + logging.info("The parsing for the uploaded files is finished.") + + langchain_documents = document_transfer(data_collection) + logging.info("The format of parsed documents is transferred.") + + if kwargs['search_type']=="similarity": + kwargs['search_kwargs']={"k":self.k} + elif kwargs['search_type']=="mmr": + kwargs['search_kwargs']={"k":self.k, "fetch_k":self.fetch_k} + elif kwargs['search_type']=="similarity_score_threshold": + kwargs['search_kwargs']={"k":self.k, "score_threshold":self.score_threshold} + + if self.vector_database == "Chroma": + self.database = Chroma + elif self.vector_database == "Qdrant": + self.database = Qdrant + if self.retrieval_type == 'default': # Using vector store retriever + if append: + knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, + **kwargs) + else: + knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ + **kwargs) + if self.vector_database == "Qdrant" and knowledge_base.is_local(): + # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. + knowledge_base.client.close() + elif self.retrieval_type == "child_parent": # Using child-parent store retriever + child_documents = self.splitter.split_documents(langchain_documents) + langchain_documents = document_append_id(langchain_documents) + if append: + knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, + **kwargs) + child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \ + embedding=self.embeddings, **kwargs) + else: + knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) + child_knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, \ + sign='child', **kwargs) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ + child_document_store=child_knowledge_base, **kwargs) + if self.vector_database == "Qdrant" : + # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. 
+                if knowledge_base.is_local():
+                    knowledge_base.client.close()
+                if child_knowledge_base.is_local():
+                    child_knowledge_base.client.close()
+        elif self.retrieval_type == "bm25":
+            self.docs = document_append_id(langchain_documents)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type,
+                                              docs=self.docs,
+                                              reranker_model=self.reranker_model,
+                                              top_n = self.top_n,
+                                              enable_rerank = self.enable_rerank,
+                                              **kwargs)
+        logging.info("The retriever is successfully built.")
+
+    def pre_llm_inference_actions(self, model_name, query):
+        if self.polisher:
+            try:
+                query = self.polisher.polish_query(model_name, query)
+            except Exception as e:
+                logging.info(f"Polishing the user query failed: {e}")
+                raise Exception("[Retrieval ERROR] query polish failed!")
+
+        assert self.retriever is not None, "Please check the status of retriever"
+        context = self.retriever.get_context(query)
+        return context
+
+
+class RetrieverAdapter():
+    def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \
+                 reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs):
+        self.retrieval_type = retrieval_type
+        if enable_rerank:
+            from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker # pylint: disable=E0401, E0611
+            from FlagEmbedding import FlagReranker
+            reranker = FlagReranker(reranker_model)
+            self.reranker = BgeReranker(model = reranker, top_n=top_n)
+        else:
+            self.reranker = None
+
+        if self.retrieval_type == "default":
+            self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs)
+        elif self.retrieval_type == "bm25":
+            self.retriever = BM25Retriever.from_documents(docs, **kwargs)
+        elif self.retrieval_type == "child_parent":
+            self.retriever = ChildParentRetriever(parentstore=document_store, \
+                                                  vectorstore=child_document_store,
+                                                  **kwargs)   # pylint: disable=abstract-class-instantiated
+        else:
+            logging.error('The chosen retrieval type remains outside the supported scope.')
+
+    def get_context(self, query):
+        context = []
+        retrieved_documents = self.retriever.get_relevant_documents(query)
+        if self.reranker is not None:
+            retrieved_documents = self.reranker.compress_documents(documents = retrieved_documents, query = query)
+        for doc in retrieved_documents:
+            context.append(doc.page_content)
+        return context
+
+def main():
+    import os, shutil
+    if os.path.exists("output"):
+        shutil.rmtree("output", ignore_errors=True)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--index_file_jsonl_path", type=str)
+    parser.add_argument("--query_file_jsonl_path", type=str)
+    parser.add_argument("--vector_database", type=str, default="Chroma")
+    parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5")
+    parser.add_argument("--llm_model", type=str)
+    parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large")
+
+    parser.add_argument("--retrieval_type", type=str, default='default')
+    parser.add_argument("--polish", default=False, action='store_true')
+    parser.add_argument("--search_type", type=str, default="similarity")
+    parser.add_argument("--k", type=int, default=1)
+    parser.add_argument("--fetch_k", type=int, default=5)
+    parser.add_argument("--score_threshold", type=float, default=0.3)
+    parser.add_argument("--top_n", type=int, default=1)
+    parser.add_argument("--enable_rerank", default=False, action='store_true')
+
+    args = parser.parse_args()
+
+    index_file_jsonl_path = args.index_file_jsonl_path
+
query_file_jsonl_path = args.query_file_jsonl_path + vector_database = args.vector_database + embedding_model = args.embedding_model + retrieval_type = args.retrieval_type + polish = args.polish + search_type = args.search_type + llm_model = args.llm_model + k = args.k + fetch_k = args.fetch_k + score_threshold = args.score_threshold + reranker_model = args.reranker_model + top_n = args.top_n + enable_rerank = args.enable_rerank + + query_list = query_set(query_file_jsonl_path) + + config = PipelineConfig(model_name_or_path=llm_model) + build_chatbot(config) + + retrieval_results=[] + for query in query_list: + context=Retrieval(input_path=index_file_jsonl_path, + vector_database=vector_database, + embedding_model=embedding_model, + retrieval_type = retrieval_type, + polish = polish, + search_type=search_type, + k=k, + fetch_k=fetch_k, + score_threshold=score_threshold, + reranker_model=reranker_model, + top_n = top_n, + enable_rerank = enable_rerank + ).pre_llm_inference_actions(model_name=llm_model, query=query) + retrieval_results.append(context) + ground_truths=load_list(query_file_jsonl_path, "pos") + MRR, Hit = evaluate(retrieval_results, ground_truths) + + file_json_path='result_retrieval.jsonl' + + if MRR and Hit: + data = { + "index_file_jsonl_path": args.index_file_jsonl_path, + "query_file_jsonl_path": args.query_file_jsonl_path, + "vector_database": args.vector_database, + "embedding_model": args.embedding_model, + "retrieval_type": args.retrieval_type, + "polish": args.polish, + "search_type": args.search_type, + "llm_model": args.llm_model, + "k": args.k, + "fetch_k": args.fetch_k, + "score_threshold": args.score_threshold, + "reranker_model": args.reranker_model, + "top_n": args.top_n, + "enable_rerank": args.enable_rerank, + "MRR": MRR, + "Hit": Hit, + } + print(data) + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh new file mode 100644 index 00000000000..903f1cf9f4e --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh @@ -0,0 +1,116 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
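To make the MRR/Hit computation in `evaluate()` above concrete, here is a small self-contained walk-through on toy data; the document names are hypothetical, and the membership test mirrors the script's `if x in label` check.
```
# Toy illustration of the MRR computation used by evaluate() above.
import numpy as np

preds = [["doc_a", "doc_b"], ["doc_c", "doc_d"], ["doc_e", "doc_f"]]  # ranked retrievals
labels = ["doc_b", "doc_c", "doc_x"]  # gold context per query
cutoffs = [1, 5]

mrrs = np.zeros(len(cutoffs))
for pred, label in zip(preds, labels):
    for rank, doc in enumerate(pred, 1):
        if doc in label:  # substring membership, as in the script
            for k, cutoff in enumerate(cutoffs):
                if rank <= cutoff:
                    mrrs[k] += 1 / rank
            break
mrrs /= len(preds)
print({f"MRR@{c}": round(m, 3) for c, m in zip(cutoffs, mrrs)})
# doc_b is found at rank 2 (counts only toward MRR@5), doc_c at rank 1,
# doc_x is never retrieved: {'MRR@1': 0.333, 'MRR@5': 0.5}
```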
+ +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + retrieval_type='default' + search_type="similarity" + k=1 + fetch_k=5 + score_threshold=0.3 + top_n=1 + + for var in "$@" + do + case $var in + --index_file_jsonl_path=*) + index_file_jsonl_path=$(echo $var |cut -f2 -d=) + ;; + --query_file_jsonl_path=*) + query_file_jsonl_path=$(echo $var |cut -f2 -d=) + ;; + --vector_database=*) + vector_database=$(echo $var |cut -f2 -d=) + ;; + --embedding_model=*) + embedding_model=$(echo $var |cut -f2 -d=) + ;; + --llm_model=*) + llm_model=$(echo $var |cut -f2 -d=) + ;; + --reranker_model=*) + reranker_model=$(echo ${var} |cut -f2 -d=) + ;; + --retrieval_type=*) + retrieval_type=$(echo $var |cut -f2 -d=) + ;; + --polish=*) + polish=$(echo $var |cut -f2 -d=) + ;; + --search_type=*) + search_type=$(echo $var |cut -f2 -d=) + ;; + --k=*) + k=$(echo $var |cut -f2 -d=) + ;; + --fetch_k=*) + fetch_k=$(echo $var |cut -f2 -d=) + ;; + --score_threshold=*) + score_threshold=$(echo ${var} |cut -f2 -d=) + ;; + --top_n=*) + top_n=$(echo ${var} |cut -f2 -d=) + ;; + --enable_rerank=*) + enable_rerank=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_benchmark +function run_benchmark { + + if [[ ${polish} == True ]]; then + polish="--polish" + else + polish="" + fi + if [[ ${enable_rerank} == True ]]; then + enable_rerank="--enable_rerank" + else + enable_rerank="" + fi + + python -u ./evaluate_retrieval_benchmark.py \ + --index_file_jsonl_path ${index_file_jsonl_path} \ + --query_file_jsonl_path ${query_file_jsonl_path} \ + --vector_database ${vector_database} \ + --embedding_model ${embedding_model} \ + --llm_model ${llm_model} \ + --reranker_model ${reranker_model} \ + --retrieval_type ${retrieval_type} \ + ${polish} \ + --search_type ${search_type} \ + --k ${k} \ + --fetch_k ${fetch_k} \ + --score_threshold ${score_threshold} \ + --top_n ${top_n} \ + ${enable_rerank} \ + +} + +main "$@" diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py new file mode 100644 index 00000000000..50c17777ff3 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py @@ -0,0 +1,123 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
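The superbenchmark drivers enumerate the full Cartesian product of the swept parameters with nested loops. As a compact alternative, the same grid can be generated with `itertools.product`; the sketch below assumes the sweep values from the `config.yaml` shown earlier and only prints the flag strings rather than launching the benchmark script.
```
# Hedged sketch: build the sweep grid with itertools.product instead of nested loops.
import itertools

sweep = {
    "retrieval_type": ["default", "child_parent", "bm25"],
    "polish": ["True", "False"],
    "search_type": ["similarity", "mmr", "similarity_score_threshold"],
    "k": ["1", "3", "5"],
}
for combo in itertools.product(*sweep.values()):
    flags = [f"--{name}={value}" for name, value in zip(sweep.keys(), combo)]
    print(" ".join(flags))  # e.g. --retrieval_type=default --polish=True ...
```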
+
+import argparse
+import os
+import subprocess
+import jsonlines
+import yaml
+
+def main():
+    if os.path.exists("result_retrieval.jsonl"):
+        os.remove("result_retrieval.jsonl")
+    script_path = 'retrieval_benchmark.sh'
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--config_path", type=str, default="config.yaml")
+    args = parser.parse_args()
+
+    data = read_yaml_file(args.config_path)
+    data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()}
+    arg1 = data['index_file_jsonl_path']
+    arg2 = data['query_file_jsonl_path']
+    arg3 = data['vector_database']
+    arg4 = data['embedding_model']
+    arg5 = data['llm_model']
+    arg6 = data['reranker_model']
+    arg7_list = data['retrieval_type']
+    arg8_list = data['polish']
+    arg9_list = data['search_type']
+    arg10_list = data['k']
+    arg11_list = data['fetch_k']
+    arg12_list = data['score_threshold']
+    arg13_list = data['top_n']
+    arg14_list = data['enable_rerank']
+
+    for arg7 in arg7_list:
+        print('--'*1 +'retrieval_type',arg7)
+        for arg8 in arg8_list:
+            print('--'*2 +'polish',arg8)
+            for arg9 in arg9_list:
+                print('--'*3 +'search_type',arg9)
+                for arg10 in arg10_list:
+                    print('--'*4 +'k',arg10)
+                    for arg11 in arg11_list:
+                        print('--'*5 +'fetch_k',arg11)
+                        for arg12 in arg12_list:
+                            print('--'*6 +'score_threshold',arg12)
+                            for arg13 in arg13_list:
+                                print('--'*7 +'top_n',arg13)
+                                for arg14 in arg14_list:
+                                    print('--'*8 +'enable_rerank',arg14)
+                                    subprocess.run(['bash',
+                                                    script_path,
+                                                    '--index_file_jsonl_path='+arg1,
+                                                    '--query_file_jsonl_path='+arg2,
+                                                    '--vector_database='+arg3,
+                                                    '--embedding_model='+arg4,
+                                                    '--llm_model='+arg5,
+                                                    '--reranker_model='+arg6,
+                                                    '--retrieval_type='+arg7,
+                                                    '--polish='+arg8,
+                                                    '--search_type='+arg9,
+                                                    '--k='+arg10,
+                                                    '--fetch_k='+arg11,
+                                                    '--score_threshold='+arg12,
+                                                    '--top_n='+arg13,
+                                                    '--enable_rerank='+arg14],
+                                                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+
+    file_jsonl_path='result_retrieval.jsonl'
+
+    MRR_list = []
+    Hit_list = []
+
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            MRR=stu["MRR"]
+            Hit=stu["Hit"]
+            MRR_list.append(MRR)
+            Hit_list.append(Hit)
+
+    MRR_line_number_list = [i for i, v in enumerate(MRR_list) if v == max(MRR_list)]
+    Hit_line_number_list = [i for i, v in enumerate(Hit_list) if v == max(Hit_list)]
+
+    line=0
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            if line in MRR_line_number_list:
+                print('max_MRR',stu)
+            line+=1
+
+    line=0
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            if line in Hit_line_number_list:
+                print('max_Hit',stu)
+            line+=1
+
+def read_yaml_file(file_path):
+    with open(file_path, 'r') as stream:
+        try:
+            return yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            print(exc)
+
+if __name__ == '__main__':
+    main()
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py new file mode 100644 index 00000000000..ae5cbc9c8d5 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py @@ -0,0 +1,116 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this
file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess +import jsonlines + +def main(): + if os.path.exists("result_retrieval.jsonl"): + os.remove("result_retrieval.jsonl") + script_path = 'retrieval_benchmark.sh' + + parser = argparse.ArgumentParser() + parser.add_argument("--index_file_jsonl_path", type=str) + parser.add_argument("--query_file_jsonl_path", type=str) + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + args = parser.parse_args() + + arg1 = args.index_file_jsonl_path + arg2 = args.query_file_jsonl_path + arg3 = args.vector_database + arg4 = args.embedding_model + arg5 = args.llm_model + arg6 = args.reranker_model + + arg7_list = ['default','child_parent','bm25'] + arg8_list = ['True','False'] + arg9_list = ['similarity','mmr','similarity_score_threshold'] + arg10_list = ['1', '3', '5'] + arg11_list = ['5', '10', '20'] + arg12_list = ['0.3','0.5','0.7'] + arg13_list = ['1','3', '5','10'] + arg14_list = ['True','False'] + + for arg7 in arg7_list: + print('--'*1 +'retrieval_type',arg7) + for arg8 in arg8_list: + print('--'*2 +'polish',arg8) + for arg9 in arg9_list: + print('--'*3 +'search_type',arg9) + for arg10 in arg10_list: + print('--'*4 +'k',arg10) + for arg11 in arg11_list: + print('--'*5 +'fetch_k',arg11) + for arg12 in arg12_list: + print('--'*6 +'score_threshold',arg12) + for arg13 in arg13_list: + print('--'*7 +'top_n',arg13) + for arg14 in arg14_list: + print('--'*8 +'enable_rerank',arg14) + # try: + subprocess.run(['bash', + script_path, + '--index_file_jsonl_path='+arg1, + '--query_file_jsonl_path='+arg2, + '--vector_database='+arg3, + '--embedding_model='+arg4, + '--llm_model='+arg5, + '--reranker_model='+arg6, + '--retrieval_type='+arg7, + '--polish='+arg8, + '--search_type='+arg9, + '--k='+arg10, + '--fetch_k='+arg11, + '--score_threshold='+arg12, + '--top_n='+arg13, + '--enable_rerank='+arg14], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file_jsonl_path='result_retrieval.jsonl' + + MRR_list = [] + Hit_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + MRR=stu["MRR"] + Hit=stu["Hit"] + MRR_list.append(MRR) + Hit_list.append(Hit) + + MRR_line_number_list = [i for i, v in enumerate(MRR_list) if v == max(MRR_list)] + Hit_line_number_list = [i for i, v in enumerate(Hit_list) if v == max(Hit_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in MRR_line_number_list: + print('max_MRR',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in Hit_line_number_list: + print('max_Hit',stu) + line+=1 + +if __name__ == '__main__': + main()
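For reference, a hedged end-to-end usage sketch for the retrieval superbenchmark above; the file paths and model names below are placeholders, not part of the patch.
```
# Hypothetical invocation of retrieval_superbenchmark.py; adjust paths/models.
import subprocess

subprocess.run([
    "python", "retrieval_superbenchmark.py",
    "--index_file_jsonl_path", "/path/to/candidate_context.jsonl",
    "--query_file_jsonl_path", "/path/to/example.jsonl",
    "--embedding_model", "BAAI/bge-large-en-v1.5",
    "--llm_model", "Intel/neural-chat-7b-v3-1",
    "--reranker_model", "BAAI/bge-reranker-large",
], check=True)
# Best configurations are printed as max_MRR / max_Hit lines, and per-run
# results accumulate in result_retrieval.jsonl.
```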