From f820019fe5b358f16abbe8e59aa59931c53f74a1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 11:09:57 +0800 Subject: [PATCH 001/151] add retrieval dataset construction codes --- .../neural_chat/tools/evaluation/__init__.py | 16 ++ .../tools/evaluation/context_utils.py | 246 ++++++++++++++++++ .../neural_chat/tools/evaluation/hn_mine.py | 97 +++++++ .../tools/evaluation/html_parser.py | 195 ++++++++++++++ .../tools/evaluation/llm_generate_raw_data.py | 202 ++++++++++++++ .../mine_hard_negatives_check_similarity.py | 62 +++++ .../tools/evaluation/requirements.txt | 16 ++ .../retrieval_dataset_construction.py | 117 +++++++++ 8 files changed, 951 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/__init__.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/__init__.py new file mode 100644 index 00000000000..18896e7b549 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py new file mode 100644 index 00000000000..96fa0c54509 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py @@ -0,0 +1,246 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import unicodedata
+import pandas as pd
+import re, json
+from langchain.document_loaders import UnstructuredMarkdownLoader
+from docx import Document as DDocument
+from bs4 import BeautifulSoup
+import fitz
+import easyocr
+from PIL import Image
+import numpy as np
+import io
+
+def uni_pro(text):
+    """Check if the character is ASCII or falls in the category of non-spacing marks."""
+    normalized_text = unicodedata.normalize('NFKD', text)
+    filtered_text = ''
+    for char in normalized_text:
+        if ord(char) < 128 or unicodedata.category(char) == 'Mn':
+            filtered_text += char
+    return filtered_text
+
+
+def read_pdf(pdf_path):
+    """Read the pdf file."""
+    doc = fitz.open(pdf_path)
+    reader = easyocr.Reader(['en'])
+    result =''
+    for i in range(doc.page_count):
+        page = doc.load_page(i)
+        pagetext = page.get_text().strip()
+        if pagetext:
+            if pagetext.endswith('!') or pagetext.endswith('?') or pagetext.endswith('.'):
+                result=result+pagetext
+            else:
+                result=result+pagetext+'.'
+        if len(doc.get_page_images(i)) > 0 :
+            for img in doc.get_page_images(i):
+                if img:
+                    pageimg=''
+                    xref = img[0]
+                    img_data = doc.extract_image(xref)
+                    img_bytes = img_data['image']
+                    pil_image = Image.open(io.BytesIO(img_bytes))
+                    img = np.array(pil_image)
+                    img_result = reader.readtext(img, paragraph=True, detail=0)
+                    pageimg=pageimg + ', '.join(img_result).strip()
+                if pageimg.endswith('!') or pageimg.endswith('?') or pageimg.endswith('.'):
+                    pass
+                else:
+                    pageimg=pageimg+'.'
+                result=result+pageimg
+    return result
+
+
+def read_html(html_path):
+    """Read the html file."""
+    with open(html_path, 'r', encoding="utf-8") as file:
+        html = file.read()
+    soup = BeautifulSoup(html, 'html.parser')
+    text = soup.get_text(strip=True)
+    return text
+
+
+def read_txt(txt_path):
+    """Read txt file."""
+    with open(txt_path, 'r') as file:
+        text = file.read()
+    return text
+
+
+def read_docx(doc_path):
+    """Read docx file."""
+    doc = DDocument(doc_path)
+    text = ''
+    for paragraph in doc.paragraphs:
+        text += paragraph.text
+    return text
+
+
+def read_md(md_path):
+    """Read markdown file."""
+    loader = UnstructuredMarkdownLoader(md_path)
+    text = loader.load()[0].page_content
+    return text
+
+
+def load_json(input, process, max_length, min_length):
+    """Load and process json file."""
+    data = []
+    with open(input, 'r') as file:
+        for line in file:
+            json_obj = json.loads(line)
+            data.append(json_obj)
+
+    new_sens = []
+    new_collect = []
+    for sub in data:
+        sub['content'] = sub['content'].replace('#', " ")
+        sub['content'] = re.sub(r'\s+', ' ', sub['content'])
+        if not process:
+            if len(sub['content']) < min_length:
+                continue
+            new_doc = [sub['content'], sub['link']]
+            new_collect.append(new_doc)
+        else:
+            for sub in data:
+                sub['content'] = sub['content'].replace('#', " ")
+                if len(sub['content']) < min_length:
+                    continue
+                split_sen = re.split('(?<=[!.?])', sub['content'])
+                for num in range(len(split_sen)):
+                    if num + 1 < len(split_sen):
+                        if len(split_sen[num]) > max_length:
+                            new_sens.append(split_sen[num].strip())
+                        else:
+                            split_sen[num + 1] = split_sen[num] + split_sen[num + 1]
+                    else:
+                        new_sens.append(split_sen[num])
+
+            paragraphs = list(set(new_sens))
+            for paragraph in paragraphs:
+                new_doc = [paragraph, sub['link']]
+                new_collect.append(new_doc)
+    return new_collect
+
+
+def load_xlsx(input):
+    """Load and process xlsx file."""
+    df = pd.read_excel(input)
+    header = df.columns.tolist()
+    all_data = []
+    if 'Questions' in header and 'Answers' in header:
+        for index, row in df.iterrows():
+            sub = row["Answers"]
+            sub=sub.replace('#', " ")
+            sub = sub.replace(r'\t', " ")
+            sub = sub.replace('\n', ' ')
+            sub = sub.replace('\n\n', ' ')
+            sub = re.sub(r'\s+', ' ', sub)
+            new_doc = [sub, input]
+            all_data.append(new_doc)
+    elif 'question' in header and 'answer' in header and 'link' in header:
+        for index, row in df.iterrows():
+            sub = row["answer"]
+            sub = sub.replace('#', " ")
+            sub = sub.replace(r'\t', " ")
+            sub = sub.replace('\n', ' ')
+            sub = sub.replace('\n\n', ' ')
+            sub = re.sub(r'\s+', ' ', sub)
+            all_data.append([sub, row['link']])
+    elif 'context' in header and 'link' in header:
+        for index, row in df.iterrows():
+            sub = row['context']
+            sub = sub.replace('#', " ")
+            sub = sub.replace(r'\t', " ")
+            sub = sub.replace('\n', ' ')
+            sub = sub.replace('\n\n', ' ')
+            sub = re.sub(r'\s+', ' ', sub)
+            all_data.append([sub, row['link']])
+    return all_data
+
+def load_csv(input):
+    """Load the csv file."""
+    df = pd.read_csv(input)
+    all_data = []
+    documents = []
+    for index, row in df.iterrows():
+        sub = row["correct_answer"]
+        all_data.append(sub)
+
+    for data in all_data:
+        data = data.replace('#', " ")
+        data = re.sub(r'\s+', ' ', data)
+        new_doc = [data, input]
+        documents.append(new_doc)
+    return documents
+
+def load_structured_data(input, process, max_length, min_length):
+    """Load structured context."""
+    if input.endswith("jsonl") or input.endswith("json"):
+        content = load_json(input, process, max_length, min_length)
+    elif input.endswith("xlsx"):
+        content = load_xlsx(input)
+    elif input.endswith("csv"):
+        content = load_csv(input)
+    return content
+
+def load_unstructured_data(input):
+    """Load unstructured context."""
+    if input.endswith("pdf"):
+        text = read_pdf(input)
+    elif input.endswith("docx"):
+        text = read_docx(input)
+    elif input.endswith("html"):
+        text = read_html(input)
+    elif input.endswith("txt"):
+        text = read_txt(input)
+    elif input.endswith("md"):
+        text = read_md(input)
+
+    text = text.replace('\n', ' ')
+    text = text.replace('\n\n', ' ')
+    text = uni_pro(text)
+    text = re.sub(r'\s+', ' ', text)
+    return text
+
+def get_chuck_data(content, max_length, min_length, input):
+    """Process the context to make it maintain a suitable length for the generation."""
+    sentences = re.split('(?<=[!.?])', content)
+
+    paragraphs = []
+    current_length = 0
+    count = 0
+    current_paragraph = ""
+    for sub_sen in sentences:
+        count +=1
+        sentence_length = len(sub_sen)
+        if current_length + sentence_length <= max_length:
+            current_paragraph += sub_sen
+            current_length += sentence_length
+            if count == len(sentences) and len(current_paragraph.strip())>min_length:
+                paragraphs.append([current_paragraph.strip() ,input])
+        else:
+            paragraphs.append([current_paragraph.strip() ,input])
+            current_paragraph = sub_sen
+            current_length = sentence_length
+
+    return paragraphs
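As a rough sketch of how these helpers chain together (illustrative only; the file name and chunk sizes are made-up values, not part of the patch):

    from context_utils import load_unstructured_data, get_chuck_data

    # Extract plain text from one local file, then split it into chunks of
    # at most 512 characters; each chunk is paired with its source path.
    text = load_unstructured_data("sample.pdf")
    for chunk, source in get_chuck_data(text, 512, 5, "sample.pdf"):
        print(source, chunk[:80])

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py
new file mode 100644
index 00000000000..8fcaeb12153
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.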
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import random
+import numpy as np
+import faiss
+from tqdm import tqdm
+
+def create_index(embeddings, use_gpu):
+    index = faiss.IndexFlatIP(len(embeddings[0]))
+    embeddings = np.asarray(embeddings, dtype=np.float32)
+    if use_gpu:
+        co = faiss.GpuMultipleClonerOptions()
+        co.shard = True
+        co.useFloat16 = True
+        index = faiss.index_cpu_to_all_gpus(index, co=co)
+    index.add(embeddings)
+    return index
+
+def batch_search(index,
+                 query,
+                 topk: int = 200,
+                 batch_size: int = 64):
+    all_scores, all_inxs = [], []
+    for start_index in tqdm(range(0, len(query), batch_size), desc="Batches", disable=len(query) < 256):
+        batch_query = query[start_index:start_index + batch_size]
+        batch_scores, batch_inxs = index.search(np.asarray(batch_query, dtype=np.float32), k=topk)
+        all_scores.extend(batch_scores.tolist())
+        all_inxs.extend(batch_inxs.tolist())
+    return all_scores, all_inxs
+
+def get_corpus(candidate_pool):
+    corpus = []
+    for line in open(candidate_pool):
+        line = json.loads(line.strip())
+        corpus.append(line['text'])
+    return corpus
+
+def find_knn_neg(model, input_file, candidate_pool, output_file, sample_range, negative_number, use_gpu):
+    corpus = []
+    queries = []
+    train_data = []
+    for line in open(input_file):
+        line = json.loads(line.strip())
+        train_data.append(line)
+        corpus.extend(line['pos'])
+        if 'neg' in line:
+            corpus.extend(line['neg'])
+        queries.append(line['query'])
+
+    if candidate_pool is not None:
+        if not isinstance(candidate_pool, list):
+            candidate_pool = get_corpus(candidate_pool)
+        corpus = list(set(candidate_pool))
+    else:
+        corpus = list(set(corpus))
+
+    p_vecs = model.encode(corpus, batch_size=256)
+    q_vecs = model.encode(queries, batch_size=256)
+
+    index = create_index(p_vecs, use_gpu=use_gpu)
+    _, all_inxs = batch_search(index, q_vecs, topk=sample_range[-1])
+    assert len(all_inxs) == len(train_data)
+
+    for i, data in enumerate(train_data):
+        query = data['query']
+        inxs = all_inxs[i][sample_range[0]:sample_range[1]]
+        filtered_inx = []
+        for inx in inxs:
+            if inx == -1: break
+            if corpus[inx] not in data['pos'] and corpus[inx] != query:
+                filtered_inx.append(inx)
+
+        if len(filtered_inx) > negative_number:
+            filtered_inx = random.sample(filtered_inx, negative_number)
+        data['neg'] = [corpus[inx] for inx in filtered_inx]
+
+    with open(output_file, 'w') as f:
+        for data in train_data:
+            if len(data['neg']) < negative_number:
+                data['neg'].extend(random.sample(corpus, negative_number - len(data['neg'])))
+            f.write(json.dumps(data, ensure_ascii=False) + '\n')
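The mining logic above is easiest to read alongside its data format. Each line of input_file carries a query and its positive passages; find_knn_neg retrieves the top-k most similar corpus entries, drops the positives and the query itself, and samples negative_number hard negatives. Illustrative records (contents invented for this sketch):

    {"query": "What is NeuralChat?", "pos": ["NeuralChat is a customizable chatbot framework ..."]}

after mining with negative_number=2 becomes

    {"query": "What is NeuralChat?", "pos": ["..."], "neg": ["<hard negative 1>", "<hard negative 2>"]}

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py
new file mode 100644
index 00000000000..f2f4f1bedd5
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py
@@ -0,0 +1,195 @@
+# !/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.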
+# See the License for the specific language governing permissions and +# limitations under the License. + +import requests +from urllib.parse import urlparse, urlunparse +import multiprocessing +import urllib3 +import langid +from bs4 import BeautifulSoup +import os +import re +from context_utils import uni_pro +import logging + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) +urllib3.disable_warnings() + + +class Crawler: + def __init__(self, pool=None): + if pool: + assert isinstance(pool, (str, list, tuple)), 'url pool should be str, list or tuple' + self.pool = pool + self.headers = { + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng, \ + */*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'Accept-Encoding': 'gzip, deflate, br', + 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7', + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, \ + like Gecko) Chrome/113.0.0.0 Safari/537.36' + } + self.fetched_pool = set() + + def get_sublinks(self, soup): + sublinks = [] + for links in soup.find_all('a'): + sublinks.append(str(links.get('href'))) + return sublinks + + def get_hyperlink(self, soup, base_url): + sublinks = [] + for links in soup.find_all('a'): + link = str(links.get('href')) + if link.startswith('#') or link is None or link == 'None': + continue + suffix = link.split('/')[-1] + if '.' in suffix and suffix.split('.')[-1] not in ['html', 'htmld']: + continue + link_parse = urlparse(link) + base_url_parse = urlparse(base_url) + if link_parse.path == '': + continue + if link_parse.netloc != '': + # keep crawler works in the same domain + if link_parse.netloc != base_url_parse.netloc: + continue + sublinks.append(link) + else: + sublinks.append(urlunparse((base_url_parse.scheme, + base_url_parse.netloc, + link_parse.path, + link_parse.params, + link_parse.query, + link_parse.fragment))) + return sublinks + + def fetch(self, url, headers=None, max_times=5): + if not headers: + headers = self.headers + while max_times: + if not url.startswith('http') or not url.startswith('https'): + url = 'http://' + url + logging.info('start fetch %s...', url) + try: + response = requests.get(url, headers=headers, verify=True) + if response.status_code != 200: + logging.error('fail to fetch %s, response status code: %s', url, response.status_code) + else: + return response + except Exception as e: + logging.error('fail to fetch %s, caused by %s', url, e) + max_times -= 1 + return None + + def process_work(self, sub_url, work): + response = self.fetch(sub_url) + if response is None: + return [] + self.fetched_pool.add(sub_url) + soup = self.parse(response.text) + base_url = self.get_base_url(sub_url) + sublinks = self.get_hyperlink(soup, base_url) + if work: + work(sub_url, soup) + return sublinks + + def crawl(self, pool, work=None, max_depth=10, workers=10): + url_pool = set() + for url in pool: + base_url = self.get_base_url(url) + response = self.fetch(url) + soup = self.parse(response.text) + sublinks = self.get_hyperlink(soup, base_url) + self.fetched_pool.add(url) + url_pool.update(sublinks) + depth = 0 + while len(url_pool) > 0 and depth < max_depth: + logging.info('current depth %s...', depth) + mp = multiprocessing.Pool(processes=workers) + results = [] + for sub_url in url_pool: + if sub_url not in self.fetched_pool: + results.append(mp.apply_async(self.process_work, (sub_url, work))) + mp.close() + mp.join() + 
url_pool = set() + for result in results: + sublinks = result.get() + url_pool.update(sublinks) + depth += 1 + + def parse(self, html_doc): + soup = BeautifulSoup(html_doc, 'lxml') + return soup + + def download(self, url, file_name): + logging.info('download %s into %s...', url, file_name) + try: + r = requests.get(url, stream=True, headers=self.headers, verify=True) + f = open(file_name, "wb") + for chunk in r.iter_content(chunk_size=512): + if chunk: + f.write(chunk) + except Exception as e: + logging.error('fail to download %s, caused by %s', url, e) + + def get_base_url(self, url): + result = urlparse(url) + return urlunparse((result.scheme, result.netloc, '', '', '', '')) + + def clean_text(self, text): + text = text.strip().replace('\r', '\n') + text = re.sub(' +', ' ', text) + text = re.sub('\n+', '\n', text) + text = text.split('\n') + return '\n'.join([i for i in text if i and i != ' ']) + + +def load_html_data(url): + crawler = Crawler() + res = crawler.fetch(url) + if res == None: + return None + soup = crawler.parse(res.text) + all_text = crawler.clean_text(soup.select_one('body').text) + main_content = '' + for element_name in ['main', 'container']: + main_block = None + if soup.select(f'.{element_name}'): + main_block = soup.select(f'.{element_name}') + elif soup.select(f'#{element_name}'): + main_block = soup.select(f'#{element_name}') + if main_block: + for element in main_block: + text = crawler.clean_text(element.text) + if text not in main_content: + main_content += f'\n{text}' + main_content = crawler.clean_text(main_content) + + main_content = main_content.replace('\n', '') + main_content = main_content.replace('\n\n', '') + main_content = uni_pro(main_content) + main_content = re.sub(r'\s+', ' ', main_content) + + # {'text': all_text, 'main_content': main_content} + + return main_content diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py new file mode 100644 index 00000000000..d1476d13518 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py @@ -0,0 +1,202 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import torch +from modelscope import AutoModelForCausalLM, AutoTokenizer +import jsonlines +import os, re +from typing import List +from context_utils import load_unstructured_data, load_structured_data, get_chuck_data +from html_parser import load_html_data +import logging +from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT +from transformers import GenerationConfig + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) + +class DocumentLoading: + def __init__(self, max_chuck_size=512, min_chuck_size=5, process=True): + """ + Wrapper for document parsing. 
+        """
+        self.max_chuck_size = max_chuck_size
+        self.min_chuck_size = min_chuck_size
+        self.process = process
+
+
+    def data_load(self, input, **kwargs):
+        """
+        The API for loading a file. Supports single files, batch files, and URL parsing.
+        """
+        if 'max_chuck_size' in kwargs:
+            self.max_chuck_size=kwargs['max_chuck_size']
+        if 'min_chuck_size' in kwargs:
+            self.min_chuck_size = kwargs['min_chuck_size']
+        if 'process' in kwargs:
+            self.process = kwargs['process']
+
+        if isinstance(input, str):
+            if os.path.isfile(input):
+                data_collection = self.parse_document(input)
+            elif os.path.isdir(input):
+                data_collection = self.batch_parse_document(input)
+            else:
+                logging.error("Please check your upload file and try again!")
+        elif isinstance(input, List):
+            try:
+                data_collection = self.parse_html(input)
+            except:
+                logging.error("The given link/str is unavailable. Please try another one!")
+        else:
+            logging.error("The input format is invalid!")
+
+        documents = []
+        for data, metadata in data_collection:
+            if len(data) < 5:
+                continue
+            documents.append(data)
+        return documents
+
+
+    def parse_document(self, input):
+        """
+        Parse the uploaded file.
+        """
+        if input.endswith("pdf") or input.endswith("docx") or input.endswith("html") \
+           or input.endswith("txt") or input.endswith("md"):
+            content = load_unstructured_data(input)
+            if self.process:
+                chuck = get_chuck_data(content, self.max_chuck_size, self.min_chuck_size, input)
+            else:
+                chuck = [[content.strip(),input]]
+        elif input.endswith("jsonl") or input.endswith("xlsx") or input.endswith("csv") or \
+                input.endswith("json"):
+            chuck = load_structured_data(input, self.process, \
+                                         self.max_chuck_size, self.min_chuck_size)
+        else:
+            logging.info("This file {} is ignored. Will support this file format soon.".format(input))
+            raise Exception("[Retrieval ERROR] Document format not supported!")
+        return chuck
+
+    def parse_html(self, input):
+        """
+        Parse the given html links.
+        """
+        chucks = []
+        for link in input:
+            if re.match(r'^https?:/{2}\w.+$', link):
+                content = load_html_data(link)
+                if content == None:
+                    continue
+                if self.process:
+                    chuck = get_chuck_data(content, self.max_chuck_size, self.min_chuck_size, link)
+                else:
+                    chuck = [[content.strip(), link]]
+                chucks += chuck
+            else:
+                logging.error("The given link/str {} cannot be parsed.".format(link))
+
+        return chucks
+
+
+    def batch_parse_document(self, input):
+        """
+        Parse the uploaded batch files in the input folder.
+        """
+        paragraphs = []
+        for dirpath, dirnames, filenames in os.walk(input):
+            for filename in filenames:
+                if filename.endswith("pdf") or filename.endswith("docx") or filename.endswith("html") \
+                   or filename.endswith("txt") or filename.endswith("md"):
+                    content = load_unstructured_data(os.path.join(dirpath, filename))
+                    if self.process:
+                        chuck = get_chuck_data(content, self.max_chuck_size, self.min_chuck_size, input)
+                    else:
+                        chuck = [[content.strip(),input]]
+                    paragraphs += chuck
+                elif filename.endswith("jsonl") or filename.endswith("xlsx") or filename.endswith("csv") or \
+                     filename.endswith("json"):
+                    chuck = load_structured_data(os.path.join(dirpath, filename), \
+                                                 self.process, self.max_chuck_size, self.min_chuck_size)
+                    paragraphs += chuck
+                else:
+                    logging.info("This file {} is ignored. Will support this file format soon.".format(filename))
+                    raise Exception("[Retrieval ERROR] Document format not supported!")
+        return paragraphs
+
+def raw_data_generate(model_id, base_dir, file_json_path,temperature,top_p,top_k,repetition_penalty,max_new_tokens,do_sample,num_beams,num_return_sequences,use_cache):
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16)
+    documents = DocumentLoading().data_load(input=base_dir)
+
+    generation_config = GenerationConfig(
+        temperature = temperature,
+        top_p = top_p,
+        top_k = top_k,
+        repetition_penalty = repetition_penalty,
+        max_new_tokens = max_new_tokens,
+        do_sample = do_sample,
+        num_beams = num_beams,
+        num_return_sequences = num_return_sequences,
+        use_cache = use_cache,
+        pad_token_id=tokenizer.eos_token_id
+    )
+
+    for i in range(len(documents)):
+        context = documents[i]
+
+        if context:
+            input = QUERYGENERATE_PROMPT.format(context=context)
+
+            model_input = tokenizer(input, return_tensors="pt").to("cuda")
+            model.eval()
+            result = []
+
+            for j in range(5):
+                with torch.no_grad():
+                    res = model.generate(**model_input, generation_config=generation_config)[0]
+                res=tokenizer.decode(res, skip_special_tokens=True)
+
+                res = res[res.find('Generated questions:') :]
+                res = re.sub('Generated questions:', '', res)
+                res = re.sub('---', '', res)
+
+                res = res.split("?")[0:2]
+                for r in res:
+                    r = r.replace('1.', "").replace('2.', "")
+                    r = r.replace('Evaluation:', "")
+                    r = r.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip()
+                    r = r + '?'
+                    result.append(r)
+
+            result_str=''
+            result_set = list(set(result))
+            for k in range(len(result_set)):
+                result_str = result_str + str(k) + '. '+ result_set[k]
+
+            if result_str and result_str.isspace()==False:
+                data = {
+                    "query": result_str,
+                    "pos": [context],
+                }
+                with jsonlines.open(file_json_path,"a") as file_json:
+                    file_json.write(data)
\ No newline at end of file
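Each appended JSON line therefore concatenates the deduplicated questions into one numbered string paired with its source chunk, e.g. (values invented for illustration):

    {"query": "0. What does the plugin retrieve?1. Which formats are parsed?", "pos": ["<the context chunk>"]}

These {"query", "pos"} records are exactly the input_file format that hn_mine.py's find_knn_neg consumes in the next stage, and the numbered, "?"-joined query string is what the similarity check later splits back apart.

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py
new file mode 100644
index 00000000000..e922126bc64
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.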
+
+import jsonlines
+from hn_mine import find_knn_neg
+from sentence_transformers import SentenceTransformer
+
+def mine_hard_negatives(model_name_or_path, input_file, output_file, range_for_sampling, negative_number, use_gpu_for_searching): 
+    candidate_pool=None
+
+    sample_range = range_for_sampling.split('-')
+    sample_range = [int(x) for x in sample_range]
+
+    model = SentenceTransformer(model_name_or_path)
+
+    find_knn_neg(model,
+                 input_file=input_file,
+                 candidate_pool=candidate_pool,
+                 output_file=output_file,
+                 sample_range=sample_range,
+                 negative_number=negative_number,
+                 use_gpu=use_gpu_for_searching)
+
+def similarity_score(queries,passages,model_name_or_path):
+    queries = [queries]
+    passages = passages
+    instruction = ""
+    model = SentenceTransformer(model_name_or_path)
+    q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True)
+    p_embeddings = model.encode(passages, normalize_embeddings=True)
+    similarity_score = q_embeddings @ p_embeddings.T
+    return similarity_score
+
+def similarity_check(file_jsonl_path,file_json_split_path,model_name_or_path, similarity_threshold):
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            stu["query"]=stu["query"].split("?")[:-1]
+            for i in range(len(stu["query"])):
+                stu["query"][i]=stu["query"][i].lstrip('0123456789-. ')+ '?'
+                if similarity_score(stu["query"][i],stu["pos"],model_name_or_path) >= similarity_threshold:
+                    data = {
+                        "query": stu["query"][i],
+                        "pos": stu["pos"],
+                        "neg": stu["neg"],
+                    }
+                    with jsonlines.open(file_json_split_path,"a") as file_json:
+                        file_json.write(data)
+
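A minimal sketch of the filtering criterion used above (the query, passage, and embedding model name are assumptions for illustration, not values from the patch):

    from mine_hard_negatives_check_similarity import similarity_score

    # Keep a generated question only if it is similar enough to its source passage.
    if similarity_score("What is hard negative mining?",
                        ["Hard negative mining picks confusable passages ..."],
                        "BAAI/bge-base-en-v1.5") >= 0.6:
        print("kept")

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt
new file mode 100644
index 00000000000..769e9e8898f
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt
@@ -0,0 +1,16 @@
+torch
+modelscope
+jsonlines
+beautifulsoup4
+easyocr
+langchain==0.0.354
+langchain_core==0.1.18
+langid
+PyMuPDF
+python-docx
+sentence-transformers==2.3.1
+numpy
+faiss-gpu
+transformers
+accelerate
+openpyxl
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py
new file mode 100644
index 00000000000..d345992a556
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.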
+
+from llm_generate_raw_data import raw_data_generate
+from mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check
+import argparse
+
+def construct_retrieval_dataset(
+        llm_model, 
+        embedding_model, 
+        input_dir, 
+        output_raw_path,
+        temperature,
+        top_p,
+        top_k,
+        repetition_penalty,
+        max_new_tokens,
+        do_sample,
+        num_beams,
+        num_return_sequences,
+        use_cache, 
+        range_for_sampling, 
+        negative_number, 
+        use_gpu_for_searching,
+        similarity_threshold):
+
+    raw_data_generate(llm_model,input_dir,output_raw_path,temperature,top_p,top_k,repetition_penalty,max_new_tokens,do_sample,num_beams,num_return_sequences,use_cache)
+
+    output_hn_path=output_raw_path+'_minedHN.jsonl'
+
+    mine_hard_negatives(embedding_model, output_raw_path, output_hn_path, range_for_sampling, negative_number, use_gpu_for_searching)
+
+    output_json_split_path = output_raw_path+"_minedHN_split.jsonl"
+    similarity_check(output_hn_path,output_json_split_path,embedding_model,similarity_threshold)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--llm_model", type=str)
+    parser.add_argument("--embedding_model", type=str)
+    parser.add_argument("--input_dir", type=str)
+    parser.add_argument("--output_raw_path", type=str)
+
+    parser.add_argument("--temperature", type=float, default=0.8)
+    parser.add_argument("--top_p", type=float, default=0.9)
+    parser.add_argument("--top_k", type=int, default=40)
+    parser.add_argument("--repetition_penalty", type=float, default=2.0)
+    parser.add_argument("--max_new_tokens", type=int, default=48)
+    parser.add_argument("--do_sample", type=bool, default=True)
+    parser.add_argument("--num_beams", type=int, default=2)
+    parser.add_argument("--num_return_sequences", type=int, default=2)
+    parser.add_argument("--use_cache", type=bool, default=True)
+
+    parser.add_argument("--range_for_sampling", type=str, default='2-10')
+    parser.add_argument("--negative_number", type=int, default=5)
+    parser.add_argument("--use_gpu_for_searching", type=bool, default=True) 
+
+    parser.add_argument("--similarity_threshold", type=float, default=0.6)
+
+    args = parser.parse_args()
+
+    llm_model = args.llm_model
+    embedding_model = args.embedding_model
+    input_dir = args.input_dir
+    output_raw_path = args.output_raw_path
+
+    temperature = args.temperature
+    top_p = args.top_p
+    top_k = args.top_k
+    repetition_penalty = args.repetition_penalty
+    max_new_tokens = args.max_new_tokens
+    do_sample = args.do_sample
+    num_beams = args.num_beams
+    num_return_sequences = args.num_return_sequences
+    use_cache = args.use_cache
+
+    range_for_sampling=args.range_for_sampling
+    negative_number=args.negative_number
+    use_gpu_for_searching=args.use_gpu_for_searching
+
+    similarity_threshold=args.similarity_threshold
+
+    construct_retrieval_dataset(
+        llm_model, 
+        embedding_model, 
+        input_dir, 
+        output_raw_path,
+        temperature,
+        top_p,
+        top_k,
+        repetition_penalty,
+        max_new_tokens,
+        do_sample,
+        num_beams,
+        num_return_sequences,
+        use_cache, 
+        range_for_sampling, 
+        negative_number, 
+        use_gpu_for_searching,
+        similarity_threshold)
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
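With the entry point above, the whole pipeline can be driven from the command line. A hypothetical invocation (the model names and paths are placeholders, not defaults from the patch):

    python retrieval_dataset_construction.py \
        --llm_model mistralai/Mistral-7B-Instruct-v0.1 \
        --embedding_model BAAI/bge-base-en-v1.5 \
        --input_dir ./docs \
        --output_raw_path ./augmented.jsonl

Given these arguments, the raw generated queries land in ./augmented.jsonl, the mined hard negatives in ./augmented.jsonl_minedHN.jsonl, and the similarity-filtered dataset in ./augmented.jsonl_minedHN_split.jsonl.

From 06f8162137d863b65c78293d654fbe84888069a2 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 1 Mar 2024 03:13:58 +0000
Subject: [PATCH 002/151] [pre-commit.ci] auto fixes from pre-commit.com hooks

 for more information, see https://pre-commit.ci
---
 .../tools/evaluation/llm_generate_raw_data.py | 30 +++++++++----------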
.../mine_hard_negatives_check_similarity.py | 3 +- .../tools/evaluation/requirements.txt | 14 ++++----- .../retrieval_dataset_construction.py | 30 +++++++++---------- 4 files changed, 38 insertions(+), 39 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py index d1476d13518..45860591e4e 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py @@ -68,7 +68,7 @@ def data_load(self, input, **kwargs): logging.error("The given link/str is unavailable. Please try another one!") else: logging.error("The input format is invalid!") - + documents = [] for data, metadata in data_collection: if len(data) < 5: @@ -149,22 +149,22 @@ def raw_data_generate(model_id, base_dir, file_json_path,temperature,top_p,top_k documents = DocumentLoading().data_load(input=base_dir) generation_config = GenerationConfig( - temperature = temperature, - top_p = top_p, - top_k = top_k, - repetition_penalty = repetition_penalty, - max_new_tokens = max_new_tokens, - do_sample = do_sample, - num_beams = num_beams, - num_return_sequences = num_return_sequences, + temperature = temperature, + top_p = top_p, + top_k = top_k, + repetition_penalty = repetition_penalty, + max_new_tokens = max_new_tokens, + do_sample = do_sample, + num_beams = num_beams, + num_return_sequences = num_return_sequences, use_cache = use_cache, pad_token_id=tokenizer.eos_token_id ) for i in range(len(documents)): context = documents[i] - - if context: + + if context: input = QUERYGENERATE_PROMPT.format(context=context) model_input = tokenizer(input, return_tensors="pt").to("cuda") @@ -177,15 +177,15 @@ def raw_data_generate(model_id, base_dir, file_json_path,temperature,top_p,top_k res=tokenizer.decode(res, skip_special_tokens=True) res = res[res.find('Generated questions:') :] - res = re.sub('Generated questions:', '', res) - res = re.sub('---', '', res) + res = re.sub('Generated questions:', '', res) + res = re.sub('---', '', res) res = res.split("?")[0:2] for r in res: r = r.replace('1.', "").replace('2.', "") r = r.replace('Evaluation:', "") r = r.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() - r = r + '?' + r = r + '?' 
result.append(r) result_str='' @@ -199,4 +199,4 @@ def raw_data_generate(model_id, base_dir, file_json_path,temperature,top_p,top_k "pos": [context], } with jsonlines.open(file_json_path,"a") as file_json: - file_json.write(data) \ No newline at end of file + file_json.write(data) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py index e922126bc64..7869a51a706 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py @@ -19,7 +19,7 @@ from hn_mine import find_knn_neg from sentence_transformers import SentenceTransformer -def mine_hard_negatives(model_name_or_path, input_file, output_file, range_for_sampling, negative_number, use_gpu_for_searching): +def mine_hard_negatives(model_name_or_path, input_file, output_file, range_for_sampling, negative_number, use_gpu_for_searching): candidate_pool=None sample_range = range_for_sampling.split('-') @@ -59,4 +59,3 @@ def similarity_check(file_jsonl_path,file_json_split_path,model_name_or_path, si } with jsonlines.open(file_json_split_path,"a") as file_json: file_json.write(data) - diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt index 769e9e8898f..0466e299d3d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt @@ -1,16 +1,16 @@ -torch -modelscope -jsonlines +accelerate beautifulsoup4 easyocr +faiss-gpu +jsonlines langchain==0.0.354 langchain_core==0.1.18 langid +modelscope +numpy +openpyxl PyMuPDF python-docx sentence-transformers==2.3.1 -numpy -faiss-gpu +torch transformers -accelerate -openpyxl \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py index d345992a556..1f8b4a1057f 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py @@ -20,9 +20,9 @@ import argparse def construct_retrieval_dataset( - llm_model, - embedding_model, - input_dir, + llm_model, + embedding_model, + input_dir, output_raw_path, temperature, top_p, @@ -32,16 +32,16 @@ def construct_retrieval_dataset( do_sample, num_beams, num_return_sequences, - use_cache, - range_for_sampling, - negative_number, + use_cache, + range_for_sampling, + negative_number, use_gpu_for_searching, similarity_threshold): raw_data_generate(llm_model,input_dir,output_raw_path,temperature,top_p,top_k,repetition_penalty,max_new_tokens,do_sample,num_beams,num_return_sequences,use_cache) output_hn_path=output_raw_path+'_minedHN.jsonl' - + mine_hard_negatives(embedding_model, output_raw_path, output_hn_path, range_for_sampling, negative_number, use_gpu_for_searching) output_json_split_path = output_raw_path+"_minedHN_split.jsonl" @@ -67,7 +67,7 @@ def main(): parser.add_argument("--range_for_sampling", type=str, default='2-10') parser.add_argument("--negative_number", type=int, default=5) - 
parser.add_argument("--use_gpu_for_searching", type=bool, default=True) + parser.add_argument("--use_gpu_for_searching", type=bool, default=True) parser.add_argument("--similarity_threshold", type=float, default=0.6) @@ -95,9 +95,9 @@ def main(): similarity_threshold=args.similarity_threshold construct_retrieval_dataset( - llm_model, - embedding_model, - input_dir, + llm_model, + embedding_model, + input_dir, output_raw_path, temperature, top_p, @@ -107,11 +107,11 @@ def main(): do_sample, num_beams, num_return_sequences, - use_cache, - range_for_sampling, - negative_number, + use_cache, + range_for_sampling, + negative_number, use_gpu_for_searching, similarity_threshold) if __name__ == '__main__': - main() \ No newline at end of file + main() From 5ef03325cac5efe6db226067afbd55e27b21c53b Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 14:58:03 +0800 Subject: [PATCH 003/151] Update llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../tools/evaluation/llm_generate_raw_data.py | 126 ++---------------- 1 file changed, 12 insertions(+), 114 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py index 45860591e4e..99c00842437 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py @@ -21,8 +21,7 @@ import jsonlines import os, re from typing import List -from context_utils import load_unstructured_data, load_structured_data, get_chuck_data -from html_parser import load_html_data +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser import logging from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT from transformers import GenerationConfig @@ -33,120 +32,19 @@ level=logging.INFO ) -class DocumentLoading: - def __init__(self, max_chuck_size=512, min_chuck_size=5, process=True): - """ - Wrapper for document parsing. - """ - self.max_chuck_size = max_chuck_size - self.min_chuck_size = min_chuck_size - self.process = process - - - def data_load(self, input, **kwargs): - """ - The API for loading the file. Support single file, batch files, and urls parsing. - """ - if 'max_chuck_size' in kwargs: - self.max_chuck_size=kwargs['max_chuck_size'] - if 'min_chuck_size' in kwargs: - self.min_chuck_size = kwargs['min_chuck_size'] - if 'process' in kwargs: - self.process = kwargs['process'] - - if isinstance(input, str): - if os.path.isfile(input): - data_collection = self.parse_document(input) - elif os.path.isdir(input): - data_collection = self.batch_parse_document(input) - else: - logging.error("Please check your upload file and try again!") - elif isinstance(input, List): - try: - data_collection = self.parse_html(input) - except: - logging.error("The given link/str is unavailable. Please try another one!") - else: - logging.error("The input format is invalid!") - - documents = [] - for data, metadata in data_collection: - if len(data) < 5: - continue - documents.append(data) - return documents - - - def parse_document(self, input): - """ - Parse the uploaded file. 
- """ - if input.endswith("pdf") or input.endswith("docx") or input.endswith("html") \ - or input.endswith("txt") or input.endswith("md"): - content = load_unstructured_data(input) - if self.process: - chuck = get_chuck_data(content, self.max_chuck_size, self.min_chuck_size, input) - else: - chuck = [[content.strip(),input]] - elif input.endswith("jsonl") or input.endswith("xlsx") or input.endswith("csv") or \ - input.endswith("json"): - chuck = load_structured_data(input, self.process, \ - self.max_chuck_size, self.min_chuck_size) - else: - logging.info("This file {} is ignored. Will support this file format soon.".format(input)) - raise Exception("[Rereieval ERROR] Document format not supported!") - return chuck - - def parse_html(self, input): - """ - Parse the uploaded file. - """ - chucks = [] - for link in input: - if re.match(r'^https?:/{2}\w.+$', link): - content = load_html_data(link) - if content == None: - continue - if self.process: - chuck = get_chuck_data(content, self.max_chuck_size, self.min_chuck_size, link) - else: - chuck = [[content.strip(), link]] - chucks += chuck - else: - logging.error("The given link/str {} cannot be parsed.".format(link)) - - return chucks - - - def batch_parse_document(self, input): - """ - Parse the uploaded batch files in the input folder. - """ - paragraphs = [] - for dirpath, dirnames, filenames in os.walk(input): - for filename in filenames: - if filename.endswith("pdf") or filename.endswith("docx") or filename.endswith("html") \ - or filename.endswith("txt") or filename.endswith("md"): - content = load_unstructured_data(os.path.join(dirpath, filename)) - if self.process: - chuck = get_chuck_data(content, self.max_chuck_size, self.min_chuck_size, input) - else: - chuck = [[content.strip(),input]] - paragraphs += chuck - elif filename.endswith("jsonl") or filename.endswith("xlsx") or filename.endswith("csv") or \ - filename.endswith("json"): - chuck = load_structured_data(os.path.join(dirpath, filename), \ - self.process, self.max_chuck_size, self.min_chuck_size) - paragraphs += chuck - else: - logging.info("This file {} is ignored. 
-                    raise Exception("[Retrieval ERROR] Document format not supported!")
-        return paragraphs
-
+def document_append(data_collection):
+    documents = []
+    for data, metadata in data_collection:
+        if len(data) < 5:
+            continue
+        documents.append(data)
+    return documents
+
 def raw_data_generate(model_id, base_dir, file_json_path,temperature,top_p,top_k,repetition_penalty,max_new_tokens,do_sample,num_beams,num_return_sequences,use_cache):
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16)
-    documents = DocumentLoading().data_load(input=base_dir)
+    data_collection = DocumentParser().data_load(input=base_dir)
+    documents = document_append(data_collection)
 
     generation_config = GenerationConfig(
         temperature = temperature,
From ee1db83212c0049e040cdffd577d95f209486bd1 Mon Sep 17 00:00:00 2001
From: Liangyx2
Date: Fri, 1 Mar 2024 14:58:25 +0800
Subject: [PATCH 004/151] Delete
 intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py

Signed-off-by: Liangyx2
---
 .../tools/evaluation/context_utils.py         | 246 ------------------
 1 file changed, 246 deletions(-)
 delete mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py
deleted file mode 100644
index 96fa0c54509..00000000000
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/context_utils.py
+++ /dev/null
@@ -1,246 +0,0 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unicodedata
-import pandas as pd
-import re, json
-from langchain.document_loaders import UnstructuredMarkdownLoader
-from docx import Document as DDocument
-from bs4 import BeautifulSoup
-import fitz
-import easyocr
-from PIL import Image
-import numpy as np
-import io
-
-def uni_pro(text):
-    """Check if the character is ASCII or falls in the category of non-spacing marks."""
-    normalized_text = unicodedata.normalize('NFKD', text)
-    filtered_text = ''
-    for char in normalized_text:
-        if ord(char) < 128 or unicodedata.category(char) == 'Mn':
-            filtered_text += char
-    return filtered_text
-
-
-def read_pdf(pdf_path):
-    """Read the pdf file."""
-    doc = fitz.open(pdf_path)
-    reader = easyocr.Reader(['en'])
-    result =''
-    for i in range(doc.page_count):
-        page = doc.load_page(i)
-        pagetext = page.get_text().strip()
-        if pagetext:
-            if pagetext.endswith('!') or pagetext.endswith('?') or pagetext.endswith('.'):
-                result=result+pagetext
-            else:
-                result=result+pagetext+'.'
-        if len(doc.get_page_images(i)) > 0 :
-            for img in doc.get_page_images(i):
-                if img:
-                    pageimg=''
-                    xref = img[0]
-                    img_data = doc.extract_image(xref)
-                    img_bytes = img_data['image']
-                    pil_image = Image.open(io.BytesIO(img_bytes))
-                    img = np.array(pil_image)
-                    img_result = reader.readtext(img, paragraph=True, detail=0)
-                    pageimg=pageimg + ', '.join(img_result).strip()
-                if pageimg.endswith('!') or pageimg.endswith('?') or pageimg.endswith('.'):
-                    pass
-                else:
-                    pageimg=pageimg+'.'
-                result=result+pageimg
-    return result
-
-
-def read_html(html_path):
-    """Read the html file."""
-    with open(html_path, 'r', encoding="utf-8") as file:
-        html = file.read()
-    soup = BeautifulSoup(html, 'html.parser')
-    text = soup.get_text(strip=True)
-    return text
-
-
-def read_txt(txt_path):
-    """Read txt file."""
-    with open(txt_path, 'r') as file:
-        text = file.read()
-    return text
-
-
-def read_docx(doc_path):
-    """Read docx file."""
-    doc = DDocument(doc_path)
-    text = ''
-    for paragraph in doc.paragraphs:
-        text += paragraph.text
-    return text
-
-
-def read_md(md_path):
-    """Read markdown file."""
-    loader = UnstructuredMarkdownLoader(md_path)
-    text = loader.load()[0].page_content
-    return text
-
-
-def load_json(input, process, max_length, min_length):
-    """Load and process json file."""
-    data = []
-    with open(input, 'r') as file:
-        for line in file:
-            json_obj = json.loads(line)
-            data.append(json_obj)
-
-    new_sens = []
-    new_collect = []
-    for sub in data:
-        sub['content'] = sub['content'].replace('#', " ")
-        sub['content'] = re.sub(r'\s+', ' ', sub['content'])
-        if not process:
-            if len(sub['content']) < min_length:
-                continue
-            new_doc = [sub['content'], sub['link']]
-            new_collect.append(new_doc)
-        else:
-            for sub in data:
-                sub['content'] = sub['content'].replace('#', " ")
-                if len(sub['content']) < min_length:
-                    continue
-                split_sen = re.split('(?<=[!.?])', sub['content'])
-                for num in range(len(split_sen)):
-                    if num + 1 < len(split_sen):
-                        if len(split_sen[num]) > max_length:
-                            new_sens.append(split_sen[num].strip())
-                        else:
-                            split_sen[num + 1] = split_sen[num] + split_sen[num + 1]
-                    else:
-                        new_sens.append(split_sen[num])
-
-            paragraphs = list(set(new_sens))
-            for paragraph in paragraphs:
-                new_doc = [paragraph, sub['link']]
-                new_collect.append(new_doc)
-    return new_collect
-
-
-def load_xlsx(input):
-    """Load and process xlsx file."""
-    df = pd.read_excel(input)
-    header = df.columns.tolist()
-    all_data = []
-    if 'Questions' in header and 'Answers' in header:
-        for index, row in df.iterrows():
-            sub = row["Answers"]
-            sub=sub.replace('#', " ")
-            sub = sub.replace(r'\t', " ")
-            sub = sub.replace('\n', ' ')
-            sub = sub.replace('\n\n', ' ')
-            sub = re.sub(r'\s+', ' ', sub)
-            new_doc = [sub, input]
-            all_data.append(new_doc)
-    elif 'question' in header and 'answer' in header and 'link' in header:
-        for index, row in df.iterrows():
-            sub = row["answer"]
-            sub = sub.replace('#', " ")
-            sub = sub.replace(r'\t', " ")
-            sub = sub.replace('\n', ' ')
-            sub = sub.replace('\n\n', ' ')
-            sub = re.sub(r'\s+', ' ', sub)
-            all_data.append([sub, row['link']])
-    elif 'context' in header and 'link' in header:
-        for index, row in df.iterrows():
-            sub = row['context']
-            sub = sub.replace('#', " ")
-            sub = sub.replace(r'\t', " ")
-            sub = sub.replace('\n', ' ')
-            sub = sub.replace('\n\n', ' ')
-            sub = re.sub(r'\s+', ' ', sub)
-            all_data.append([sub, row['link']])
-    return all_data
-
-def load_csv(input):
-    """Load the csv file."""
-    df = pd.read_csv(input)
-    all_data = []
-    documents = []
-    for index, row in df.iterrows():
-        sub = row["correct_answer"]
-        all_data.append(sub)
-
-    for data in all_data:
-        data = data.replace('#', " ")
-        data = re.sub(r'\s+', ' ', data)
-        new_doc = [data, input]
-        documents.append(new_doc)
-    return 
documents - -def load_structured_data(input, process, max_length, min_length): - """Load structured context.""" - if input.endswith("jsonl") or input.endswith("json"): - content = load_json(input, process, max_length, min_length) - elif input.endswith("xlsx"): - content = load_xlsx(input) - elif input.endswith("csv"): - content = load_csv(input) - return content - -def load_unstructured_data(input): - """Load unstructured context.""" - if input.endswith("pdf"): - text = read_pdf(input) - elif input.endswith("docx"): - text = read_docx(input) - elif input.endswith("html"): - text = read_html(input) - elif input.endswith("txt"): - text = read_txt(input) - elif input.endswith("md"): - text = read_md(input) - - text = text.replace('\n', ' ') - text = text.replace('\n\n', ' ') - text = uni_pro(text) - text = re.sub(r'\s+', ' ', text) - return text - -def get_chuck_data(content, max_length, min_length, input): - """Process the context to make it maintain a suitable length for the generation.""" - sentences = re.split('(?<=[!.?])', content) - - paragraphs = [] - current_length = 0 - count = 0 - current_paragraph = "" - for sub_sen in sentences: - count +=1 - sentence_length = len(sub_sen) - if current_length + sentence_length <= max_length: - current_paragraph += sub_sen - current_length += sentence_length - if count == len(sentences) and len(current_paragraph.strip())>min_length: - paragraphs.append([current_paragraph.strip() ,input]) - else: - paragraphs.append([current_paragraph.strip() ,input]) - current_paragraph = sub_sen - current_length = sentence_length - - return paragraphs From 89597f281bbb80462a03d912bf282a64348f650c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 06:58:23 +0000 Subject: [PATCH 005/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/tools/evaluation/llm_generate_raw_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py index 99c00842437..b4733fa0e90 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py @@ -39,7 +39,7 @@ def document_append(data_collection): continue documents.append(data) return documents - + def raw_data_generate(model_id, base_dir, file_json_path,temperature,top_p,top_k,repetition_penalty,max_new_tokens,do_sample,num_beams,num_return_sequences,use_cache): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) From b132d66b9158ae61e5905282f7aba1e3af639fa0 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 14:58:40 +0800 Subject: [PATCH 006/151] Delete intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py Signed-off-by: Liangyx2 --- .../tools/evaluation/html_parser.py | 195 ------------------ 1 file changed, 195 deletions(-) delete mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py deleted file mode 100644 index f2f4f1bedd5..00000000000 
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/html_parser.py +++ /dev/null @@ -1,195 +0,0 @@ -# !/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import requests -from urllib.parse import urlparse, urlunparse -import multiprocessing -import urllib3 -import langid -from bs4 import BeautifulSoup -import os -import re -from context_utils import uni_pro -import logging - -logging.basicConfig( - format="%(asctime)s %(name)s:%(levelname)s:%(message)s", - datefmt="%d-%M-%Y %H:%M:%S", - level=logging.INFO -) -urllib3.disable_warnings() - - -class Crawler: - def __init__(self, pool=None): - if pool: - assert isinstance(pool, (str, list, tuple)), 'url pool should be str, list or tuple' - self.pool = pool - self.headers = { - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng, \ - */*;q=0.8,application/signed-exchange;v=b3;q=0.7', - 'Accept-Encoding': 'gzip, deflate, br', - 'Accept-Language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7', - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, \ - like Gecko) Chrome/113.0.0.0 Safari/537.36' - } - self.fetched_pool = set() - - def get_sublinks(self, soup): - sublinks = [] - for links in soup.find_all('a'): - sublinks.append(str(links.get('href'))) - return sublinks - - def get_hyperlink(self, soup, base_url): - sublinks = [] - for links in soup.find_all('a'): - link = str(links.get('href')) - if link.startswith('#') or link is None or link == 'None': - continue - suffix = link.split('/')[-1] - if '.' 
in suffix and suffix.split('.')[-1] not in ['html', 'htmld']: - continue - link_parse = urlparse(link) - base_url_parse = urlparse(base_url) - if link_parse.path == '': - continue - if link_parse.netloc != '': - # keep crawler works in the same domain - if link_parse.netloc != base_url_parse.netloc: - continue - sublinks.append(link) - else: - sublinks.append(urlunparse((base_url_parse.scheme, - base_url_parse.netloc, - link_parse.path, - link_parse.params, - link_parse.query, - link_parse.fragment))) - return sublinks - - def fetch(self, url, headers=None, max_times=5): - if not headers: - headers = self.headers - while max_times: - if not url.startswith('http') or not url.startswith('https'): - url = 'http://' + url - logging.info('start fetch %s...', url) - try: - response = requests.get(url, headers=headers, verify=True) - if response.status_code != 200: - logging.error('fail to fetch %s, response status code: %s', url, response.status_code) - else: - return response - except Exception as e: - logging.error('fail to fetch %s, caused by %s', url, e) - max_times -= 1 - return None - - def process_work(self, sub_url, work): - response = self.fetch(sub_url) - if response is None: - return [] - self.fetched_pool.add(sub_url) - soup = self.parse(response.text) - base_url = self.get_base_url(sub_url) - sublinks = self.get_hyperlink(soup, base_url) - if work: - work(sub_url, soup) - return sublinks - - def crawl(self, pool, work=None, max_depth=10, workers=10): - url_pool = set() - for url in pool: - base_url = self.get_base_url(url) - response = self.fetch(url) - soup = self.parse(response.text) - sublinks = self.get_hyperlink(soup, base_url) - self.fetched_pool.add(url) - url_pool.update(sublinks) - depth = 0 - while len(url_pool) > 0 and depth < max_depth: - logging.info('current depth %s...', depth) - mp = multiprocessing.Pool(processes=workers) - results = [] - for sub_url in url_pool: - if sub_url not in self.fetched_pool: - results.append(mp.apply_async(self.process_work, (sub_url, work))) - mp.close() - mp.join() - url_pool = set() - for result in results: - sublinks = result.get() - url_pool.update(sublinks) - depth += 1 - - def parse(self, html_doc): - soup = BeautifulSoup(html_doc, 'lxml') - return soup - - def download(self, url, file_name): - logging.info('download %s into %s...', url, file_name) - try: - r = requests.get(url, stream=True, headers=self.headers, verify=True) - f = open(file_name, "wb") - for chunk in r.iter_content(chunk_size=512): - if chunk: - f.write(chunk) - except Exception as e: - logging.error('fail to download %s, caused by %s', url, e) - - def get_base_url(self, url): - result = urlparse(url) - return urlunparse((result.scheme, result.netloc, '', '', '', '')) - - def clean_text(self, text): - text = text.strip().replace('\r', '\n') - text = re.sub(' +', ' ', text) - text = re.sub('\n+', '\n', text) - text = text.split('\n') - return '\n'.join([i for i in text if i and i != ' ']) - - -def load_html_data(url): - crawler = Crawler() - res = crawler.fetch(url) - if res == None: - return None - soup = crawler.parse(res.text) - all_text = crawler.clean_text(soup.select_one('body').text) - main_content = '' - for element_name in ['main', 'container']: - main_block = None - if soup.select(f'.{element_name}'): - main_block = soup.select(f'.{element_name}') - elif soup.select(f'#{element_name}'): - main_block = soup.select(f'#{element_name}') - if main_block: - for element in main_block: - text = crawler.clean_text(element.text) - if text not in main_content: - 
main_content += f'\n{text}' - main_content = crawler.clean_text(main_content) - - main_content = main_content.replace('\n', '') - main_content = main_content.replace('\n\n', '') - main_content = uni_pro(main_content) - main_content = re.sub(r'\s+', ' ', main_content) - - # {'text': all_text, 'main_content': main_content} - - return main_content From 8e955ced83c03f86ca42e6be38b806e267344361 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 15:34:37 +0800 Subject: [PATCH 007/151] update --- .../evaluation/data_augmentation/__init__.py | 16 +++ .../evaluation/data_augmentation/hn_mine.py | 97 +++++++++++++ .../llm_generate_raw_data.py | 111 ++++++++++++++ .../mine_hard_negatives_check_similarity.py | 66 +++++++++ .../data_augmentation/requirements.txt | 16 +++ .../retrieval_dataset_construction.py | 136 ++++++++++++++++++ .../tools/evaluation/framework/__init__.py | 16 +++ .../tools/evaluation/retriever/__init__.py | 16 +++ 8 files changed, 474 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/__init__.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/__init__.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/__init__.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/__init__.py new file mode 100644 index 00000000000..18896e7b549 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py new file mode 100644 index 00000000000..976713a846c --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import random +import numpy as np +import faiss +from tqdm import tqdm + +def create_index(embeddings, use_gpu): + index = faiss.IndexFlatIP(len(embeddings[0])) + embeddings = np.asarray(embeddings, dtype=np.float32) + if use_gpu: + co = faiss.GpuMultipleClonerOptions() # pylint: disable=E1101 + co.shard = True + co.useFloat16 = True + index = faiss.index_cpu_to_all_gpus(index, co=co) + index.add(embeddings) + return index + +def batch_search(index, + query, + topk: int = 200, + batch_size: int = 64): + all_scores, all_inxs = [], [] + for start_index in tqdm(range(0, len(query), batch_size), desc="Batches", disable=len(query) < 256): + batch_query = query[start_index:start_index + batch_size] + batch_scores, batch_inxs = index.search(np.asarray(batch_query, dtype=np.float32), k=topk) + all_scores.extend(batch_scores.tolist()) + all_inxs.extend(batch_inxs.tolist()) + return all_scores, all_inxs + +def get_corpus(candidate_pool): + corpus = [] + for line in open(candidate_pool): + line = json.loads(line.strip()) + corpus.append(line['text']) + return corpus + +def find_knn_neg(model, input_file, candidate_pool, output_file, sample_range, negative_number, use_gpu): + corpus = [] + queries = [] + train_data = [] + for line in open(input_file): + line = json.loads(line.strip()) + train_data.append(line) + corpus.extend(line['pos']) + if 'neg' in line: + corpus.extend(line['neg']) + queries.append(line['query']) + + if candidate_pool is not None: + if not isinstance(candidate_pool, list): + candidate_pool = get_corpus(candidate_pool) + corpus = list(set(candidate_pool)) + else: + corpus = list(set(corpus)) + + p_vecs = model.encode(corpus, batch_size=256) + q_vecs = model.encode(queries, batch_size=256) + + index = create_index(p_vecs, use_gpu=use_gpu) + _, all_inxs = batch_search(index, q_vecs, topk=sample_range[-1]) + assert len(all_inxs) == len(train_data) + + for i, data in enumerate(train_data): + query = data['query'] + inxs = all_inxs[i][sample_range[0]:sample_range[1]] + filtered_inx = [] + for inx in inxs: + if inx == -1: break + if corpus[inx] not in data['pos'] and corpus[inx] != query: + filtered_inx.append(inx) + + if len(filtered_inx) > negative_number: + filtered_inx = random.sample(filtered_inx, negative_number) + data['neg'] = [corpus[inx] for inx in filtered_inx] + + with open(output_file, 'w') as f: + for data in train_data: + if len(data['neg']) < negative_number: + data['neg'].extend(random.sample(corpus, negative_number - len(data['neg']))) + f.write(json.dumps(data, ensure_ascii=False) + '\n') diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py new file mode 100644 index 00000000000..139a3465475 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 
(the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import torch +from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401 +import jsonlines +import os, re +from typing import List +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser +import logging +from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT # pylint: disable=E0611 +from transformers import GenerationConfig + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) + +def document_append(data_collection): + documents = [] + for data, metadata in data_collection: + if len(data) < 5: + continue + documents.append(data) + return documents + +def raw_data_generate(model_id, + base_dir, + file_json_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache): + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) + data_collection = DocumentParser().data_load(input=base_dir) + documents = document_append(data_collection) + + generation_config = GenerationConfig( + temperature = temperature, + top_p = top_p, + top_k = top_k, + repetition_penalty = repetition_penalty, + max_new_tokens = max_new_tokens, + do_sample = do_sample, + num_beams = num_beams, + num_return_sequences = num_return_sequences, + use_cache = use_cache, + pad_token_id=tokenizer.eos_token_id + ) + + for i in range(len(documents)): + context = documents[i] + + if context: + input = QUERYGENERATE_PROMPT.format(context=context) + + model_input = tokenizer(input, return_tensors="pt").to("cuda") + model.eval() + result = [] + + for j in range(5): + with torch.no_grad(): + res = model.generate(**model_input, generation_config=generation_config)[0] + res=tokenizer.decode(res, skip_special_tokens=True) + + res = res[res.find('Generated questions:') :] + res = re.sub('Generated questions:', '', res) + res = re.sub('---', '', res) + + res = res.split("?")[0:2] + for r in res: + r = r.replace('1.', "").replace('2.', "") + r = r.replace('Evaluation:', "") + r = r.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() + r = r + '?' + result.append(r) + + result_str='' + result_set = list(set(result)) + for k in range(len(result_set)): + result_str = result_str + str(k) + '. 
'+ result_set[k] + + if result_str and result_str.isspace()==False: + data = { + "query": result_str, + "pos": [context], + } + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py new file mode 100644 index 00000000000..77a95eff8f1 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import jsonlines +from hn_mine import find_knn_neg +from sentence_transformers import SentenceTransformer + +def mine_hard_negatives(model_name_or_path, + input_file, + output_file, + range_for_sampling, + negative_number, + use_gpu_for_searching): + candidate_pool=None + + sample_range = range_for_sampling.split('-') + sample_range = [int(x) for x in sample_range] + + model = SentenceTransformer(model_name_or_path) + + find_knn_neg(model, + input_file=input_file, + candidate_pool=candidate_pool, + output_file=output_file, + sample_range=sample_range, + negative_number=negative_number, + use_gpu=use_gpu_for_searching) + +def similarity_score(queries,passages,model_name_or_path): + queries = [queries] + passages = passages + instruction = "" + model = SentenceTransformer(model_name_or_path) + q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True) + p_embeddings = model.encode(passages, normalize_embeddings=True) + similarity_score = q_embeddings @ p_embeddings.T + return similarity_score + +def similarity_check(file_jsonl_path,file_json_split_path,model_name_or_path, similarity_threshold): + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + stu["query"]=stu["query"].split("?")[:-1] + for i in range(len(stu["query"])): + stu["query"][i]=stu["query"][i].lstrip('0123456789-. ')+ '?' 
+ if similarity_score(stu["query"][i],stu["pos"],model_name_or_path) >= similarity_threshold: + data = { + "query": stu["query"][i], + "pos": stu["pos"], + "neg": stu["neg"], + } + with jsonlines.open(file_json_split_path,"a") as file_json: + file_json.write(data) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt new file mode 100644 index 00000000000..0466e299d3d --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt @@ -0,0 +1,16 @@ +accelerate +beautifulsoup4 +easyocr +faiss-gpu +jsonlines +langchain==0.0.354 +langchain_core==0.1.18 +langid +modelscope +numpy +openpyxl +PyMuPDF +python-docx +sentence-transformers==2.3.1 +torch +transformers diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py new file mode 100644 index 00000000000..b1a0b462f19 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
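The filtering in similarity_check above round-trips the packed query string that raw_data_generate wrote out: a string like "0. q1? 1. q2?" is split on "?", the numeric prefixes are stripped, and each question survives only if its embedding sits close enough to the positive passage. A small sketch of that logic, with plain numpy unit vectors standing in for the SentenceTransformer embeddings (the values are invented for illustration):

import numpy as np

packed = "0. What does create_index build? 1. What does batch_search return?"
queries = [q.lstrip('0123456789-. ') + '?' for q in packed.split("?")[:-1]]

# Stand-in embeddings, already L2-normalized as encode(..., normalize_embeddings=True) would produce.
q_emb = np.array([[0.6, 0.8], [1.0, 0.0]])
p_emb = np.array([[0.6, 0.8]])
scores = q_emb @ p_emb.T            # dot product of unit vectors equals cosine similarity
keep = [q for q, s in zip(queries, scores[:, 0]) if s >= 0.6]
print(keep)                          # both questions clear the 0.6 threshold here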
+ +from llm_generate_raw_data import raw_data_generate +from mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check +import argparse + +def construct_retrieval_dataset( + llm_model, + embedding_model, + input_dir, + output_raw_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache, + range_for_sampling, + negative_number, + use_gpu_for_searching, + similarity_threshold): + + raw_data_generate(llm_model, + input_dir, + output_raw_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache) + + output_hn_path=output_raw_path+'_minedHN.jsonl' + + mine_hard_negatives(embedding_model, + output_raw_path, + output_hn_path, + range_for_sampling, + negative_number, + use_gpu_for_searching) + + output_json_split_path = output_raw_path+"_minedHN_split.jsonl" + similarity_check(output_hn_path, + output_json_split_path, + embedding_model, + similarity_threshold) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--llm_model", type=str) + parser.add_argument("--embedding_model", type=str) + parser.add_argument("--input_dir", type=str) + parser.add_argument("--output_raw_path", type=str) + + parser.add_argument("--temperature", type=float, default=0.8) + parser.add_argument("--top_p", type=float, default=0.9) + parser.add_argument("--top_k", type=int, default=40) + parser.add_argument("--repetition_penalty", type=float, default=2.0) + parser.add_argument("--max_new_tokens", type=int, default=48) + parser.add_argument("--do_sample", type=bool, default=True) + parser.add_argument("--num_beams", type=int, default=2) + parser.add_argument("--num_return_sequences", type=int, default=2) + parser.add_argument("--use_cache", type=bool, default=True) + + parser.add_argument("--range_for_sampling", type=str, default='2-10') + parser.add_argument("--negative_number", type=int, default=5) + parser.add_argument("--use_gpu_for_searching", type=bool, default=True) + + parser.add_argument("--similarity_threshold", type=float, default=0.6) + + args = parser.parse_args() + + llm_model = args.llm_model + embedding_model = args.embedding_model + input_dir = args.input_dir + output_raw_path = args.output_raw_path + + temperature = args.temperature + top_p = args.top_p + top_k = args.top_k + repetition_penalty = args.repetition_penalty + max_new_tokens = args.max_new_tokens + do_sample = args.do_sample + num_beams = args.num_beams + num_return_sequences = args.num_return_sequences + use_cache = args.use_cache + + range_for_sampling=args.range_for_sampling + negative_number=args.negative_number + use_gpu_for_searching=args.use_gpu_for_searching + + similarity_threshold=args.similarity_threshold + + construct_retrieval_dataset( + llm_model, + embedding_model, + input_dir, + output_raw_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache, + range_for_sampling, + negative_number, + use_gpu_for_searching, + similarity_threshold) + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/__init__.py new file mode 100644 index 00000000000..18896e7b549 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/__init__.py @@ -0,0 +1,16 @@ 
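Taken together, the script above chains the three stages: raw query generation, hard-negative mining, then similarity filtering. A hedged usage sketch of the Python entry point as it stands in this patch (model ids and the input directory are placeholders; run it from the data_augmentation directory, since the module still uses bare imports here):

from retrieval_dataset_construction import construct_retrieval_dataset

construct_retrieval_dataset(
    llm_model="Intel/neural-chat-7b-v3-1",      # placeholder LLM id or local path
    embedding_model="BAAI/bge-base-en-v1.5",    # placeholder embedding model
    input_dir="./docs",                          # directory of source documents
    output_raw_path="data.jsonl",
    temperature=0.8, top_p=0.9, top_k=40, repetition_penalty=2.0,
    max_new_tokens=48, do_sample=True, num_beams=2, num_return_sequences=2,
    use_cache=True, range_for_sampling="2-10", negative_number=5,
    use_gpu_for_searching=True, similarity_threshold=0.6,
)
# Writes data.jsonl, then data.jsonl_minedHN.jsonl, then data.jsonl_minedHN_split.jsonl.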
+#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/__init__.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/__init__.py new file mode 100644 index 00000000000..18896e7b549 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/__init__.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. From 635b9063ae2367fdc8283306f22da79382c5de06 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Mar 2024 07:35:29 +0000 Subject: [PATCH 008/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../data_augmentation/llm_generate_raw_data.py | 4 ++-- .../mine_hard_negatives_check_similarity.py | 10 +++++----- .../retrieval_dataset_construction.py | 12 ++++++------ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index 139a3465475..770a17bd0dd 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -40,8 +40,8 @@ def document_append(data_collection): documents.append(data) return documents -def raw_data_generate(model_id, - base_dir, +def raw_data_generate(model_id, + base_dir, file_json_path, temperature, top_p, diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py index 77a95eff8f1..67a811fa92d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py @@ -19,11 +19,11 @@ from hn_mine import find_knn_neg from sentence_transformers import SentenceTransformer -def 
mine_hard_negatives(model_name_or_path, - input_file, - output_file, - range_for_sampling, - negative_number, +def mine_hard_negatives(model_name_or_path, + input_file, + output_file, + range_for_sampling, + negative_number, use_gpu_for_searching): candidate_pool=None diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py index b1a0b462f19..6d91c981fe7 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -52,12 +52,12 @@ def construct_retrieval_dataset( use_cache) output_hn_path=output_raw_path+'_minedHN.jsonl' - - mine_hard_negatives(embedding_model, - output_raw_path, - output_hn_path, - range_for_sampling, - negative_number, + + mine_hard_negatives(embedding_model, + output_raw_path, + output_hn_path, + range_for_sampling, + negative_number, use_gpu_for_searching) output_json_split_path = output_raw_path+"_minedHN_split.jsonl" From d7d3d03a878127cd812a3a9b019bdc007d8389ea Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 15:37:20 +0800 Subject: [PATCH 009/151] Delete intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/hn_mine.py | 97 ------------------- 1 file changed, 97 deletions(-) delete mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py deleted file mode 100644 index 8fcaeb12153..00000000000 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/hn_mine.py +++ /dev/null @@ -1,97 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
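The FAISS flow inside find_knn_neg (shown in hn_mine.py earlier in this series) is: build an exact inner-product index over the passage vectors, batch-search the query vectors, then sample negatives from a rank window. A self-contained sketch with random vectors standing in for real sentence embeddings:

import faiss
import numpy as np

rng = np.random.default_rng(0)
p_vecs = rng.random((100, 32), dtype=np.float32)   # passage embeddings
q_vecs = rng.random((4, 32), dtype=np.float32)     # query embeddings

index = faiss.IndexFlatIP(p_vecs.shape[1])          # exact inner-product index
index.add(p_vecs)
scores, inxs = index.search(q_vecs, k=10)           # top-10 candidate ids per query
# find_knn_neg samples negatives from inxs[:, sample_range[0]:sample_range[1]],
# skipping ids that point at the query itself or at one of its positives.
print(inxs.shape)                                   # (4, 10)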
- -import json -import random -import numpy as np -import faiss -from tqdm import tqdm - -def create_index(embeddings, use_gpu): - index = faiss.IndexFlatIP(len(embeddings[0])) - embeddings = np.asarray(embeddings, dtype=np.float32) - if use_gpu: - co = faiss.GpuMultipleClonerOptions() - co.shard = True - co.useFloat16 = True - index = faiss.index_cpu_to_all_gpus(index, co=co) - index.add(embeddings) - return index - -def batch_search(index, - query, - topk: int = 200, - batch_size: int = 64): - all_scores, all_inxs = [], [] - for start_index in tqdm(range(0, len(query), batch_size), desc="Batches", disable=len(query) < 256): - batch_query = query[start_index:start_index + batch_size] - batch_scores, batch_inxs = index.search(np.asarray(batch_query, dtype=np.float32), k=topk) - all_scores.extend(batch_scores.tolist()) - all_inxs.extend(batch_inxs.tolist()) - return all_scores, all_inxs - -def get_corpus(candidate_pool): - corpus = [] - for line in open(candidate_pool): - line = json.loads(line.strip()) - corpus.append(line['text']) - return corpus - -def find_knn_neg(model, input_file, candidate_pool, output_file, sample_range, negative_number, use_gpu): - corpus = [] - queries = [] - train_data = [] - for line in open(input_file): - line = json.loads(line.strip()) - train_data.append(line) - corpus.extend(line['pos']) - if 'neg' in line: - corpus.extend(line['neg']) - queries.append(line['query']) - - if candidate_pool is not None: - if not isinstance(candidate_pool, list): - candidate_pool = get_corpus(candidate_pool) - corpus = list(set(candidate_pool)) - else: - corpus = list(set(corpus)) - - p_vecs = model.encode(corpus, batch_size=256) - q_vecs = model.encode(queries, batch_size=256) - - index = create_index(p_vecs, use_gpu=use_gpu) - _, all_inxs = batch_search(index, q_vecs, topk=sample_range[-1]) - assert len(all_inxs) == len(train_data) - - for i, data in enumerate(train_data): - query = data['query'] - inxs = all_inxs[i][sample_range[0]:sample_range[1]] - filtered_inx = [] - for inx in inxs: - if inx == -1: break - if corpus[inx] not in data['pos'] and corpus[inx] != query: - filtered_inx.append(inx) - - if len(filtered_inx) > negative_number: - filtered_inx = random.sample(filtered_inx, negative_number) - data['neg'] = [corpus[inx] for inx in filtered_inx] - - with open(output_file, 'w') as f: - for data in train_data: - if len(data['neg']) < negative_number: - data['neg'].extend(random.sample(corpus, negative_number - len(data['neg']))) - f.write(json.dumps(data, ensure_ascii=False) + '\n') From c9fec02095509bea56af94c86a51f33b7acfd099 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 15:37:33 +0800 Subject: [PATCH 010/151] Delete intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../tools/evaluation/llm_generate_raw_data.py | 100 ------------------ 1 file changed, 100 deletions(-) delete mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py deleted file mode 100644 index b4733fa0e90..00000000000 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/llm_generate_raw_data.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import torch -from modelscope import AutoModelForCausalLM, AutoTokenizer -import jsonlines -import os, re -from typing import List -from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser -import logging -from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT -from transformers import GenerationConfig - -logging.basicConfig( - format="%(asctime)s %(name)s:%(levelname)s:%(message)s", - datefmt="%d-%M-%Y %H:%M:%S", - level=logging.INFO -) - -def document_append(data_collection): - documents = [] - for data, metadata in data_collection: - if len(data) < 5: - continue - documents.append(data) - return documents - -def raw_data_generate(model_id, base_dir, file_json_path,temperature,top_p,top_k,repetition_penalty,max_new_tokens,do_sample,num_beams,num_return_sequences,use_cache): - tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) - data_collection = DocumentParser().data_load(input=base_dir) - documents = document_append(data_collection) - - generation_config = GenerationConfig( - temperature = temperature, - top_p = top_p, - top_k = top_k, - repetition_penalty = repetition_penalty, - max_new_tokens = max_new_tokens, - do_sample = do_sample, - num_beams = num_beams, - num_return_sequences = num_return_sequences, - use_cache = use_cache, - pad_token_id=tokenizer.eos_token_id - ) - - for i in range(len(documents)): - context = documents[i] - - if context: - input = QUERYGENERATE_PROMPT.format(context=context) - - model_input = tokenizer(input, return_tensors="pt").to("cuda") - model.eval() - result = [] - - for j in range(5): - with torch.no_grad(): - res = model.generate(**model_input, generation_config=generation_config)[0] - res=tokenizer.decode(res, skip_special_tokens=True) - - res = res[res.find('Generated questions:') :] - res = re.sub('Generated questions:', '', res) - res = re.sub('---', '', res) - - res = res.split("?")[0:2] - for r in res: - r = r.replace('1.', "").replace('2.', "") - r = r.replace('Evaluation:', "") - r = r.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() - r = r + '?' - result.append(r) - - result_str='' - result_set = list(set(result)) - for k in range(len(result_set)): - result_str = result_str + str(k) + '. 
'+ result_set[k] - - if result_str and result_str.isspace()==False: - data = { - "query": result_str, - "pos": [context], - } - with jsonlines.open(file_json_path,"a") as file_json: - file_json.write(data) From 5e3211395d6e5729f1545df41ab36f1731785fbc Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 15:37:43 +0800 Subject: [PATCH 011/151] Delete intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py Signed-off-by: Liangyx2 --- .../mine_hard_negatives_check_similarity.py | 61 ------------------- 1 file changed, 61 deletions(-) delete mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py deleted file mode 100644 index 7869a51a706..00000000000 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/mine_hard_negatives_check_similarity.py +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import jsonlines -from hn_mine import find_knn_neg -from sentence_transformers import SentenceTransformer - -def mine_hard_negatives(model_name_or_path, input_file, output_file, range_for_sampling, negative_number, use_gpu_for_searching): - candidate_pool=None - - sample_range = range_for_sampling.split('-') - sample_range = [int(x) for x in sample_range] - - model = SentenceTransformer(model_name_or_path) - - find_knn_neg(model, - input_file=input_file, - candidate_pool=candidate_pool, - output_file=output_file, - sample_range=sample_range, - negative_number=negative_number, - use_gpu=use_gpu_for_searching) - -def similarity_score(queries,passages,model_name_or_path): - queries = [queries] - passages = passages - instruction = "" - model = SentenceTransformer(model_name_or_path) - q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True) - p_embeddings = model.encode(passages, normalize_embeddings=True) - similarity_score = q_embeddings @ p_embeddings.T - return similarity_score - -def similarity_check(file_jsonl_path,file_json_split_path,model_name_or_path, similarity_threshold): - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - stu["query"]=stu["query"].split("?")[:-1] - for i in range(len(stu["query"])): - stu["query"][i]=stu["query"][i].lstrip('0123456789-. ')+ '?' 
- if similarity_score(stu["query"][i],stu["pos"],model_name_or_path) >= similarity_threshold: - data = { - "query": stu["query"][i], - "pos": stu["pos"], - "neg": stu["neg"], - } - with jsonlines.open(file_json_split_path,"a") as file_json: - file_json.write(data) From f67622c619a181e869ee89e39c287f949a84975a Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 15:37:53 +0800 Subject: [PATCH 012/151] Delete intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt Signed-off-by: Liangyx2 --- .../tools/evaluation/requirements.txt | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt deleted file mode 100644 index 0466e299d3d..00000000000 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -accelerate -beautifulsoup4 -easyocr -faiss-gpu -jsonlines -langchain==0.0.354 -langchain_core==0.1.18 -langid -modelscope -numpy -openpyxl -PyMuPDF -python-docx -sentence-transformers==2.3.1 -torch -transformers From f2e344a86f223dad66c0a57918aef6b2e2b8ce3c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 1 Mar 2024 15:38:05 +0800 Subject: [PATCH 013/151] Delete intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py Signed-off-by: Liangyx2 --- .../retrieval_dataset_construction.py | 117 ------------------ 1 file changed, 117 deletions(-) delete mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py deleted file mode 100644 index 1f8b4a1057f..00000000000 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retrieval_dataset_construction.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
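With the old top-level copies removed, the tooling resolves only under the data_augmentation package. One caveat worth illustrating: at this point the module bodies still use bare imports (for example, from llm_generate_raw_data import raw_data_generate), so the package-level import below only succeeds once a later patch in this series rewrites them as relative imports:

# Hypothetical consumer code; valid only after the relative-import fix later in this series.
from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation.retrieval_dataset_construction import (
    construct_retrieval_dataset,
)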
- -from llm_generate_raw_data import raw_data_generate -from mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check -import argparse - -def construct_retrieval_dataset( - llm_model, - embedding_model, - input_dir, - output_raw_path, - temperature, - top_p, - top_k, - repetition_penalty, - max_new_tokens, - do_sample, - num_beams, - num_return_sequences, - use_cache, - range_for_sampling, - negative_number, - use_gpu_for_searching, - similarity_threshold): - - raw_data_generate(llm_model,input_dir,output_raw_path,temperature,top_p,top_k,repetition_penalty,max_new_tokens,do_sample,num_beams,num_return_sequences,use_cache) - - output_hn_path=output_raw_path+'_minedHN.jsonl' - - mine_hard_negatives(embedding_model, output_raw_path, output_hn_path, range_for_sampling, negative_number, use_gpu_for_searching) - - output_json_split_path = output_raw_path+"_minedHN_split.jsonl" - similarity_check(output_hn_path,output_json_split_path,embedding_model,similarity_threshold) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--llm_model", type=str) - parser.add_argument("--embedding_model", type=str) - parser.add_argument("--input_dir", type=str) - parser.add_argument("--output_raw_path", type=str) - - parser.add_argument("--temperature", type=float, default=0.8) - parser.add_argument("--top_p", type=float, default=0.9) - parser.add_argument("--top_k", type=int, default=40) - parser.add_argument("--repetition_penalty", type=float, default=2.0) - parser.add_argument("--max_new_tokens", type=int, default=48) - parser.add_argument("--do_sample", type=bool, default=True) - parser.add_argument("--num_beams", type=int, default=2) - parser.add_argument("--num_return_sequences", type=int, default=2) - parser.add_argument("--use_cache", type=bool, default=True) - - parser.add_argument("--range_for_sampling", type=str, default='2-10') - parser.add_argument("--negative_number", type=int, default=5) - parser.add_argument("--use_gpu_for_searching", type=bool, default=True) - - parser.add_argument("--similarity_threshold", type=float, default=0.6) - - args = parser.parse_args() - - llm_model = args.llm_model - embedding_model = args.embedding_model - input_dir = args.input_dir - output_raw_path = args.output_raw_path - - temperature = args.temperature - top_p = args.top_p - top_k = args.top_k - repetition_penalty = args.repetition_penalty - max_new_tokens = args.max_new_tokens - do_sample = args.do_sample - num_beams = args.num_beams - num_return_sequences = args.num_return_sequences - use_cache = args.use_cache - - range_for_sampling=args.range_for_sampling - negative_number=args.negative_number - use_gpu_for_searching=args.use_gpu_for_searching - - similarity_threshold=args.similarity_threshold - - construct_retrieval_dataset( - llm_model, - embedding_model, - input_dir, - output_raw_path, - temperature, - top_p, - top_k, - repetition_penalty, - max_new_tokens, - do_sample, - num_beams, - num_return_sequences, - use_cache, - range_for_sampling, - negative_number, - use_gpu_for_searching, - similarity_threshold) - -if __name__ == '__main__': - main() From 383e5b3337d8c29f79ac178281b2c22c4a8d17fa Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 10:19:35 +0800 Subject: [PATCH 014/151] Update prompt.py Signed-off-by: Liangyx2 --- .../neural_chat/prompts/prompt.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/prompts/prompt.py 
b/intel_extension_for_transformers/neural_chat/prompts/prompt.py index 7dcd5512d0d..0d4f4583652 100644 --- a/intel_extension_for_transformers/neural_chat/prompts/prompt.py +++ b/intel_extension_for_transformers/neural_chat/prompts/prompt.py @@ -321,3 +321,22 @@ def generate_sqlcoder_prompt(qurey, metadata_file): qurey=qurey, table_metadata_string=table_metadata_string ) return prompt + +QUERYGENERATE_PROMPT = """ +Task: You are asked to act as a human annotator. Your role is to generate 2 specific, open-ended questions based on the provided context. +Each question should aim to extract or clarify key information from the context, focusing on a single aspect or detail. +The questions must be directly related to the context to form a query-positive pair, suitable for use in constructing a retrieval dataset. +--- +Requirements: +1. Questions should be based on the keywords, such as phrases at the beginning, phrases before colon, and recurring phrases in the context. +2. Use the terms in the context instead of pronouns. +--- +Desired format: +1. +2. +--- +Context: +### {context} +--- +Generated questions: +""" From 81014d132a31ea71f0858929d724ec6949320dd7 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 4 Mar 2024 02:19:55 +0000 Subject: [PATCH 015/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/prompts/prompt.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/prompts/prompt.py b/intel_extension_for_transformers/neural_chat/prompts/prompt.py index 0d4f4583652..ccb2ae835da 100644 --- a/intel_extension_for_transformers/neural_chat/prompts/prompt.py +++ b/intel_extension_for_transformers/neural_chat/prompts/prompt.py @@ -323,20 +323,20 @@ def generate_sqlcoder_prompt(qurey, metadata_file): return prompt QUERYGENERATE_PROMPT = """ -Task: You are asked to act as a human annotator. Your role is to generate 2 specific, open-ended questions based on the provided context. -Each question should aim to extract or clarify key information from the context, focusing on a single aspect or detail. +Task: You are asked to act as a human annotator. Your role is to generate 2 specific, open-ended questions based on the provided context. +Each question should aim to extract or clarify key information from the context, focusing on a single aspect or detail. The questions must be directly related to the context to form a query-positive pair, suitable for use in constructing a retrieval dataset. ---- +--- Requirements: 1. Questions should be based on the keywords, such as phrases at the beginning, phrases before colon, and recurring phrases in the context. 2. Use the terms in the context instead of pronouns. ---- +--- Desired format: 1. 2. 
---- +--- Context: ### {context} ---- +--- Generated questions: """ From 4b7bec7e4385c576b654bf1fd3a166770a0029d1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 10:20:10 +0800 Subject: [PATCH 016/151] Update llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/llm_generate_raw_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index 770a17bd0dd..fb54e694635 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -23,7 +23,7 @@ from typing import List from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser import logging -from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT # pylint: disable=E0611 +from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT from transformers import GenerationConfig logging.basicConfig( From 0df51a6b2d2f7ce9abdbc7250f1a275bc04a8202 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 10:36:14 +0800 Subject: [PATCH 017/151] Update llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/llm_generate_raw_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index fb54e694635..ac6f2bb3a09 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -54,7 +54,7 @@ def raw_data_generate(model_id, use_cache): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) - data_collection = DocumentParser().data_load(input=base_dir) + data_collection = DocumentParser().load(input=base_dir) documents = document_append(data_collection) generation_config = GenerationConfig( From 95b16bd95e7c3493fca0651d5e536f97380d1b42 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 12:00:21 +0800 Subject: [PATCH 018/151] Update retrieval_dataset_construction.py Signed-off-by: Liangyx2 --- .../data_augmentation/retrieval_dataset_construction.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py index 6d91c981fe7..2fcb7625e7e 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -15,8 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. 
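To see how raw_data_generate consumes the QUERYGENERATE_PROMPT template finalized above, here is a sketch of the substitution and of the post-processing that trims a decoded completion back to the two generated questions. The completion text is invented purely for illustration; only the parsing steps mirror the script:

import re
from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT

prompt = QUERYGENERATE_PROMPT.format(context="FAISS builds an inner-product index over embeddings.")

# Pretend the model echoed the prompt and continued with two numbered questions.
decoded = prompt + "1. What kind of index does FAISS build?\n2. What feeds the index?"
res = decoded[decoded.find('Generated questions:'):]   # keep text from the prompt's marker onward
res = re.sub('Generated questions:', '', res)          # drop the marker itself
questions = [q.replace('1.', '').replace('2.', '').strip() + '?' for q in res.split('?')[0:2]]
print(questions)   # ['What kind of index does FAISS build?', 'What feeds the index?']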
-from llm_generate_raw_data import raw_data_generate -from mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check +from .llm_generate_raw_data import raw_data_generate +from .mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check import argparse def construct_retrieval_dataset( @@ -86,7 +86,7 @@ def main(): parser.add_argument("--range_for_sampling", type=str, default='2-10') parser.add_argument("--negative_number", type=int, default=5) - parser.add_argument("--use_gpu_for_searching", type=bool, default=True) + parser.add_argument("--use_gpu_for_searching", type=bool, default=False) parser.add_argument("--similarity_threshold", type=float, default=0.6) From 80dd21bb438ea8d8e1c7fea7248177d1f591fb6f Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 12:14:05 +0800 Subject: [PATCH 019/151] Update llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../evaluation/data_augmentation/llm_generate_raw_data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index ac6f2bb3a09..f04c5cb0ed8 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -32,6 +32,8 @@ level=logging.INFO ) +device = "cuda" if torch.cuda.is_available() else "cpu" + def document_append(data_collection): documents = [] for data, metadata in data_collection: @@ -75,8 +77,10 @@ def raw_data_generate(model_id, if context: input = QUERYGENERATE_PROMPT.format(context=context) - - model_input = tokenizer(input, return_tensors="pt").to("cuda") + if device=="cpu": + model_input = tokenizer(input, return_tensors="pt") + elif device=="cuda": + model_input = tokenizer(input, return_tensors="pt").to("cuda") model.eval() result = [] From f495b2229987b57103a826b9af8716d5bedc3fbb Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 12:16:26 +0800 Subject: [PATCH 020/151] Update mine_hard_negatives_check_similarity.py Signed-off-by: Liangyx2 --- .../data_augmentation/mine_hard_negatives_check_similarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py index 67a811fa92d..78848a1fb19 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py @@ -16,7 +16,7 @@ # limitations under the License. 
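The CPU fallback added to raw_data_generate above branches on the device string; an equivalent and slightly tighter idiom (a sketch, not a change this patch makes) moves the encoded inputs with a single .to(device) call, since the BatchEncoding returned by the tokenizer accepts a device on both CPU and CUDA:

import torch
from transformers import AutoTokenizer

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder model id
model_input = tokenizer("some context", return_tensors="pt").to(device)
print(model_input["input_ids"].device)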
import jsonlines -from hn_mine import find_knn_neg +from .hn_mine import find_knn_neg from sentence_transformers import SentenceTransformer def mine_hard_negatives(model_name_or_path, From 593dee3247958bf9d1abaae25fbafb93e58c28f4 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 14:24:38 +0800 Subject: [PATCH 021/151] add test_evaluation.py to nightly test Signed-off-by: Liangyx2 --- .../tests/nightly/tools/test_evaluation.py | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py new file mode 100644 index 00000000000..42beca8d987 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -0,0 +1,54 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest, os, shutil +from unittest.mock import patch +from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction + +class TestEvaluation(unittest.TestCase): + def setUp(self) -> None: + if os.path.exists("data.jsonl"): + os.remove("data.jsonl") + if os.path.exists("data_minedHN.jsonl"): + os.remove("data_minedHN.jsonl") + if os.path.exists("data_minedHN_split.jsonl"): + os.remove("data_minedHN_split.jsonl") + return super().setUp() + + def tearDown(self) -> None: + if os.path.exists("data.jsonl"): + os.remove("data.jsonl") + if os.path.exists("data_minedHN.jsonl"): + os.remove("data_minedHN.jsonl") + if os.path.exists("data_minedHN_split.jsonl"): + os.remove("data_minedHN_split.jsonl") + return super().tearDown() + + def test_retrieval_dataset_construction(self): + argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \ + '--input_dir', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/', \ + '--output_raw_path', 'data', \ + '--range_for_sampling', '2-2', \ + '--negative_number', '1'] + + with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv): + retrieval_dataset_construction.main() + self.assertTrue(os.path.exists("data_minedHN_split.jsonl")) + +if __name__ == '__main__': + unittest.main() From cf59b1863d8e16e6396877bd6cf59296d524263d Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 14:29:01 +0800 Subject: [PATCH 022/151] Update and rename requirements.txt to requirements_cpu.txt Signed-off-by: Liangyx2 --- .../data_augmentation/requirements.txt | 16 ---------------- .../data_augmentation/requirements_cpu.txt | 2 ++ 2 files changed, 2 insertions(+), 16 deletions(-) delete mode 100644 
intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt deleted file mode 100644 index 0466e299d3d..00000000000 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -accelerate -beautifulsoup4 -easyocr -faiss-gpu -jsonlines -langchain==0.0.354 -langchain_core==0.1.18 -langid -modelscope -numpy -openpyxl -PyMuPDF -python-docx -sentence-transformers==2.3.1 -torch -transformers diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt new file mode 100644 index 00000000000..e0841115212 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt @@ -0,0 +1,2 @@ +faiss-cpu +modelscope From 40e0b0e12009e63eae336478f638e9f8481b1b75 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 14:29:21 +0800 Subject: [PATCH 023/151] Create requirements_cuda.txt Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/requirements_cuda.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cuda.txt diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cuda.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cuda.txt new file mode 100644 index 00000000000..9e91dfbd37d --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cuda.txt @@ -0,0 +1,2 @@ +faiss-gpu +modelscope From bf1b1aafe7bf1ff22e492967f9a9660eb301baf3 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 14:30:43 +0800 Subject: [PATCH 024/151] Update requirements.txt Signed-off-by: Liangyx2 --- .../neural_chat/tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/intel_extension_for_transformers/neural_chat/tests/requirements.txt b/intel_extension_for_transformers/neural_chat/tests/requirements.txt index a698410b49a..40d1924de1c 100644 --- a/intel_extension_for_transformers/neural_chat/tests/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tests/requirements.txt @@ -34,6 +34,7 @@ langchain_core==0.1.18 langid librosa markdown +modelscope neural-compressor neural_speed num2words From 5552ebcd52ba47312c355f5a9d6c40b074303b8a Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 15:34:32 +0800 Subject: [PATCH 025/151] Update retrieval_dataset_construction.py Signed-off-by: Liangyx2 --- .../retrieval_dataset_construction.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py index 2fcb7625e7e..9a574d592fd 100644 --- 
a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -22,8 +22,8 @@ def construct_retrieval_dataset( llm_model, embedding_model, - input_dir, - output_raw_path, + input, + output, temperature, top_p, top_k, @@ -39,8 +39,8 @@ def construct_retrieval_dataset( similarity_threshold): raw_data_generate(llm_model, - input_dir, - output_raw_path, + input, + output, temperature, top_p, top_k, @@ -51,16 +51,16 @@ def construct_retrieval_dataset( num_return_sequences, use_cache) - output_hn_path=output_raw_path+'_minedHN.jsonl' + output_hn_path=output+'_minedHN.jsonl' mine_hard_negatives(embedding_model, - output_raw_path, + output, output_hn_path, range_for_sampling, negative_number, use_gpu_for_searching) - output_json_split_path = output_raw_path+"_minedHN_split.jsonl" + output_json_split_path = output+"_minedHN_split.jsonl" similarity_check(output_hn_path, output_json_split_path, embedding_model, @@ -71,7 +71,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--llm_model", type=str) parser.add_argument("--embedding_model", type=str) - parser.add_argument("--input_dir", type=str) + parser.add_argument("--input", type=str) parser.add_argument("--output_raw_path", type=str) parser.add_argument("--temperature", type=float, default=0.8) @@ -94,7 +94,7 @@ def main(): llm_model = args.llm_model embedding_model = args.embedding_model - input_dir = args.input_dir + input = args.input output_raw_path = args.output_raw_path temperature = args.temperature From d3b7579ff7b7fd7caae600af4e0904a601a1df44 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 15:36:39 +0800 Subject: [PATCH 026/151] Update llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../evaluation/data_augmentation/llm_generate_raw_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index f04c5cb0ed8..7f33eecbabd 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -43,7 +43,7 @@ def document_append(data_collection): return documents def raw_data_generate(model_id, - base_dir, + input_path, file_json_path, temperature, top_p, @@ -56,7 +56,7 @@ def raw_data_generate(model_id, use_cache): tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) - data_collection = DocumentParser().load(input=base_dir) + data_collection = DocumentParser().load(input=input_path) documents = document_append(data_collection) generation_config = GenerationConfig( From f500b2b6e22c47a647fdd712cf7f2ba0ac55417a Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 15:47:32 +0800 Subject: [PATCH 027/151] Update retrieval_dataset_construction.py Signed-off-by: Liangyx2 --- .../data_augmentation/retrieval_dataset_construction.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py index 9a574d592fd..138165a77cf 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -72,7 +72,7 @@ def main(): parser.add_argument("--llm_model", type=str) parser.add_argument("--embedding_model", type=str) parser.add_argument("--input", type=str) - parser.add_argument("--output_raw_path", type=str) + parser.add_argument("--output", type=str) parser.add_argument("--temperature", type=float, default=0.8) parser.add_argument("--top_p", type=float, default=0.9) @@ -95,7 +95,7 @@ def main(): llm_model = args.llm_model embedding_model = args.embedding_model input = args.input - output_raw_path = args.output_raw_path + output = args.output temperature = args.temperature top_p = args.top_p @@ -116,8 +116,8 @@ def main(): construct_retrieval_dataset( llm_model, embedding_model, - input_dir, - output_raw_path, + input, + output, temperature, top_p, top_k, From b65c4bf4a66f03db800a091ab296515ce5cf0b0c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 15:53:14 +0800 Subject: [PATCH 028/151] Update llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/llm_generate_raw_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index 7f33eecbabd..6970a4b5799 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -111,5 +111,5 @@ def raw_data_generate(model_id, "query": result_str, "pos": [context], } - with jsonlines.open(file_json_path,"a") as file_json: + with jsonlines.open(file_json_path+'.jsonl',"a") as file_json: file_json.write(data) From c43ab73011cab63b254705291cb892cacc3dadad Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 15:53:45 +0800 Subject: [PATCH 029/151] Update test_evaluation.py Signed-off-by: Liangyx2 --- .../neural_chat/tests/nightly/tools/test_evaluation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index 42beca8d987..99ff68bf468 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -41,8 +41,8 @@ def tearDown(self) -> None: def test_retrieval_dataset_construction(self): argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \ - '--input_dir', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/', \ - '--output_raw_path', 'data', \ + '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/', \ + '--output', 'data', \ '--range_for_sampling', '2-2', \ '--negative_number', '1'] From 
feda3c075c9103c363b4b8c651781791522f16ae Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 16:25:55 +0800 Subject: [PATCH 030/151] Update retrieval_dataset_construction.py Signed-off-by: Liangyx2 --- .../data_augmentation/retrieval_dataset_construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py index 138165a77cf..66bae2c7736 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -72,7 +72,7 @@ def main(): parser.add_argument("--llm_model", type=str) parser.add_argument("--embedding_model", type=str) parser.add_argument("--input", type=str) - parser.add_argument("--output", type=str) + parser.add_argument("--output", type=str, default='data') parser.add_argument("--temperature", type=float, default=0.8) parser.add_argument("--top_p", type=float, default=0.9) From 1c2c22c2918888ade656d2011ad27732b5a10878 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 4 Mar 2024 16:26:32 +0800 Subject: [PATCH 031/151] Update mine_hard_negatives_check_similarity.py Signed-off-by: Liangyx2 --- .../data_augmentation/mine_hard_negatives_check_similarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py index 78848a1fb19..87203e922d5 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py @@ -33,7 +33,7 @@ def mine_hard_negatives(model_name_or_path, model = SentenceTransformer(model_name_or_path) find_knn_neg(model, - input_file=input_file, + input_file=input_file+'.jsonl', candidate_pool=candidate_pool, output_file=output_file, sample_range=sample_range, From 55a5cdaef8c7ebbd02a83a01fea39abbd8a3787a Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 6 Mar 2024 15:30:33 +0800 Subject: [PATCH 032/151] add README.md Signed-off-by: Liangyx2 --- .../evaluation/data_augmentation/README.md | 72 +++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md new file mode 100644 index 00000000000..fb6bc713f68 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -0,0 +1,72 @@ +# Retrieval Data Augmentation + +## 1. Introduction +In this example, we show how to do data augmentation to construct a retrieval dataset. Specifically, it generates open-ended questions based on the context of the input file provided. Each question is directly related to its context, and together they form query-positive pairs suitable for constructing a retrieval dataset.
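+
+As a quick reference, each line of the final output file is a standard JSON record. Below is a minimal sketch of consuming it (assuming the default output name `data_minedHN_split.jsonl` described in the Result section; this snippet is illustrative and not part of the tool itself):
+```python
+import json
+
+# Iterate over the generated (query, positives, negatives) triples.
+with open("data_minedHN_split.jsonl") as f:
+    for line in f:
+        sample = json.loads(line)
+        query, positives, negatives = sample["query"], sample["pos"], sample["neg"]
+```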
+ +## 2. Requirements +``` +git clone https://github.com/intel/intel-extension-for-transformers.git +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat +pip install -r requirements.txt +cd pipeline/plugins/retrieval +pip install -r requirements.txt +``` + +* **On CPU** +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation +pip install -r requirements_cpu.txt +``` + +* **On CUDA** +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation +pip install -r requirements_cuda.txt +``` + +## 3. Retrieval Dataset Construction +* **On CPU** +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation +python -m data_augmentation.retrieval_dataset_construction \ +--llm_model \ +--embedding_model \ +--input +``` + +* **On CUDA** +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation +python -m data_augmentation.retrieval_dataset_construction \ +--llm_model \ +--embedding_model \ +--input \ +--use_gpu_for_searching True +``` + +**Some Important Arguments**: +- `llm_model`: The path for the LLM model. +- `embedding_model`: The path for the text embedding model. +- `input`: The path of the file/folder/link containing the content. +- `output`: The name of output files. The default value is 'data'. The default output files are 'data.jsonl', 'data_minedHN.jsonl', 'data_minedHN_split.jsonl'. +- `temperature`: The value is used to modulate the next token probabilities, and will influence the distribution of similarity scores. The default value is 0.8. +- `top_p`: If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.9. +- `top_k`: The number of highest probability vocabulary tokens to keep for top-k-filtering. The default value is 40. +- `repetition_penalty`: The parameter for repetition penalty. 1.0 means no penalty. The default value is 2.0. +- `max_new_tokens`: The maximum number of tokens to generate, ignoring the number of tokens in the prompt. The default value is 48. +- `do_sample`: Whether or not to use sampling; use greedy decoding otherwise. The default value is True. +- `num_beams`: Number of beams for beam search. 1 means no beam search. The default value is 2. +- `num_return_sequences`: The number of independently computed returned sequences for each element in the batch. The default value is 2. +- `use_cache`: Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. The default value is True. +- `range_for_sampling`: The range to sample negatives. For example, `2-100` means sampling `negative_number` negatives from top2-top100 documents. You can set a larger value to reduce the difficulty of negatives (e.g., set it `60-300` to sample negatives from top60-300 passages). The default value is '2-10'. +- `negative_number`: The number of sampled negatives. The default value is 5. +- `use_gpu_for_searching`: Whether to use faiss-gpu to retrieve negatives. The default value is False. +- `similarity_threshold`: The cosine similarity threshold used to filter the generated queries. The default value is 0.6. + +## 4. Result +Three files will be generated. The default output files are `data.jsonl`, `data_minedHN.jsonl`, `data_minedHN_split.jsonl`.
The third is the final output dataset, where each line is a dict like this: +``` +{"query": str, "pos": List[str], "neg": List[str]} +``` +`query` is the generated question, `pos` is a list of positive texts drawn from the context of the input file provided, and `neg` is a list of negative texts. +See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/augmented_example.jsonl) for a data file. \ No newline at end of file From 7a74f86df64cbc7d019ae3575fb039aa37097ba2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 6 Mar 2024 07:30:54 +0000 Subject: [PATCH 033/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/tools/evaluation/data_augmentation/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md index fb6bc713f68..fca239bdc73 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -69,4 +69,4 @@ Three files will be generated. The default output files are `data.jsonl`, `data_ {"query": str, "pos": List[str], "neg": List[str]} ``` `query` is the generated question, `pos` is a list of positive texts drawn from the context of the input file provided, and `neg` is a list of negative texts. -See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/augmented_example.jsonl) for a data file. \ No newline at end of file +See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/augmented_example.jsonl) for a data file. From 39754d046a202a49858e8e16f75b6f8722ee14ac Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 7 Mar 2024 17:24:29 +0800 Subject: [PATCH 034/151] Update README.md Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md index fca239bdc73..bf28652297b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -3,7 +3,10 @@ ## 1. Introduction In this example, we show how to do data augmentation to construct a retrieval dataset. Specifically, it generates open-ended questions based on the context of the input file provided. Each question is directly related to its context, and together they form query-positive pairs suitable for constructing a retrieval dataset. -## 2. Requirements +## 2. Supported Devices +CPU, CUDA + +## 3. Requirements
``` git clone https://github.com/intel/intel-extension-for-transformers.git cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat @@ -24,7 +27,7 @@ cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat pip install -r requirements_cuda.txt ``` -## 3. Retrieval Dataset Construction +## 4. Retrieval Dataset Construction * **On CPU** ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation @@ -63,7 +66,7 @@ python -m data_augmentation.retrieval_dataset_construction \ - `use_gpu_for_searching`: Whether to use faiss-gpu to retrieve negatives. The default value is False. - `similarity_threshold`: The cosine similarity threshold used to filter the generated queries. The default value is 0.6. -## 4. Result +## 5. Result Three files will be generated. The default output files are `data.jsonl`, `data_minedHN.jsonl`, `data_minedHN_split.jsonl`. The third is the final output dataset, where each line is a dict like this: ``` {"query": str, "pos": List[str], "neg": List[str]} ``` `query` is the generated question, `pos` is a list of positive texts drawn from the context of the input file provided, and `neg` is a list of negative texts. See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/augmented_example.jsonl) for a data file. From d7e95f0c415195a2dfd3c10db05b469d6e124a83 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 8 Mar 2024 14:27:51 +0800 Subject: [PATCH 035/151] add evaluate_retrieval.py Signed-off-by: Liangyx2 --- .../retriever/evaluate_retrieval.py | 286 ++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py new file mode 100644 index 00000000000..3f71a6ffab2 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py @@ -0,0 +1,286 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import ClassVar, Collection +from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings +from langchain.embeddings import GooglePalmEmbeddings +from langchain.text_splitter import RecursiveCharacterTextSplitter +from intel_extension_for_transformers.langchain.vectorstores import Chroma, Qdrant +import uuid +from langchain_core.documents import Document +from intel_extension_for_transformers.langchain.retrievers import ChildParentRetriever +from langchain_core.vectorstores import VectorStoreRetriever +from langchain.retrievers import BM25Retriever +import jsonlines +import numpy as np +import logging +import argparse + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%m-%Y %H:%M:%S", + level=logging.INFO +) + +def document_transfer(data_collection): + "Transfer the raw document into langchain supported format."
+ documents = [] + for data, meta in data_collection: + doc_id = str(uuid.uuid4()) + metadata = {"source": meta, "identify_id":doc_id} + doc = Document(page_content=data, metadata=metadata) + documents.append(doc) + return documents + +def document_append_id(documents): + for _doc in documents: + _doc.metadata["doc_id"] = _doc.metadata["identify_id"] + return documents + +def index_library(index_file_jsonl_path): + index_list = [] + with open(index_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=[stu["context"][0],index_file_jsonl_path] + index_list.append(passages) + return index_list + +def query_set(query_file_jsonl_path): + query_list = [] + with open(query_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=stu["query"] + query_list.append(passages) + return query_list + +def load_list(file_jsonl_path, item): + with open(file_jsonl_path) as file: + data = [] + for stu in jsonlines.Reader(file): + content = ",".join(stu[item]) + data.append(content) + return data + +def evaluate(preds, labels, cutoffs=[1,5]): + """ + Evaluate MRR and Hit at cutoffs. + """ + metrics = {} + + # MRR + mrrs = np.zeros(len(cutoffs)) + for pred, label in zip(preds, labels): + jump = False + for i, x in enumerate(pred, 1): + if x in label: + for k, cutoff in enumerate(cutoffs): + if i <= cutoff: + mrrs[k] += 1 / i + jump = True + if jump: + break + mrrs /= len(preds) + for i, cutoff in enumerate(cutoffs): + mrr = mrrs[i] + metrics[f"MRR@{cutoff}"] = mrr + + # Hit + hit_rate_list=[] + for cutoff in cutoffs: + hit_num = 0 + for pred, label in zip(preds, labels): + hit_list=np.intersect1d(label, pred[:cutoff]) + hit_num = hit_num+len(hit_list) + hit_rate = hit_num/len(labels) + hit_rate_list.append(hit_rate) + for i, cutoff in enumerate(cutoffs): + hit_rate = hit_rate_list[i] + metrics[f"Hit@{cutoff}"] = hit_rate + + return metrics + +class Retrieval(): + def __init__(self, + vector_database="Chroma", + embedding_model="BAAI/bge-base-en-v1.5", + input_path = None, + retrieval_type = 'default', + append=True, + **kwargs): + + self.vector_database = vector_database + self.input_path = None + self.retrieval_type = retrieval_type + self.retriever = None + self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \ + if 'child_size' in kwargs else 512) + allowed_retrieval_type: ClassVar[Collection[str]] = ( + "default", + "child_parent", + 'bm25', + ) + + assert self.retrieval_type in allowed_retrieval_type, "search_type of {} not allowed.".format( \ + self.retrieval_type) + + self.input_path = input_path + assert self.input_path is not None, "Should give an input path!"
+ + try: + if "instruct" in embedding_model: + self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) + elif "bge" in embedding_model: + self.embeddings = HuggingFaceBgeEmbeddings( + model_name=embedding_model, + encode_kwargs={'normalize_embeddings': True}, + query_instruction="Represent this sentence for searching relevant passages:") + elif "Google" == embedding_model: + self.embeddings = GooglePalmEmbeddings() + else: + self.embeddings = HuggingFaceEmbeddings( + model_name=embedding_model, + encode_kwargs={"normalize_embeddings": True}, + ) + except Exception as e: + logging.error("Please select a proper embedding model.") + logging.error(e) + + data_collection = index_library(self.input_path) + logging.info("The parsing for the uploaded files is finished.") + + langchain_documents = document_transfer(data_collection) + logging.info("The format of parsed documents is transferred.") + + if kwargs['search_type']=="similarity": + kwargs['search_kwargs']={"k":5} + elif kwargs['search_type']=="mmr": + kwargs['search_kwargs']={"k":5} + elif kwargs['search_type']=="similarity_score_threshold": + kwargs['search_kwargs']={"k":5, "score_threshold":0.6} + + if self.vector_database == "Chroma": + self.database = Chroma + elif self.vector_database == "Qdrant": + self.database = Qdrant + if self.retrieval_type == 'default': # Using vector store retriever + if append: + knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, + **kwargs) + else: + knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ + **kwargs) + if self.vector_database == "Qdrant" and knowledge_base.is_local(): + # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. + knowledge_base.client.close() + elif self.retrieval_type == "child_parent": # Using child-parent store retriever + child_documents = self.splitter.split_documents(langchain_documents) + langchain_documents = document_append_id(langchain_documents) + if append: + knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, + **kwargs) + child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \ + embedding=self.embeddings, **kwargs) + else: + knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) + child_knowledge_base = self.database.build(documents=child_documents, embedding=self.embeddings, \ + sign='child', **kwargs) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ + child_document_store=child_knowledge_base, **kwargs) + if self.vector_database == "Qdrant" : + # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously.
+ if knowledge_base.is_local(): + knowledge_base.client.close() + if child_knowledge_base.is_local(): + child_knowledge_base.client.close() + elif self.retrieval_type == "bm25": + self.docs = document_append_id(langchain_documents) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, docs=self.docs, **kwargs) + logging.info("The retriever is successfully built.") + + def pre_llm_inference_actions(self, query): + assert self.retriever is not None, "Please check the status of retriever" + context = self.retriever.get_context(query) + return context + + +class RetrieverAdapter(): + def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \ + reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs): + self.retrieval_type = retrieval_type + if enable_rerank: + from intel_extension_for_transformers.langchain.retrievers.bge_reranker import BgeReranker + from FlagEmbedding import FlagReranker + reranker = FlagReranker(reranker_model) + self.reranker = BgeReranker(model = reranker, top_n=top_n) + else: + self.reranker = None + + if self.retrieval_type == "default": + self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs) + elif self.retrieval_type == "bm25": + self.retriever = BM25Retriever.from_documents(docs, **kwargs) + elif self.retrieval_type == "child_parent": + self.retriever = ChildParentRetriever(parentstore=document_store, \ + vectorstore=child_document_store, + **kwargs) # pylint: disable=abstract-class-instantiated + else: + logging.error('The chosen retrieval type remains outside the supported scope.') + + def get_context(self, query): + context = [] + retrieved_documents = self.retriever.get_relevant_documents(query) + if self.reranker is not None: + retrieved_documents = self.reranker.compress_documents(documents = retrieved_documents, query = query) + for doc in retrieved_documents: + context.append(doc.page_content) + return context + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--index_file_jsonl_path", type=str) + parser.add_argument("--query_file_jsonl_path", type=str) + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-base-en-v1.5") + parser.add_argument("--retrieval_type", type=str, default='default') + parser.add_argument("--search_type", type=str, default="similarity") + args = parser.parse_args() + + index_file_jsonl_path = args.index_file_jsonl_path + query_file_jsonl_path = args.query_file_jsonl_path + vector_database = args.vector_database + embedding_model = args.embedding_model + retrieval_type = args.retrieval_type + search_type = args.search_type + + query_list = query_set(query_file_jsonl_path) + retrieval_results=[] + for query in query_list: + context=Retrieval(input_path=index_file_jsonl_path, + vector_database=vector_database, + embedding_model=embedding_model, + retrieval_type = retrieval_type, + search_type=search_type).pre_llm_inference_actions(query=query) + retrieval_results.append(context) + ground_truths=load_list(query_file_jsonl_path, "pos") + metrics = evaluate(retrieval_results, ground_truths) + print(metrics) + return metrics + +if __name__ == '__main__': + main() \ No newline at end of file From 186ab43f74ff623be3f2f44700f72fc48ac0fd5e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 8 Mar 2024 06:28:27 +0000 Subject: [PATCH 036/151]
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tools/evaluation/retriever/evaluate_retrieval.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py index 3f71a6ffab2..02ae5d5971d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py @@ -167,7 +167,7 @@ def __init__(self, logging.info("The format of parsed documents is transferred.") if kwargs['search_type']=="similarity": - kwargs['search_kwargs']={"k":5} + kwargs['search_kwargs']={"k":5} elif kwargs['search_type']=="mmr": kwargs['search_kwargs']={"k":5} elif kwargs['search_type']=="similarity_score_threshold": @@ -250,7 +250,7 @@ def get_context(self, query): for doc in retrieved_documents: context.append(doc.page_content) return context - + def main(): parser = argparse.ArgumentParser() parser.add_argument("--index_file_jsonl_path", type=str) @@ -259,7 +259,7 @@ def main(): parser.add_argument("--embedding_model", type=str, default="BAAI/bge-base-en-v1.5") parser.add_argument("--retrieval_type", type=str, default='default') parser.add_argument("--search_type", type=str, default="similarity") - args = parser.parse_args() + args = parser.parse_args() index_file_jsonl_path = args.index_file_jsonl_path query_file_jsonl_path = args.query_file_jsonl_path @@ -271,11 +271,11 @@ def main(): query_list = query_set(query_file_jsonl_path) retrieval_results=[] for query in query_list: - context=Retrieval(input_path=index_file_jsonl_path, + context=Retrieval(input_path=index_file_jsonl_path, vector_database=vector_database, embedding_model=embedding_model, retrieval_type = retrieval_type, - search_type=search_type).pre_llm_inference_actions(query=query) + search_type=search_type).pre_llm_inference_actions(query=query) retrieval_results.append(context) ground_truths=load_list(query_file_jsonl_path, "pos") metrics = evaluate(retrieval_results, ground_truths) @@ -283,4 +283,4 @@ def main(): return metrics if __name__ == '__main__': - main() \ No newline at end of file + main() From 1496219127f1695690656f91940ae9c049025a0c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 11 Mar 2024 10:58:41 +0800 Subject: [PATCH 037/151] Update test_evaluation.py Signed-off-by: Liangyx2 --- .../neural_chat/tests/nightly/tools/test_evaluation.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index 99ff68bf468..0d7f93bdfb5 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -18,6 +18,7 @@ import unittest, os, shutil from unittest.mock import patch from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction +from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval class TestEvaluation(unittest.TestCase): def setUp(self) -> None: @@ -49,6 +50,15 @@ def test_retrieval_dataset_construction(self): with patch('sys.argv', ['python 
retrieval_dataset_construction.py'] + argv): retrieval_dataset_construction.main() self.assertTrue(os.path.exists("data_minedHN_split.jsonl")) + + def test_evaluate_retrieval(self): + argv = ['--index_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/candidate_context.jsonl', \ + '--query_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/example.jsonl', \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] + + with patch('sys.argv', ['python evaluate_retrieval.py'] + argv): + result = evaluate_retrieval.main() + self.assertIsNotNone(result) if __name__ == '__main__': unittest.main() From 03a768ed3976dd8a3b98a94ef557ab5cfccd42a0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 11 Mar 2024 02:58:59 +0000 Subject: [PATCH 038/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/tests/nightly/tools/test_evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index 0d7f93bdfb5..6aed5802275 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -50,7 +50,7 @@ def test_retrieval_dataset_construction(self): with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv): retrieval_dataset_construction.main() self.assertTrue(os.path.exists("data_minedHN_split.jsonl")) - + def test_evaluate_retrieval(self): argv = ['--index_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/candidate_context.jsonl', \ '--query_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/example.jsonl', \ From 128d587f543da6b0a58b1f8a71575799a69e1696 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 11 Mar 2024 11:24:30 +0800 Subject: [PATCH 039/151] Update test_evaluation.py Signed-off-by: Liangyx2 --- .../neural_chat/tests/nightly/tools/test_evaluation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index 6aed5802275..2472edb81c2 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -28,6 +28,8 @@ def setUp(self) -> None: os.remove("data_minedHN.jsonl") if os.path.exists("data_minedHN_split.jsonl"): os.remove("data_minedHN_split.jsonl") + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) return super().setUp() def tearDown(self) -> None: @@ -37,6 +39,8 @@ def tearDown(self) -> None: os.remove("data_minedHN.jsonl") if os.path.exists("data_minedHN_split.jsonl"): os.remove("data_minedHN_split.jsonl") + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) return super().tearDown() def test_retrieval_dataset_construction(self): From 705752a52e30aaaf76ec22270162188c18e0766f Mon Sep 17 00:00:00 2001 From: Liangyx2 
Date: Mon, 11 Mar 2024 14:33:11 +0800 Subject: [PATCH 040/151] add README.md Signed-off-by: Liangyx2 --- .../tools/evaluation/retriever/README.md | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md new file mode 100644 index 00000000000..b078373ceab --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md @@ -0,0 +1,35 @@ +# Retrieval Evaluation + +## 1. Introduction +We provide a script to evaluate retrieval performance with two metrics: MRR (Mean Reciprocal Rank) and Hit (Hit Ratio). MRR measures the rank position of the ground truth in the retrieval list; the higher the ground truth ranks, the better. Hit measures the accuracy of retrieval, that is, whether the ground truth is included among the retrieved items. + +## 2. Requirements +``` +git clone https://github.com/intel/intel-extension-for-transformers.git +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat +pip install -r requirements.txt +cd pipeline/plugins/retrieval +pip install -r requirements.txt +``` + +## 3. Evaluate Retrieval +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever +python evaluate_retrieval.py \ +--index_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/candidate_context.jsonl \ +--query_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/example.jsonl +``` + +**Some Important Arguments**: +- `index_file_jsonl_path`: path of the JSONL file containing the candidate contexts, where each line is a dict like this: ```{"context": List[str]}```. +- `query_file_jsonl_path`: path of the JSONL file containing queries and positives, where each line is a dict like this: ```{"query": str, "pos": List[str]}```. +- `vector_database`: The vector database for constructing the knowledge base. The default value is "Chroma". The other option is "Qdrant". +- `embedding_model`: The name or path for the text embedding model. The default value is "BAAI/bge-base-en-v1.5". Other options are "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". +- `retrieval_type`: The type of the retriever. The default value is "default". The other options are "child_parent" and "bm25". +- `search_type`: Type of search to perform. The default value is "similarity". The other options are "mmr" and "similarity_score_threshold". + +## 4. Result +The script reports MRR and Hit at cutoffs 1 and 5.
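+
+For intuition, here is a minimal, self-contained sketch of the MRR computation on hypothetical document ids (the authoritative implementation is the `evaluate` function in `evaluate_retrieval.py`):
+```python
+# Each query contributes the reciprocal rank of its first relevant hit within the cutoff.
+preds = [["d1", "d2"], ["d3", "d4"]]  # retrieved ids per query (hypothetical)
+labels = [["d2"], ["d9"]]             # ground-truth ids per query (hypothetical)
+mrr_at_2 = sum(
+    next((1 / rank for rank, doc in enumerate(pred, 1) if doc in label), 0)
+    for pred, label in zip(preds, labels)
+) / len(preds)  # (1/2 + 0) / 2 = 0.25
+```
+A sample run of the script prints: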
+``` +{'MRR@1': 0.7, 'MRR@5': 0.72, 'Hit@1': 0.7, 'Hit@5': 0.8} +``` From 675fe2ee85772c750ae0f0ea9c5baf4bfcd077d1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 15:56:28 +0800 Subject: [PATCH 041/151] Update prompt.py Signed-off-by: Liangyx2 --- .../neural_chat/prompts/prompt.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/prompts/prompt.py b/intel_extension_for_transformers/neural_chat/prompts/prompt.py index ccb2ae835da..41a1c50c303 100644 --- a/intel_extension_for_transformers/neural_chat/prompts/prompt.py +++ b/intel_extension_for_transformers/neural_chat/prompts/prompt.py @@ -340,3 +340,20 @@ def generate_sqlcoder_prompt(qurey, metadata_file): --- Generated questions: """ + +TRUTHGENERATE_PROMPT = """ +Task: You are asked to act as a human annotator. Your role is to generate the right answer based on the context and question provided. +Answers should aim to extract or clarify the key information of the question from the context, focusing on a single aspect or detail. +The answer must be directly related to the context and the question, suitable for use in constructing a synthetic retrieval evaluation dataset. +--- +Desired format: +1. +--- +Question: +### {question} +--- +Context: +### {context} +--- +Generated ground_truth: +""" From 988e5427e63e2ef8ac1abc71c69403a90ba26166 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Mar 2024 07:58:03 +0000 Subject: [PATCH 042/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/prompts/prompt.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/prompts/prompt.py b/intel_extension_for_transformers/neural_chat/prompts/prompt.py index 41a1c50c303..3dc78b6b792 100644 --- a/intel_extension_for_transformers/neural_chat/prompts/prompt.py +++ b/intel_extension_for_transformers/neural_chat/prompts/prompt.py @@ -345,15 +345,15 @@ def generate_sqlcoder_prompt(qurey, metadata_file): Task: You are asked to act as a human annotator. Your role is to generate the right answer based on the context and question provided. Answers should aim to extract or clarify the key information of the question from the context, focusing on a single aspect or detail. The answer must be directly related to the context and the question, suitable for use in constructing a synthetic retrieval evaluation dataset. ---- +--- Desired format: 1. 
---- +--- Question: ### {question} ---- +--- Context: ### {context} ---- +--- Generated ground_truth: """ From d0c3c3463de3b33df863d539cfba77c17a9b8df3 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 16:01:10 +0800 Subject: [PATCH 043/151] add llm_generate_truth.py and data Signed-off-by: Liangyx2 --- .../evaluation/data_augmentation/answer.jsonl | 10 ++ .../data_augmentation/augmented_example.jsonl | 10 ++ .../data_augmentation/candidate_context.jsonl | 50 ++++++ .../data_augmentation/example.jsonl | 10 ++ .../data_augmentation/ground_truth.jsonl | 10 ++ .../data_augmentation/llm_generate_truth.py | 148 ++++++++++++++++++ 6 files changed, 238 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl new file mode 100644 index 00000000000..fe8976774e2 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl @@ -0,0 +1,10 @@ +{"question": "What types of platforms does the organization focus on?", "answer": "The organization focuses on delivering open software and hardware platforms with industry-defining standards, as well as leadership products, open and secure platforms, and resilient manufacturing."} +{"question": "What are the core values that drive our company's actions?", "answer": "The core values driving the company's actions include focusing on having a positive impact on business, society, and the planet by working together with talented individuals. They also emphasize delivering leadership products, open and secure platforms, and resilient manufacturing to support global digitalization and ensure customer success."} +{"question": "What types of companies does Intel invest in?", "answer": "Intel invests in public and private companies."} +{"question": "How has technology been central to our lives in recent years?", "answer": "In recent years, technology has become more essential as it permeates various aspects of our daily lives. This includes advancements in communication, entertainment, transportation, healthcare, and many other sectors. All these rely heavily on semiconductors, which play a crucial role in powering and enabling these technologies."} +{"question": "What is Intel's focus in terms of delivering leadership products?", "answer": "Intel's focus in terms of delivering leadership products includes providing open and secure platforms as well as resilient manufacturing for enabling global digitalization and fueling customer success."} +{"question": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "answer": "Intel has not provided specific details on how they have been directly affected by the COVID-19 pandemic. 
However, it can be inferred that like many other companies, they might have experienced challenges related to supply chain disruptions, workforce adjustments, and potential changes in demand for their products due to the global economic impact of the pandemic."} +{"question": "How does the company protect personal data to prevent unauthorized access or misuse?", "answer": "The text provided doesn't specifically mention how the company protects personal data to prevent unauthorized access or misuse. However, it highlights the potential consequences of such incidents, which might imply that they have measures in place to minimize these risks."} +{"question": "What are the conditions for accessing third-party IP?", "answer": "The conditions for accessing third-party IP can vary depending on the specific agreement between the parties involved. However, generally, it includes ensuring availability on commercially reasonable terms or at all."} +{"question": "How many customers contribute to the majority of our revenue?", "answer": "A limited number of customers contribute to the majority of your revenue."} +{"question": "When does Intel plan to deliver on its goal of five manufacturing technology nodes in four years?", "answer": "Intel remains on track to deliver on this goal within four years."} diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl new file mode 100644 index 00000000000..4ae26cb49b7 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl @@ -0,0 +1,10 @@ +{"query": "What types of platforms does the organization focus on?", "pos": ["We aim to deliver open software and hardware platforms with industry-defining standards."], "neg": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We invest in public and private companies and do not always realize a return on our investments.", "We receive a significant portion of our revenue from a limited number of customers."]} +{"query": "What are the core values that drive our company's actions?", "pos": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet."], "neg": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We aim to deliver open software and hardware platforms with industry-defining standards.", "The COVID-19 pandemic could materially adversely affect our financial condition and results of operations.", "We receive a significant portion of our revenue from a limited number of customers."]} +{"query": "What types of companies does Intel invest in?", "pos": ["We invest in public and private companies and do not always realize a return on our investments."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We aim to deliver open software and hardware platforms with industry-defining standards.", "Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "The COVID-19 pandemic could materially adversely affect our financial condition and results of operations.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."]} +{"query": "How has technology been central to our lives in recent years?", "pos": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."], "neg": ["Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all.", "We receive a significant portion of our revenue from a limited number of customers.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We invest in public and private companies and do not always realize a return on our investments."]} +{"query": "What is Intel's focus in terms of delivering leadership products?", "pos": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all.", "Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet.", "We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"query": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "pos": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We invest in public and private companies and do not always realize a return on our investments.", "Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings.", "With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"query": "How does the company protect personal data to prevent unauthorized access or misuse?", "pos": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."], "neg": ["We aim to deliver open software and hardware platforms with industry-defining standards.", "Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"query": "What are the conditions for accessing third-party IP?", "pos": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."], "neg": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success.", "We invest in public and private companies and do not always realize a return on our investments.", "We receive a significant portion of our revenue from a limited number of customers.", "We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"query": "How many customers contribute to the majority of our revenue?", "pos": ["We receive a significant portion of our revenue from a limited number of customers."], "neg": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet.", "The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We invest in public and private companies and do not always realize a return on our investments.", "Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years.", "We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."]} +{"query": "When does Intel plan to deliver on its goal of five manufacturing technology nodes in four years?", "pos": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."], "neg": ["Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet.", "The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors.", "We receive a significant portion of our revenue from a limited number of customers.", "We invest in public and private companies and do not always realize a return on our investments.", "We aim to deliver open software and hardware platforms with industry-defining standards."]} diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl new file mode 100644 index 00000000000..1c9589eb633 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl @@ -0,0 +1,50 @@ +{"context": ["Intel is creating a sustainable, resilient, and secure supply chain."]} +{"context": ["We are fueling customer growth through tech innovation."]} +{"context": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."]} +{"context": ["Intel is focused on navigating the challenges of today, while anticipating the needs of the future."]} +{"context": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."]} +{"context": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"context": ["Lead and democratize compute with Intel x86 and xPU."]} +{"context": ["We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"context": ["Our world-class talent is at the heart of everything we do. 
Together we strive to have a positive effect on business, society, and the planet."]} +{"context": ["Demand for our products is variable and hard to predict."]} +{"context": ["Due to the complexity of our manufacturing operations, we are not always able to timely respond to fluctuations in demand and we may incur significant charges and costs."]} +{"context": ["We face significant competition."]} +{"context": ["We invest significantly in R&D, and to the extent our R&D efforts are unsuccessful, our competitive position can be harmed, and we may not realize a return on our investments."]} +{"context": ["Our investments in new businesses, products, and technologies are inherently risky and do not always succeed."]} +{"context": ["Changes in the mix of products sold can materially impact our financial results."]} +{"context": ["We are subject to risks associated with the development and implementation of new manufacturing technologies."]} +{"context": ["We face supply chain risks."]} +{"context": ["Our disaggregated design strategy introduces additional production risks."]} +{"context": ["We are subject to the risks of product defects, errata, or other product issues."]} +{"context": ["We face risks related to security vulnerabilities in our products."]} +{"context": ["We are subject to risks associated with environmental, health, and safety and product regulations."]} +{"context": ["We have established and report on our initiatives, aspirations, and goals related to corporate responsibility matters, which exposes us to numerous risks."]} +{"context": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."]} +{"context": ["We operate globally and are subject to significant risks in many jurisdictions."]} +{"context": ["Global or regional conditions can harm our financial results."]} +{"context": ["We are subject to risks related to trade policies and regulations."]} +{"context": ["Laws and regulations can have a negative impact on our business."]} +{"context": ["We are affected by fluctuations in currency exchange rates."]} +{"context": ["Changes in our effective tax rate may impact our net income."]} +{"context": ["Catastrophic events can have a material adverse effect on our operations and financial results."]} +{"context": ["Damage to our reputation can damage our business."]} +{"context": ["We are subject to cybersecurity and privacy risks."]} +{"context": ["We face risks related to cybersecurity threats and incidents."]} +{"context": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."]} +{"context": ["We are subject to IP risks and risks associated with litigation and regulatory proceedings."]} +{"context": ["We cannot always protect our IP or enforce our IP rights."]} +{"context": ["Our licenses with other companies and participation in industry initiatives at times allow competitors to use some of our patent rights."]} +{"context": ["Third parties assert claims based on IP rights against us and our products, which could harm our business."]} +{"context": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."]} +{"context": ["We are subject to risks associated with litigation and regulatory matters."]} +{"context": ["We must attract, retain, and motivate key employees."]} +{"context": ["We are subject to risks associated with our 
strategic transactions."]} +{"context": ["Our acquisitions, divestitures, and other strategic transactions could fail to achieve our financial or strategic objectives, disrupt our ongoing business, and adversely impact our results of operations."]} +{"context": ["We invest in public and private companies and do not always realize a return on our investments."]} +{"context": ["We face risks related to our debt obligations."]} +{"context": ["We are subject to sales-related risks."]} +{"context": ["We face risks related to sales through distributors and other third parties."]} +{"context": ["We receive a significant portion of our revenue from a limited number of customers."]} +{"context": ["We face risks related to transactions with government entities."]} +{"context": ["We have fluctuations in our stock price and the amount and frequency of our stock repurchases."]} \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl new file mode 100644 index 00000000000..06bf031621e --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl @@ -0,0 +1,10 @@ +{"query": "What types of platforms does the organization focus on?", "pos": ["We aim to deliver open software and hardware platforms with industry-defining standards."]} +{"query": "What are the core values that drive our company's actions?", "pos": ["Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet."]} +{"query": "What types of companies does Intel invest in?", "pos": ["We invest in public and private companies and do not always realize a return on our investments."]} +{"query": "How has technology been central to our lives in recent years?", "pos": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."]} +{"query": "What is Intel's focus in terms of delivering leadership products?", "pos": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."]} +{"query": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "pos": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."]} +{"query": "How does the company protect personal data to prevent unauthorized access or misuse?", "pos": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."]} +{"query": "What are the conditions for accessing third-party IP?", "pos": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."]} +{"query": "How many customers contribute to the majority of our revenue?", "pos": ["We receive a significant portion of our revenue from a limited number of customers."]} +{"query": "When does Intel plan to deliver on its goal of five manufacturing technology nodes in four years?", "pos": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing 
technology nodes in four years."]} \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl new file mode 100644 index 00000000000..cd471552fe6 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl @@ -0,0 +1,10 @@ +{"question": "What types of platforms does the organization focus on?", "context": ["We aim to deliver open software and hardware platforms with industry-defining standards."], "ground_truth": "open software and hardware platforms"} +{"question": "What are the core values that drive our company's actions?", "context": ["Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet."], "ground_truth": "The core values driving the company's actions include focus on talent, positively impacting business, society, and the environment."} +{"question": "What types of companies does Intel invest in?", "context": ["We invest in public and private companies and do not always realize a return on our investments."], "ground_truth": "Intel invests in public and private companies."} +{"question": "How has technology been central to our lives in recent years?", "context": ["The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors."], "ground_truth": "Technology has become integral to various aspects of life through its dependence on semiconductors."} +{"question": "What is Intel's focus in terms of delivering leadership products?", "context": ["With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success."], "ground_truth": "Intel's focus in terms of delivering leadership products is on open and secure platforms and resilient manufacturing."} +{"question": "How has Intel been affected by the COVID-19 pandemic so far, and what?", "context": ["The COVID-19 pandemic could materially adversely affect our financial condition and results of operations."], "ground_truth": "Intel has been financially impacted by the COVID-19 pandemic."} +{"question": "How does the company protect personal data to prevent unauthorized access or misuse?", "context": ["Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings."], "ground_truth": "The company focuses on preventing unauthorized access or misuse of personal data by protecting it from theft, loss, or misuse."} +{"question": "What are the conditions for accessing third-party IP?", "context": ["We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all."], "ground_truth": "The conditions for accessing third-party IP depend on factors such as availability, commercial terms, and reasonableness."} +{"question": "How many customers contribute to the majority of our revenue?", "context": ["We receive a significant portion of our revenue from a limited number of customers."], "ground_truth": "a limited number of customers"} +{"question": "When does Intel plan to deliver on its goal of five manufacturing technology nodes 
in four years?", "context": ["Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years."], "ground_truth": "Intel aims to achieve five manufacturing technology nodes within four years, with the goal being fulfilled by 2025."} diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py new file mode 100644 index 00000000000..813cd959daf --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401 +import jsonlines +import re +import logging +from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT +from transformers import GenerationConfig +import argparse + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) + +device = "cuda" if torch.cuda.is_available() else "cpu" + +def document_set(document_file_jsonl_path): + document_list = [] + with open(document_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=[stu["query"],stu["pos"][0]] + document_list.append(passages) + return document_list + +def raw_data_generate(model_id, + base_dir, + file_json_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache): + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) + documents = document_set(base_dir) + generation_config = GenerationConfig( + temperature = temperature, + top_p = top_p, + top_k = top_k, + repetition_penalty = repetition_penalty, + max_new_tokens = max_new_tokens, + do_sample = do_sample, + num_beams = num_beams, + num_return_sequences = num_return_sequences, + use_cache = use_cache, + pad_token_id=tokenizer.eos_token_id + ) + + for i in range(len(documents)): + [question, context] = documents[i] + + if context: + input = TRUTHGENERATE_PROMPT.format(question=question,context=context) + if device=="cpu": + model_input = tokenizer(input, return_tensors="pt") + elif device=="cuda": + model_input = tokenizer(input, return_tensors="pt").to("cuda") + model.eval() + + with torch.no_grad(): + res = model.generate(**model_input, generation_config=generation_config)[0] + res=tokenizer.decode(res, skip_special_tokens=True) + + res = res[res.find('Generated ground_truth:') :] + res = re.sub('Generated ground_truth:', '', res) + res = re.sub('---', '', res) + + 
result_str=res.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() + + print('result_str',result_str) + if result_str and result_str.isspace()==False: + data = { + "question": question, + "context": [context], + "ground_truth": result_str, + } + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--llm_model", type=str) + parser.add_argument("--input", type=str) + parser.add_argument("--output", type=str) + + parser.add_argument("--temperature", type=float, default=0.8) + parser.add_argument("--top_p", type=float, default=0.9) + parser.add_argument("--top_k", type=int, default=40) + parser.add_argument("--repetition_penalty", type=float, default=2.0) + parser.add_argument("--max_new_tokens", type=int, default=48) + parser.add_argument("--do_sample", type=bool, default=True) + parser.add_argument("--num_beams", type=int, default=2) + parser.add_argument("--num_return_sequences", type=int, default=2) + parser.add_argument("--use_cache", type=bool, default=True) + + args = parser.parse_args() + + llm_model = args.llm_model + input = args.input + output = args.output + + temperature = args.temperature + top_p = args.top_p + top_k = args.top_k + repetition_penalty = args.repetition_penalty + max_new_tokens = args.max_new_tokens + do_sample = args.do_sample + num_beams = args.num_beams + num_return_sequences = args.num_return_sequences + use_cache = args.use_cache + + raw_data_generate(llm_model, + input, + output, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache) + +if __name__ == '__main__': + main() \ No newline at end of file From be1106b47e35b5ad23057f685b577b23b17fd06b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Mar 2024 08:02:13 +0000 Subject: [PATCH 044/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tools/evaluation/data_augmentation/llm_generate_truth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py index 813cd959daf..19277db9f9b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py @@ -99,7 +99,7 @@ def raw_data_generate(model_id, with jsonlines.open(file_json_path,"a") as file_json: file_json.write(data) -def main(): +def main(): parser = argparse.ArgumentParser() parser.add_argument("--llm_model", type=str) parser.add_argument("--input", type=str) @@ -145,4 +145,4 @@ def main(): use_cache) if __name__ == '__main__': - main() \ No newline at end of file + main() From 48788d4156085cb059b2fad6c36b61de286dd752 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 16:08:15 +0800 Subject: [PATCH 045/151] add ragas_evaluation.py Signed-off-by: Liangyx2 --- .../evaluation/framework/ragas_evaluation.py | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py diff --git 
a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py new file mode 100644 index 00000000000..83f173b314d --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -0,0 +1,62 @@ +from datasets import Dataset +import os +from ragas import evaluate +from ragas.metrics import ( + answer_relevancy, + faithfulness, + context_recall, + context_precision, +) +import pandas as pd +import jsonlines +import argparse + + +pd.set_option("display.max_rows", None) +pd.set_option("display.max_columns", None) +pd.set_option("display.width", None) +pd.set_option("display.max_colwidth", 10) + +def load_set(file_jsonl_path, item): + list = [] + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=stu[item] + list.append(passages) + return list + +def ragas(answer_file, ground_truth_file, openai_api_key): + os.environ["OPENAI_API_KEY"] = openai_api_key + question_list=load_set(answer_file, "question") + answer_list=load_set(answer_file, "answer") + contexts_list=load_set(ground_truth_file, "context") + ground_truth_list=load_set(ground_truth_file, "ground_truth") + + data_samples = { + 'question': question_list, + 'answer': answer_list, + 'contexts' : contexts_list, + 'ground_truth': ground_truth_list + } + + dataset = Dataset.from_dict(data_samples) + + score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + df=score.to_pandas() + print(df) + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--answer_file", type=str) + parser.add_argument("--ground_truth_file", type=str) + parser.add_argument("--openai_api_key", type=str) + args = parser.parse_args() + + answer_file = args.answer_file + ground_truth_file = args.ground_truth_file + openai_api_key = args.openai_api_key + + ragas(answer_file, ground_truth_file, openai_api_key) + +if __name__ == '__main__': + main() \ No newline at end of file From 54cc6c0a4a166c55fef32d39afe054cd969e2734 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 12 Mar 2024 08:08:36 +0000 Subject: [PATCH 046/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../evaluation/framework/ragas_evaluation.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 83f173b314d..29a5eb48987 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -1,4 +1,18 @@ -from datasets import Dataset +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from datasets import Dataset import os from ragas import evaluate from ragas.metrics import ( @@ -45,7 +59,7 @@ def ragas(answer_file, ground_truth_file, openai_api_key): df=score.to_pandas() print(df) -def main(): +def main(): parser = argparse.ArgumentParser() parser.add_argument("--answer_file", type=str) parser.add_argument("--ground_truth_file", type=str) @@ -59,4 +73,4 @@ def main(): ragas(answer_file, ground_truth_file, openai_api_key) if __name__ == '__main__': - main() \ No newline at end of file + main() From e1b55855068ed7ba8ef45dbe2cb991b965b07699 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 16:10:14 +0800 Subject: [PATCH 047/151] Create requirements.txt Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/requirements.txt | 1 + 1 file changed, 1 insertion(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt new file mode 100644 index 00000000000..9509ec12878 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt @@ -0,0 +1 @@ +ragas From 88a4293937f57ea7897400fb7882ed2f96da8a05 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 16:47:44 +0800 Subject: [PATCH 048/151] Update llm_generate_truth.py Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/llm_generate_truth.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py index 19277db9f9b..54a1d99367b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py @@ -20,7 +20,7 @@ import jsonlines import re import logging -from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT +from intel_extension_for_transformers.neural_chat.prompts.prompt import TRUTHGENERATE_PROMPT from transformers import GenerationConfig import argparse @@ -145,4 +145,4 @@ def main(): use_cache) if __name__ == '__main__': - main() + main() From 83060f94bcf021c41aee0deeacbf588024f0d555 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 16:48:20 +0800 Subject: [PATCH 049/151] Update evaluate_retrieval.py Signed-off-by: Liangyx2 --- .../tools/evaluation/retriever/evaluate_retrieval.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py index 02ae5d5971d..71a79e041ee 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py @@ -18,14 +18,14 @@ from typing import ClassVar, Collection from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ HuggingFaceInstructEmbeddings, 
HuggingFaceBgeEmbeddings -from langchain.embeddings import GooglePalmEmbeddings +from langchain.embeddings import GooglePalmEmbeddings # pylint: disable=E0611 from langchain.text_splitter import RecursiveCharacterTextSplitter from intel_extension_for_transformers.langchain.vectorstores import Chroma, Qdrant import uuid from langchain_core.documents import Document from intel_extension_for_transformers.langchain.retrievers import ChildParentRetriever from langchain_core.vectorstores import VectorStoreRetriever -from langchain.retrievers import BM25Retriever +from langchain.retrievers import BM25Retriever # pylint: disable=E0611 import jsonlines import numpy as np import logging @@ -283,4 +283,4 @@ def main(): return metrics if __name__ == '__main__': - main() + main() From 76b11750063bed87bdac2ea47ac845f9b72040c5 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 16:48:39 +0800 Subject: [PATCH 050/151] Update ragas_evaluation.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 29a5eb48987..7e7bdc48f97 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -14,8 +14,8 @@ from datasets import Dataset import os -from ragas import evaluate -from ragas.metrics import ( +from ragas import evaluate # pylint: disable=E0401 +from ragas.metrics import ( # pylint: disable=E0401 answer_relevancy, faithfulness, context_recall, From b775095ab984d9094159f7a6859df52723a42fc1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 17:05:44 +0800 Subject: [PATCH 051/151] Update test_evaluation.py Signed-off-by: Liangyx2 --- .../tests/nightly/tools/test_evaluation.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index 2472edb81c2..e668ac59a56 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -17,9 +17,10 @@ import unittest, os, shutil from unittest.mock import patch -from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction +from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction, llm_generate_truth from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval + class TestEvaluation(unittest.TestCase): def setUp(self) -> None: if os.path.exists("data.jsonl"): @@ -28,6 +29,8 @@ def setUp(self) -> None: os.remove("data_minedHN.jsonl") if os.path.exists("data_minedHN_split.jsonl"): os.remove("data_minedHN_split.jsonl") + if os.path.exists("ground_truth.jsonl"): + os.remove("ground_truth.jsonl") if os.path.exists("output"): shutil.rmtree("output", ignore_errors=True) return super().setUp() @@ -39,6 +42,8 @@ def tearDown(self) -> None: os.remove("data_minedHN.jsonl") if os.path.exists("data_minedHN_split.jsonl"): 
os.remove("data_minedHN_split.jsonl") + if os.path.exists("ground_truth.jsonl"): + os.remove("ground_truth.jsonl") if os.path.exists("output"): shutil.rmtree("output", ignore_errors=True) return super().tearDown() @@ -50,16 +55,22 @@ def test_retrieval_dataset_construction(self): '--output', 'data', \ '--range_for_sampling', '2-2', \ '--negative_number', '1'] - with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv): retrieval_dataset_construction.main() self.assertTrue(os.path.exists("data_minedHN_split.jsonl")) + def test_llm_generate_truth(self): + argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ + '--output', 'ground_truth.jsonl'] + with patch('sys.argv', ['python llm_generate_truth.py'] + argv): + llm_generate_truth.main() + self.assertTrue(os.path.exists("ground_truth.jsonl")) + def test_evaluate_retrieval(self): - argv = ['--index_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/candidate_context.jsonl', \ - '--query_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/example.jsonl', \ + argv = ['--index_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl', \ + '--query_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] - with patch('sys.argv', ['python evaluate_retrieval.py'] + argv): result = evaluate_retrieval.main() self.assertIsNotNone(result) From edbb32cdecaa4be32af05fbc754dcf851ba55747 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 12 Mar 2024 17:06:18 +0800 Subject: [PATCH 052/151] Update llm_generate_truth.py Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/llm_generate_truth.py | 1 - 1 file changed, 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py index 54a1d99367b..89e6c985c10 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py @@ -89,7 +89,6 @@ def raw_data_generate(model_id, result_str=res.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() - print('result_str',result_str) if result_str and result_str.isspace()==False: data = { "question": question, From 8962abfffc19971870ee62b6b0ae5f5b0cd3c625 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 14 Mar 2024 14:35:46 +0800 Subject: [PATCH 053/151] Update README.md Signed-off-by: Liangyx2 --- .../evaluation/data_augmentation/README.md | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md index bf28652297b..ff29509669c 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md 
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -1,7 +1,13 @@ # Retrieval Data Augmentation ## 1. Introduction -In this example, we show how to do data augmentation to construct a retrieval dataset. Specifically, the effect is to generate specific open-ended questions based on the context of the input file provided. The questions are directly related to the context to form a query-positive pair, suitable for use in constructing a retrieval dataset. +In this example, we show how to do data augmentation to construct a retrieval dataset. + +* **Context to Questions and Mine Hard Negatives** +The effect is to generate several specific open-ended questions based on the context of the input file provided. The questions are directly related to the context to form a query-positive pair, suitable for use in constructing a retrieval dataset. We then mine hard negatives, sampling negative passages from the entire corpus, which is a widely used method to improve the quality of sentence embedding fine-tuning. + +* **Context, Question to Ground Truth** +The effect is to generate the right answer based on the context and question provided. The answer is directly related to the context and the question, suitable for use in constructing a synthetic retrieval evaluation dataset. ## 2. Supported Devices CPU, CUDA @@ -28,6 +34,7 @@ pip install -r requirements_cuda.txt ``` ## 4. Retrieval Dataset Construction +### Context to Questions and Mine Hard Negatives * **On CPU** ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation @@ -66,10 +73,42 @@ python -m data_augmentation.retrieval_dataset_construction \ - `use_gpu_for_searching`: Whether to use faiss-gpu to retrieve negatives. The default value is False. - `similarity_threshold`: The cosine similarity threshold used to filter the generated queries. The default value is 0.6. -## 5. Result +**Result**: Three files will be generated. The default output files are `data.jsonl`, `data_minedHN.jsonl`, `data_minedHN_split.jsonl`. The third is the final output dataset, where each line is a dict like this: ``` {"query": str, "pos": List[str], "neg": List[str]} ``` `query` is the query, and `pos` is a positive text, based on the context of the input file provided, `neg` is a list of negative texts. -See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/augmented_example.jsonl) for a data file. +See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl) for a data file. + + +### Context, Question to Ground Truth +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation +python llm_generate_truth.py \ +--llm_model \ +--input example.jsonl \ +--output ground_truth.jsonl +``` + +**Some Important Arguments**: +- `llm_model`: The path for the LLM model. +- `input`: The path of JSON data including queries and positives where each line is a dict like this:```{"query": str, "pos": List[str]}```. See [example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl) for a data file. +- `output`: The path of the output JSON data. 
+- `temperature`: The value is used to modulate the next token probabilities, and will influence the distribution of similarity scores. The default value is 0.8. +- `top_p`: If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.9. +- `top_k`: The number of highest probability vocabulary tokens to keep for top-k-filtering. The default value is 40. +- `repetition_penalty`: The parameter for repetition penalty. 1.0 means no penalty. The default value is 2.0. +- `max_new_tokens`: The maximum number of tokens to generate, ignoring the number of tokens in the prompt. The default value is 48. +- `do_sample`: Whether or not to use sampling; use greedy decoding otherwise. The default value is True. +- `num_beams`: Number of beams for beam search. 1 means no beam search. The default value is 2. +- `num_return_sequences`: The number of independently computed returned sequences for each element in the batch. The default value is 2. +- `use_cache`: Whether or not the model should use the past key/value attentions (if applicable to the model) to speed up decoding. The default value is True. + +**Result**: +Each line of the output JSON data is a dict like this: +``` +{"question": str, "context": List[str], "ground_truth": str} +``` +`ground_truth` is the generated ground truth, based on the question and context provided. +See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for a data file. From 2ef4e0528b6faab9d4448e6212d7bb679f2506aa Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 14 Mar 2024 14:42:07 +0800 Subject: [PATCH 054/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/retriever/README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md index b078373ceab..b9c860db9e7 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md @@ -16,13 +16,13 @@ pip install -r requirements.txt ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever python evaluate_retrieval.py \ ---index_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/candidate_context.jsonl \ ---query_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/example.jsonl +--index_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl \ +--query_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl ``` **Some Important Arguments**: -- `index_file_jsonl_path`: path of JSON data including candidate context where each line is a dict like this:```{"context": List[str]}```. -- `query_file_jsonl_path`: path of JSON data including queries and positives where each line is a dict like this:```{"query": str, "pos": List[str]}```. 
+- `index_file_jsonl_path`: path of JSON data including candidate context where each line is a dict like this:```{"context": List[str]}```. See [candidate_context.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl) for a data file. +- `query_file_jsonl_path`: path of JSON data including queries and positives where each line is a dict like this:```{"query": str, "pos": List[str]}```. See [example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl) for a data file. - `vector_database`: The vector database for constructing the knowledge base. The default value is "Chroma". The other option is "Qdrant". - `embedding_model`: The name or path for the text embedding model. The default value is "BAAI/bge-base-en-v1.5". Other options are "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". - `retrieval_type`: The type of the retriever. The default value is "default". The other options are "child_parent" and "bm25". From d2ab7d8061d5185e6535244a2ee590fa3949d765 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 14 Mar 2024 15:06:02 +0800 Subject: [PATCH 055/151] add README.md Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/README.md | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md new file mode 100644 index 00000000000..d84d3417a86 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -0,0 +1,41 @@ +# Ragas Evaluation + +## 1. Introduction +Ragas is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. + +## 2. Requirements +``` +git clone https://github.com/intel/intel-extension-for-transformers.git +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat +pip install -r requirements.txt +cd pipeline/plugins/retrieval +pip install -r requirements.txt +cd ../../../ +cd tools/evaluation/framework +pip install -r requirements.txt +``` + +## 3. Evaluate Retrieval +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +python ragas_evaluation.py \ +--answer_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl \ +--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ +--openai_api_key +``` + +**Some Important Arguments**: +- `answer_file`: The path of JSON data including question and answer, where each line is a dict like this:```{"question": str, "answer": str}```. See [answer.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl) for a data file. 
+ +- `ground_truth_file`: The path of JSON data including question, context, and ground_truth, where each line is a dict like this:```{"question": str, "context": List[str], "ground_truth": str}```. See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for a data file. The `"question"` of `answer_file` and `ground_truth_file` should correspond one-to-one. + +- `openai_api_key`: This guide utilizes OpenAI for running some metrics, so ensure you have your OpenAI key ready and available in your environment. + +## 4. Result +The results include your input question, answer, contexts, ground_truth, as well as output answer relevancy, faithfulness, context_recall, context_precision. +``` + question answer contexts ground_truth answer_relevancy faithfulness context_recall context_precision +0 What t... The or... [We ai... open s... 0.900788 0.500000 1.0 1.0 +1 What a... The co... [Our w... The co... 0.985826 0.250000 1.0 0.0 +...... +``` From bcdf209314bd1f16d273a6f91cd464e8d5a541e1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 14 Mar 2024 15:07:51 +0800 Subject: [PATCH 056/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index d84d3417a86..7c2bcab99e3 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -15,7 +15,7 @@ cd tools/evaluation/framework pip install -r requirements.txt ``` -## 3. Evaluate Retrieval +## 3. Evaluate RAG ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework python ragas_evaluation.py \ From 102649bca207d11f9ac5b804e6ae18fd531108c2 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 14 Mar 2024 15:30:18 +0800 Subject: [PATCH 057/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/retriever/README.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md index b9c860db9e7..1d957bffdae 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md @@ -1,7 +1,11 @@ # Retrieval Evaluation ## 1. Introduction -We provide a script to evaluate the performance of the retrieval. We use two metrics: MRR (Mean reciprocal rank) and Hit (Hit Ratio). MRR is an internationally accepted mechanism for evaluating search algorithms. MRR emphasizes the position of ground truth in the retrieval list, the higher it is, the better. Hit emphasizes the accuracy of retrieval, that is, whether the ground truth is included in the retrieval items. +We provide a script to evaluate the performance of the retrieval. We use two metrics: MRR (Mean reciprocal rank) and Hit (Hit Ratio). +* **MRR** +MRR is an internationally accepted mechanism for evaluating search algorithms. MRR emphasizes the position of ground truth in the retrieval list, the higher it is, the better. 
+* **Hit** +Hit emphasizes the accuracy of retrieval, that is, whether the ground truth is included in the retrieval items. The higher, the better. ## 2. Requirements ``` From 36a28a480d85032263032f88ca291da6bc4a6c2d Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 14 Mar 2024 15:30:43 +0800 Subject: [PATCH 058/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index 7c2bcab99e3..19be577cd10 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -1,7 +1,15 @@ # Ragas Evaluation ## 1. Introduction -Ragas is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. +Ragas is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. We use four metrics: answer relevancy, faithfulness, context recall, context precision +* **Answer relevancy** +Answer Relevancy focuses on assessing how pertinent the generated answer is to the given prompt. A lower score is assigned to answers that are incomplete or contain redundant information and higher scores indicate better relevancy. +* **Faithfulness** +Faithfulness measures the factual consistency of the generated answer against the given context. It is calculated from answer and retrieved context. The answer is scaled to (0,1) range. Higher the better. +* **Context recall** +Context recall measures the extent to which the retrieved context aligns with the annotated answer, treated as the ground truth. It is computed based on the ground truth and the retrieved context, and the values range between 0 and 1, with higher values indicating better performance. +* **Context precision** +Context Precision is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked higher or not. Ideally all the relevant chunks must appear at the top ranks. This metric is computed using the question, ground_truth and the contexts, with values ranging between 0 and 1, where higher scores indicate better precision. ## 2. Requirements ``` @@ -32,7 +40,7 @@ python ragas_evaluation.py \ - `openai_api_key`: This guide utilizes OpenAI for running some metrics, so ensure you have your OpenAI key ready and available in your environment. ## 4. Result -The results include your input question, answer, contexts, ground_truth, as well as output answer relevancy, faithfulness, context_recall, context_precision. +The results include your input question, answer, contexts, ground_truth, as well as output answer relevancy, faithfulness, context recall, context precision. ``` question answer contexts ground_truth answer_relevancy faithfulness context_recall context_precision 0 What t... The or... [We ai... open s... 
0.900788 0.500000 1.0 1.0 From 548fdd9b768c4bb5354ed3602bc1c45342dd7b58 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 15 Mar 2024 09:39:30 +0800 Subject: [PATCH 059/151] Add files via upload Signed-off-by: Liangyx2 --- .../tests/ci/tools/test_evaluation.py | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py new file mode 100644 index 00000000000..e668ac59a56 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py @@ -0,0 +1,79 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest, os, shutil +from unittest.mock import patch +from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction, llm_generate_truth +from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval + + +class TestEvaluation(unittest.TestCase): + def setUp(self) -> None: + if os.path.exists("data.jsonl"): + os.remove("data.jsonl") + if os.path.exists("data_minedHN.jsonl"): + os.remove("data_minedHN.jsonl") + if os.path.exists("data_minedHN_split.jsonl"): + os.remove("data_minedHN_split.jsonl") + if os.path.exists("ground_truth.jsonl"): + os.remove("ground_truth.jsonl") + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) + return super().setUp() + + def tearDown(self) -> None: + if os.path.exists("data.jsonl"): + os.remove("data.jsonl") + if os.path.exists("data_minedHN.jsonl"): + os.remove("data_minedHN.jsonl") + if os.path.exists("data_minedHN_split.jsonl"): + os.remove("data_minedHN_split.jsonl") + if os.path.exists("ground_truth.jsonl"): + os.remove("ground_truth.jsonl") + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) + return super().tearDown() + + def test_retrieval_dataset_construction(self): + argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \ + '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/', \ + '--output', 'data', \ + '--range_for_sampling', '2-2', \ + '--negative_number', '1'] + with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv): + retrieval_dataset_construction.main() + self.assertTrue(os.path.exists("data_minedHN_split.jsonl")) + + def test_llm_generate_truth(self): + argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ + '--output', 'ground_truth.jsonl'] + with patch('sys.argv', 
['python llm_generate_truth.py'] + argv): + llm_generate_truth.main() + self.assertTrue(os.path.exists("ground_truth.jsonl")) + + def test_evaluate_retrieval(self): + argv = ['--index_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl', \ + '--query_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] + with patch('sys.argv', ['python evaluate_retrieval.py'] + argv): + result = evaluate_retrieval.main() + self.assertIsNotNone(result) + +if __name__ == '__main__': + unittest.main() From 36448ea25b81562e8907512eae8e51b857f3d1e2 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 15 Mar 2024 10:50:33 +0800 Subject: [PATCH 060/151] Delete intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py Signed-off-by: Liangyx2 --- .../tests/ci/tools/test_evaluation.py | 79 ------------------- 1 file changed, 79 deletions(-) delete mode 100644 intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py deleted file mode 100644 index e668ac59a56..00000000000 --- a/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py +++ /dev/null @@ -1,79 +0,0 @@ -# !/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest, os, shutil -from unittest.mock import patch -from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction, llm_generate_truth -from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval - - -class TestEvaluation(unittest.TestCase): - def setUp(self) -> None: - if os.path.exists("data.jsonl"): - os.remove("data.jsonl") - if os.path.exists("data_minedHN.jsonl"): - os.remove("data_minedHN.jsonl") - if os.path.exists("data_minedHN_split.jsonl"): - os.remove("data_minedHN_split.jsonl") - if os.path.exists("ground_truth.jsonl"): - os.remove("ground_truth.jsonl") - if os.path.exists("output"): - shutil.rmtree("output", ignore_errors=True) - return super().setUp() - - def tearDown(self) -> None: - if os.path.exists("data.jsonl"): - os.remove("data.jsonl") - if os.path.exists("data_minedHN.jsonl"): - os.remove("data_minedHN.jsonl") - if os.path.exists("data_minedHN_split.jsonl"): - os.remove("data_minedHN_split.jsonl") - if os.path.exists("ground_truth.jsonl"): - os.remove("ground_truth.jsonl") - if os.path.exists("output"): - shutil.rmtree("output", ignore_errors=True) - return super().tearDown() - - def test_retrieval_dataset_construction(self): - argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ - '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \ - '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/', \ - '--output', 'data', \ - '--range_for_sampling', '2-2', \ - '--negative_number', '1'] - with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv): - retrieval_dataset_construction.main() - self.assertTrue(os.path.exists("data_minedHN_split.jsonl")) - - def test_llm_generate_truth(self): - argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ - '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ - '--output', 'ground_truth.jsonl'] - with patch('sys.argv', ['python llm_generate_truth.py'] + argv): - llm_generate_truth.main() - self.assertTrue(os.path.exists("ground_truth.jsonl")) - - def test_evaluate_retrieval(self): - argv = ['--index_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl', \ - '--query_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ - '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] - with patch('sys.argv', ['python evaluate_retrieval.py'] + argv): - result = evaluate_retrieval.main() - self.assertIsNotNone(result) - -if __name__ == '__main__': - unittest.main() From 26e3e9dbfdf7f9e05bd9cbf237f43f4d3f110e54 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 15 Mar 2024 10:51:12 +0800 Subject: [PATCH 061/151] Update requirements.txt Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt index 9509ec12878..3d38d0e00bb 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt +++ 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt @@ -1 +1,3 @@ +datasets +jsonlines ragas From e4793d374a24b35c24958574d49a27ea7405b212 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 15 Mar 2024 10:51:40 +0800 Subject: [PATCH 062/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index 19be577cd10..88d848ed67b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -14,12 +14,7 @@ Context Precision is a metric that evaluates whether all of the ground-truth rel ## 2. Requirements ``` git clone https://github.com/intel/intel-extension-for-transformers.git -cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat -pip install -r requirements.txt -cd pipeline/plugins/retrieval -pip install -r requirements.txt -cd ../../../ -cd tools/evaluation/framework +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework pip install -r requirements.txt ``` From 0569b54a62f8d1a46ecb0099b49476e68d785077 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 15 Mar 2024 14:58:45 +0800 Subject: [PATCH 063/151] Update hn_mine.py Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/data_augmentation/hn_mine.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py index 976713a846c..73f49914396 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py @@ -29,6 +29,8 @@ def create_index(embeddings, use_gpu): co.shard = True co.useFloat16 = True index = faiss.index_cpu_to_all_gpus(index, co=co) + else: + pass index.add(embeddings) return index From 2d15ec063ff16b67ec2abfe4093d9a1ce351e118 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 15 Mar 2024 15:04:31 +0800 Subject: [PATCH 064/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index 88d848ed67b..caa0011d268 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -1,11 +1,11 @@ # Ragas Evaluation ## 1. Introduction -Ragas is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. We use four metrics: answer relevancy, faithfulness, context recall, context precision +[Ragas](https://github.com/explodinggradients/ragas) is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. 
We use four metrics: answer relevancy, faithfulness, context recall, context precision * **Answer relevancy** Answer Relevancy focuses on assessing how pertinent the generated answer is to the given prompt. A lower score is assigned to answers that are incomplete or contain redundant information and higher scores indicate better relevancy. * **Faithfulness** -Faithfulness measures the factual consistency of the generated answer against the given context. It is calculated from answer and retrieved context. The answer is scaled to (0,1) range. Higher the better. +Faithfulness measures the factual consistency of the generated answer against the given context. It is calculated from answer and retrieved context. The answer is scaled to (0,1) range. Higher is the better. * **Context recall** Context recall measures the extent to which the retrieved context aligns with the annotated answer, treated as the ground truth. It is computed based on the ground truth and the retrieved context, and the values range between 0 and 1, with higher values indicating better performance. * **Context precision** From e8127e9a24aabf959084bfa6c7a10eefa56e103c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 10:25:34 +0800 Subject: [PATCH 065/151] Update ragas_evaluation.py Signed-off-by: Liangyx2 --- .../evaluation/framework/ragas_evaluation.py | 49 ++++++++++++++++--- 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 7e7bdc48f97..9770511c33b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -21,6 +21,12 @@ context_recall, context_precision, ) +from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline +from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings +from langchain.embeddings import GooglePalmEmbeddings +from ragas.llms import LangchainLLMWrapper +from ragas.embeddings import LangchainEmbeddingsWrapper import pandas as pd import jsonlines import argparse @@ -39,8 +45,8 @@ def load_set(file_jsonl_path, item): list.append(passages) return list -def ragas(answer_file, ground_truth_file, openai_api_key): - os.environ["OPENAI_API_KEY"] = openai_api_key +def ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_model): + question_list=load_set(answer_file, "question") answer_list=load_set(answer_file, "answer") contexts_list=load_set(ground_truth_file, "context") @@ -55,7 +61,34 @@ def ragas(answer_file, ground_truth_file, openai_api_key): dataset = Dataset.from_dict(data_samples) - score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + if llm_model and embedding_model: + langchain_llm = HuggingFacePipeline.from_model_id( + model_id=llm_model, + task="text-generation", + pipeline_kwargs={"max_new_tokens": 128}, + ) + if "instruct" in embedding_model: + langchain_embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) + elif "bge" in embedding_model: + langchain_embeddings = HuggingFaceBgeEmbeddings( + model_name=embedding_model, + encode_kwargs={'normalize_embeddings': True}, + query_instruction="Represent this sentence for searching relevant 
passages:") + elif "Google" == embedding_model: + langchain_embeddings = GooglePalmEmbeddings() + else: + langchain_embeddings = HuggingFaceEmbeddings( + model_name=embedding_model, + encode_kwargs={"normalize_embeddings": True}, + ) + + langchain_llm = LangchainLLMWrapper(langchain_llm) + langchain_embedding = LangchainEmbeddingsWrapper(langchain_embeddings) + score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision],llm = langchain_llm, embeddings=langchain_embedding) + else: + os.environ["OPENAI_API_KEY"] = openai_api_key + score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + df=score.to_pandas() print(df) @@ -63,14 +96,18 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--answer_file", type=str) parser.add_argument("--ground_truth_file", type=str) - parser.add_argument("--openai_api_key", type=str) + parser.add_argument("--openai_api_key", type=str) + parser.add_argument("--llm_model", type=str) + parser.add_argument("--embedding_model", type=str) args = parser.parse_args() answer_file = args.answer_file ground_truth_file = args.ground_truth_file - openai_api_key = args.openai_api_key + openai_api_key = args.openai_api_key + llm_model = args.llm_model + embedding_model = args.embedding_model - ragas(answer_file, ground_truth_file, openai_api_key) + ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_model) if __name__ == '__main__': main() From 321e9b6e8d1acae8cf33fd6b1c442f5b40668f5f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 02:25:53 +0000 Subject: [PATCH 066/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tools/evaluation/framework/ragas_evaluation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 9770511c33b..74a82a0371f 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -46,7 +46,7 @@ def load_set(file_jsonl_path, item): return list def ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_model): - + question_list=load_set(answer_file, "question") answer_list=load_set(answer_file, "answer") contexts_list=load_set(ground_truth_file, "context") @@ -96,14 +96,14 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--answer_file", type=str) parser.add_argument("--ground_truth_file", type=str) - parser.add_argument("--openai_api_key", type=str) + parser.add_argument("--openai_api_key", type=str) parser.add_argument("--llm_model", type=str) parser.add_argument("--embedding_model", type=str) args = parser.parse_args() answer_file = args.answer_file ground_truth_file = args.ground_truth_file - openai_api_key = args.openai_api_key + openai_api_key = args.openai_api_key llm_model = args.llm_model embedding_model = args.embedding_model From f9b4dabb86db56d146e45155640c0caff9f3fc98 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 10:44:26 +0800 Subject: [PATCH 067/151] Update requirements.txt Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/requirements.txt | 4 +++- 
1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt index 3d38d0e00bb..6a2850d42eb 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt @@ -1,3 +1,5 @@ -datasets +InstructorEmbedding +intel-extension-for-transformers jsonlines ragas +sentence-transformers==2.3.1 From 76dc219bd189aac2f3e6b68156f0438756525e90 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 11:03:42 +0800 Subject: [PATCH 068/151] Update README.md Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/README.md | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index caa0011d268..9b9eda03647 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -11,7 +11,8 @@ Context recall measures the extent to which the retrieved context aligns with th * **Context precision** Context Precision is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked higher or not. Ideally all the relevant chunks must appear at the top ranks. This metric is computed using the question, ground_truth and the contexts, with values ranging between 0 and 1, where higher scores indicate better precision. -## 2. Requirements +## 2. Installation +please install dependency using the following commands. ``` git clone https://github.com/intel/intel-extension-for-transformers.git cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework @@ -19,6 +20,8 @@ pip install -r requirements.txt ``` ## 3. Evaluate RAG +* **OpenAI** +By default, these metrics use OpenAI’s API to compute the score. If you’re using this metric, ensure that you’ve set the environment key OPENAI_API_KEY with your API key. ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework python ragas_evaluation.py \ @@ -26,13 +29,23 @@ python ragas_evaluation.py \ --ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ --openai_api_key ``` +* **Langchain** +You can also try other LLMs for evaluation using Langchain. +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +python ragas_evaluation.py \ +--answer_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl \ +--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ +--llm_model \ +--embedding_model +``` **Some Important Arguments**: - `answer_file`: The path of JSON data including question and answer, where each line is a dict like this:```{"question": str, "answer": str}```. 
See [answer.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl) for a data file. - - `ground_truth_file`: The path of JSON data including question, context, and ground_truth, where each line is a dict like this:```{"question": str, "context": List[str], "ground_truth": str}```. See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for a data file. The `"question"` of `answer_file` and `ground_truth_file` should correspond one-to-one. - -- `openai_api_key`: This guide utilizes OpenAI for running some metrics, so ensure you have your OpenAI key ready and available in your environment. +- `openai_api_key`: If you utilize OpenAI for running ragas, ensure you have your OpenAI key ready and available in your environment. +- `llm_model`: If you utilize Langchain for running ragas, you should input the path for the LLM model. +- `embedding_model`: If you utilize Langchain for running ragas, you should input the path for the text embedding model. ## 4. Result The results include your input question, answer, contexts, ground_truth, as well as output answer relevancy, faithfulness, context recall, context precision. From b9db553724c2d03beb0ce6be09a221e8458a3ccc Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 11:04:10 +0800 Subject: [PATCH 069/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index 9b9eda03647..bae8fe55a94 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -12,7 +12,7 @@ Context recall measures the extent to which the retrieved context aligns with th Context Precision is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked higher or not. Ideally all the relevant chunks must appear at the top ranks. This metric is computed using the question, ground_truth and the contexts, with values ranging between 0 and 1, where higher scores indicate better precision. ## 2. Installation -please install dependency using the following commands. +Please install dependency using the following commands. 
``` git clone https://github.com/intel/intel-extension-for-transformers.git cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework pip install -r requirements.txt From d7b68cb4ad670dea238ba2127f9c6c0b479f3bdf Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 11:07:08 +0800 Subject: [PATCH 070/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index bae8fe55a94..78a8ac1b3cd 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -1,7 +1,7 @@ # Ragas Evaluation ## 1. Introduction -[Ragas](https://github.com/explodinggradients/ragas) is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. We use four metrics: answer relevancy, faithfulness, context recall, context precision +[Ragas](https://github.com/explodinggradients/ragas) is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. We use four metrics: answer relevancy, faithfulness, context recall, context precision. * **Answer relevancy** Answer Relevancy focuses on assessing how pertinent the generated answer is to the given prompt. A lower score is assigned to answers that are incomplete or contain redundant information and higher scores indicate better relevancy. * **Faithfulness** @@ -21,7 +21,7 @@ pip install -r requirements.txt ## 3. Evaluate RAG * **OpenAI** -By default, these metrics use OpenAI’s API to compute the score. If you’re using this metric, ensure that you’ve set the environment key OPENAI_API_KEY with your API key. +By default, Ragas uses OpenAI’s API to compute the score. If you’re using this option, ensure that you set the environment variable OPENAI_API_KEY with your API key.
``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework python ragas_evaluation.py \ From 48de606ec872d848059367df3d61cab5feb213e2 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 11:28:04 +0800 Subject: [PATCH 071/151] Update requirements.txt Signed-off-by: Liangyx2 --- .../neural_chat/tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/intel_extension_for_transformers/neural_chat/tests/requirements.txt b/intel_extension_for_transformers/neural_chat/tests/requirements.txt index b0a685213be..5184c18a586 100644 --- a/intel_extension_for_transformers/neural_chat/tests/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tests/requirements.txt @@ -58,6 +58,7 @@ python-docx python-multipart pyyaml qdrant-client +ragas rank_bm25 resampy==0.3.1 rouge_score From 415ebc86ad558651aadbacd51b9152e09dac8ecd Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 11:37:17 +0800 Subject: [PATCH 072/151] Update ragas_evaluation.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 74a82a0371f..1ef4ec1f0dd 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -91,6 +91,7 @@ def ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_m df=score.to_pandas() print(df) + return df def main(): parser = argparse.ArgumentParser() @@ -107,7 +108,8 @@ def main(): llm_model = args.llm_model embedding_model = args.embedding_model - ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_model) + metrics=ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_model) + return metrics if __name__ == '__main__': main() From f03badd768c16da60372ca50c7aec21fa5dec9ca Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 11:46:57 +0800 Subject: [PATCH 073/151] Update test_evaluation.py Signed-off-by: Liangyx2 --- .../tests/nightly/tools/test_evaluation.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index e668ac59a56..394e147079c 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -19,7 +19,7 @@ from unittest.mock import patch from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction, llm_generate_truth from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval - +from intel_extension_for_transformers.neural_chat.tools.evaluation.framework import ragas_evaluation class TestEvaluation(unittest.TestCase): def setUp(self) -> None: @@ -75,5 +75,14 @@ def test_evaluate_retrieval(self): result = evaluate_retrieval.main() self.assertIsNotNone(result) + def test_ragas_evaluation(self): + argv = ['--answer_file', 
'/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl', \ + '--ground_truth_file', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl', \ + '--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] + with patch('sys.argv', ['python ragas_evaluation.py'] + argv): + result = ragas_evaluation.main() + self.assertIsNotNone(result) + if __name__ == '__main__': unittest.main() From 2b92e747b473378a1380347b5c1de474389c15a6 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 12:05:36 +0800 Subject: [PATCH 074/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index 78a8ac1b3cd..dddcfa9f171 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -45,7 +45,7 @@ python ragas_evaluation.py \ - `ground_truth_file`: The path of JSON data including question, context, and ground_truth, where each line is a dict like this:```{"question": str, "context": List[str], "ground_truth": str}```. See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for a data file. The `"question"` of `answer_file` and `ground_truth_file` should correspond one-to-one. - `openai_api_key`: If you utilize OpenAI for running ragas, ensure you have your OpenAI key ready and available in your environment. - `llm_model`: If you utilize Langchain for running ragas, you should input the path for the LLM model. -- `embedding_model`: If you utilize Langchain for running ragas, you should input the path for the text embedding model. +- `embedding_model`: If you utilize Langchain for running ragas, you should input the path for the text embedding model. You can use "BAAI/bge-base-en-v1.5", "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". ## 4. Result The results include your input question, answer, contexts, ground_truth, as well as output answer relevancy, faithfulness, context recall, context precision. 
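The framework README above fixes the two input schemas, so a minimal end-to-end run can be pieced together from them. The sketch below mirrors the nightly test's `sys.argv` patching; the jsonl contents and model paths are invented placeholders, not data shipped with the repository.

```
# Minimal sketch: build one-line answer/ground-truth files and run the Ragas script.
# The question/answer text and the model paths are assumptions, not repo data.
import jsonlines
from unittest.mock import patch
from intel_extension_for_transformers.neural_chat.tools.evaluation.framework import ragas_evaluation

with jsonlines.open("answer.jsonl", "w") as f:
    f.write({"question": "What is NeuralChat?",
             "answer": "NeuralChat is a customizable chatbot framework."})

with jsonlines.open("ground_truth.jsonl", "w") as f:
    f.write({"question": "What is NeuralChat?",
             "context": ["NeuralChat is a customizable chatbot framework."],
             "ground_truth": "A customizable chatbot framework."})

argv = ['--answer_file', 'answer.jsonl',
        '--ground_truth_file', 'ground_truth.jsonl',
        '--llm_model', '/path/to/neural-chat-7b-v3-1',  # placeholder local checkpoint
        '--embedding_model', '/path/to/gte-base']       # placeholder local embedding model
with patch('sys.argv', ['python ragas_evaluation.py'] + argv):
    df = ragas_evaluation.main()  # pandas DataFrame with the four metric columns
```

Passing both `--llm_model` and `--embedding_model` takes the Langchain branch added in PATCH 065, so no OpenAI key is needed for such a run, and `main()` returns the score DataFrame after the `return metrics` change in PATCH 072.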
From 9091729e28aef2a6ee07fe37b174435d26c8f250 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 16:28:56 +0800 Subject: [PATCH 075/151] Update retrieval_dataset_construction.py Signed-off-by: Liangyx2 --- .../retrieval_dataset_construction.py | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py index 66bae2c7736..e4519fed174 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -18,6 +18,7 @@ from .llm_generate_raw_data import raw_data_generate from .mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check import argparse +import os def construct_retrieval_dataset( llm_model, @@ -38,9 +39,10 @@ def construct_retrieval_dataset( use_gpu_for_searching, similarity_threshold): + output_path=output+'/raw.jsonl' raw_data_generate(llm_model, input, - output, + output_path, temperature, top_p, top_k, @@ -51,16 +53,15 @@ def construct_retrieval_dataset( num_return_sequences, use_cache) - output_hn_path=output+'_minedHN.jsonl' - + output_hn_path=output+'/minedHN.jsonl' mine_hard_negatives(embedding_model, - output, + output_path, output_hn_path, range_for_sampling, negative_number, use_gpu_for_searching) - output_json_split_path = output+"_minedHN_split.jsonl" + output_json_split_path = output+"/minedHN_split.jsonl" similarity_check(output_hn_path, output_json_split_path, embedding_model, @@ -72,7 +73,7 @@ def main(): parser.add_argument("--llm_model", type=str) parser.add_argument("--embedding_model", type=str) parser.add_argument("--input", type=str) - parser.add_argument("--output", type=str, default='data') + parser.add_argument("--output", type=str, default='./data') parser.add_argument("--temperature", type=float, default=0.8) parser.add_argument("--top_p", type=float, default=0.9) @@ -113,6 +114,19 @@ def main(): similarity_threshold=args.similarity_threshold + try: + if os.path.exists(output) == False: + os.mkdir(output) + else: + if os.path.exists(output+'/raw.jsonl'): + os.remove(output+'/raw.jsonl') + if os.path.exists(output+'/minedHN.jsonl'): + os.remove(output+'/minedHN.jsonl') + if os.path.exists(output+'/minedHN_split.jsonl'): + os.remove(output+'/minedHN_split.jsonl') + except: + pass + construct_retrieval_dataset( llm_model, embedding_model, From be32736f12d96245f7cd35c86a070bdb7273fba1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 18 Mar 2024 08:29:17 +0000 Subject: [PATCH 076/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../data_augmentation/retrieval_dataset_construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py index e4519fed174..3c9a5424c61 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py +++ 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py @@ -126,7 +126,7 @@ def main(): os.remove(output+'/minedHN_split.jsonl') except: pass - + construct_retrieval_dataset( llm_model, embedding_model, From 2c4f4526f65c63104aeb96e462aab62298871d3b Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 16:29:47 +0800 Subject: [PATCH 077/151] Update hn_mine.py Signed-off-by: Liangyx2 From c48f66a3bad8fd5c26816da2849935ca2f59eada Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 16:30:20 +0800 Subject: [PATCH 078/151] Update llm_generate_raw_data.py Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/llm_generate_raw_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index 6970a4b5799..7f33eecbabd 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -111,5 +111,5 @@ def raw_data_generate(model_id, "query": result_str, "pos": [context], } - with jsonlines.open(file_json_path+'.jsonl',"a") as file_json: + with jsonlines.open(file_json_path,"a") as file_json: file_json.write(data) From 654c44a72da648bcc25aa4e2e6fe5f85703c02c4 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 16:30:47 +0800 Subject: [PATCH 079/151] Update mine_hard_negatives_check_similarity.py Signed-off-by: Liangyx2 --- .../data_augmentation/mine_hard_negatives_check_similarity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py index 87203e922d5..78848a1fb19 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py @@ -33,7 +33,7 @@ def mine_hard_negatives(model_name_or_path, model = SentenceTransformer(model_name_or_path) find_knn_neg(model, - input_file=input_file+'.jsonl', + input_file=input_file, candidate_pool=candidate_pool, output_file=output_file, sample_range=sample_range, From 5208c98b1cd716c3a704e40f5ea622107e87af28 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 16:31:45 +0800 Subject: [PATCH 080/151] Update hn_mine.py Signed-off-by: Liangyx2 From ace109081a9a9211205a00ff072ba778ef3e9fae Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 16:34:00 +0800 Subject: [PATCH 081/151] Update test_evaluation.py Signed-off-by: Liangyx2 --- .../tests/nightly/tools/test_evaluation.py | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index 394e147079c..5d3d7d578ea 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ 
b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -23,12 +23,8 @@ class TestEvaluation(unittest.TestCase): def setUp(self) -> None: - if os.path.exists("data.jsonl"): - os.remove("data.jsonl") - if os.path.exists("data_minedHN.jsonl"): - os.remove("data_minedHN.jsonl") - if os.path.exists("data_minedHN_split.jsonl"): - os.remove("data_minedHN_split.jsonl") + if os.path.exists("data"): + shutil.rmtree("data", ignore_errors=True) if os.path.exists("ground_truth.jsonl"): os.remove("ground_truth.jsonl") if os.path.exists("output"): @@ -36,12 +32,8 @@ def setUp(self) -> None: return super().setUp() def tearDown(self) -> None: - if os.path.exists("data.jsonl"): - os.remove("data.jsonl") - if os.path.exists("data_minedHN.jsonl"): - os.remove("data_minedHN.jsonl") - if os.path.exists("data_minedHN_split.jsonl"): - os.remove("data_minedHN_split.jsonl") + if os.path.exists("data"): + shutil.rmtree("data", ignore_errors=True) if os.path.exists("ground_truth.jsonl"): os.remove("ground_truth.jsonl") if os.path.exists("output"): @@ -52,12 +44,12 @@ def test_retrieval_dataset_construction(self): argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \ '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/', \ - '--output', 'data', \ + '--output', './data', \ '--range_for_sampling', '2-2', \ '--negative_number', '1'] with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv): retrieval_dataset_construction.main() - self.assertTrue(os.path.exists("data_minedHN_split.jsonl")) + self.assertTrue(os.path.exists("./data/minedHN_split.jsonl")) def test_llm_generate_truth(self): argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ From 83f10e938e6ccc067d07d1cddf5b7951c2588544 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 17:15:54 +0800 Subject: [PATCH 082/151] Update ragas_evaluation.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 1ef4ec1f0dd..0120e8dd387 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -24,9 +24,9 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings -from langchain.embeddings import GooglePalmEmbeddings -from ragas.llms import LangchainLLMWrapper -from ragas.embeddings import LangchainEmbeddingsWrapper +from langchain.embeddings import GooglePalmEmbeddings # pylint: disable=E0611 +from ragas.llms import LangchainLLMWrapper # pylint: disable=E0611 +from ragas.embeddings import LangchainEmbeddingsWrapper # pylint: disable=E0611 import pandas as pd import jsonlines import argparse @@ -84,7 +84,10 @@ def ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_m langchain_llm = LangchainLLMWrapper(langchain_llm) langchain_embedding = LangchainEmbeddingsWrapper(langchain_embeddings) - score = 
evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision],llm = langchain_llm, embeddings=langchain_embedding) + score = evaluate(dataset, + metrics=[answer_relevancy, faithfulness, context_recall, context_precision], + llm = langchain_llm, # pylint: disable=E1123 + embeddings=langchain_embedding) # pylint: disable=E1123 else: os.environ["OPENAI_API_KEY"] = openai_api_key score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) From ac0aef117d312ff6f036c10d36f5cd3b0cc573cc Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 18 Mar 2024 18:06:39 +0800 Subject: [PATCH 083/151] Update README.md Signed-off-by: Liangyx2 --- .../evaluation/data_augmentation/README.md | 48 +++++++++++++++---- 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md index ff29509669c..ac1e44f1dbe 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -1,18 +1,47 @@ # Retrieval Data Augmentation ## 1. Introduction -In this example, we show how to do data augmentation to construct a retrieval dataset. +In this example, we show how to do data augmentation to construct a retrieval dataset. The data is described below. +* **[example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl)** can be used in [hard negatives mining](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/mine_hard_neg.py), [embedding model evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/evaluate.py), [retriever evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py). +Each line is a dict like this: +``` +{"query": str, "pos": List[str]} +``` +`query` is the query text, and `pos` is a positive text. -* **Context to Question and Mine Hard Negatives** -The effect is to generate several specific open-ended questions based on the context of the input file provided. The questions are directly related to the context to form a query-positive pair, suitable for use in constructing a retrieval dataset. Then we sample some from the entire corpus as the negatives by mining hard negatives, which is a widely used method to improve the quality of finetuning sentence embedding models. +* **[augmented_example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/augmented_example.jsonl)** can be used in [embedding finetuning](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/finetune.py). +Each line is a dict like this: +``` +{"query": str, "pos": List[str], "neg": List[str]} +``` +`query` is the query text, and `pos` is a positive text, `neg` is a list of negative texts. 
-* **Context, Question to Ground Truth** -The effect is to generate the right answer based on the context and question provided. The answer is directly related to the context and the question, suitable for use in constructing a synthetic retrieval evaluation dataset. +* **[candidate_context.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl)** can be used in [embedding model evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/embedding_finetune/evaluate.py), [retriever evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py). +Each line is a dict like this: +``` +{"context": List[str]} +``` +`context` is the candidate context. + +* **[answer.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl)** can be used in [Rag evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py). +Each line is a dict like this: +``` +{"question": str, "answer": str} +``` +`question` is the question text, `answer` is the answer text. + +* **[ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl)** can be used in [Rag evaluation](https://github.com/intel/intel-extension-for-transformers/tree/main/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py). +Each line is a dict like this: +``` +{"question": str, "context": List[str], "ground_truth": str} +``` +`question` is the question text, `context` is the candidate context, `ground_truth` is the ground truth. ## 2. Supported Devices CPU, CUDA -## 3. Requirements +## 3. Installation +Please ensure the installation of NeuralChat first by following the commands. ``` git clone https://github.com/intel/intel-extension-for-transformers.git cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat @@ -20,13 +49,12 @@ pip install -r requirements.txt cd pipeline/plugins/retrieval pip install -r requirements.txt ``` - +After that, install additional dependency according to your device. * **On CPU** ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation pip install -r requirements_cpu.txt ``` - * **On CUDA** ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation @@ -35,6 +63,7 @@ pip install -r requirements_cuda.txt ## 4. Retrieval Dataset Construction ### Context to Questions and Mine Hard Negatives +The effect is to generate several specific open-ended questions based on the context of the input file provided. The questions are directly related to the context to form a query-positive pair, suitable for use in constructing a retrieval dataset. Then we sample some from the entire corpus as the negatives by mining hard negatives, which is a widely used method to improve the quality of finetuning sentence embedding models. 
* **On CPU** ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation @@ -58,7 +87,7 @@ python -m data_augmentation.retrieval_dataset_construction \ - `llm_model`: The path for the LLM model. - `embedding_model`: The path for the text embedding model. - `input`: The path of the file/folder/link of the content. -- `output`: The name of output files. The default value is 'data'. The default output files are 'data.jsonl', 'data_minedHN.jsonl', 'data_minedHN_split.jsonl'. +- `output`: The name of output files. The default value is './data'. The default output files are 'data.jsonl', 'data_minedHN.jsonl', 'data_minedHN_split.jsonl'. - `temperature`: The value is used to modulate the next token probabilities, and will influence the distribution of similarity scores. The default value is 0.8. - `top_p`: If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.9. - `top_k`: The number of highest probability vocabulary tokens to keep for top-k-filtering. The default value is 40. @@ -83,6 +112,7 @@ See [augmented_example.jsonl](https://github.com/intel/intel-extension-for-trans ### Context, Question to Ground Truth +The effect is to generate the right answer based on the context and question provided. The answer is directly related to the context and the question, suitable for use in constructing a synthetic retrieval evaluation dataset. ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation python llm_generate_truth.py \ From 8deaabd69e90e855739bde0490a9295780f40e75 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 19 Mar 2024 10:03:12 +0800 Subject: [PATCH 084/151] Update README.md Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md index ac1e44f1dbe..d52fd6242ee 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -41,7 +41,7 @@ Each line is a dict like this: CPU, CUDA ## 3. Installation -Please ensure the installation of NeuralChat first by following the commands. +Please ensure the installation of requirements for NeuralChat and retrieval plugin first by following the commands. ``` git clone https://github.com/intel/intel-extension-for-transformers.git cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat @@ -87,7 +87,7 @@ python -m data_augmentation.retrieval_dataset_construction \ - `llm_model`: The path for the LLM model. - `embedding_model`: The path for the text embedding model. - `input`: The path of the file/folder/link of the content. -- `output`: The name of output files. The default value is './data'. The default output files are 'data.jsonl', 'data_minedHN.jsonl', 'data_minedHN_split.jsonl'. +- `output`: The path of output files. The default value is './data'. The default output files are './data/raw.jsonl', './data/minedHN.jsonl', './data/minedHN_split.jsonl'. - `temperature`: The value is used to modulate the next token probabilities, and will influence the distribution of similarity scores. The default value is 0.8. 
- `top_p`: If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.9. - `top_k`: The number of highest probability vocabulary tokens to keep for top-k-filtering. The default value is 40. @@ -103,7 +103,7 @@ python -m data_augmentation.retrieval_dataset_construction \ - `similarity_threshold`: The cosine similarity threshold used to filter the generated queries. The default value is 0.6. **Result**: -Three files will be generated. The default output files are `data.jsonl`, `data_minedHN.jsonl`, `data_minedHN_split.jsonl`. The third is the final output dataset, where each line is a dict like this: +Three files will be generated. The default output files are `./data/raw.jsonl`, `./data/minedHN.jsonl`, `./data/minedHN_split.jsonl`. The third is the final output dataset, where each line is a dict like this: ``` {"query": str, "pos": List[str], "neg": List[str]} ``` From 2eb084c2613318c93b57b48c9c6df6debbd91167 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 19 Mar 2024 10:03:46 +0800 Subject: [PATCH 085/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/data_augmentation/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md index d52fd6242ee..9d2668937c0 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -41,7 +41,7 @@ Each line is a dict like this: CPU, CUDA ## 3. Installation -Please ensure the installation of requirements for NeuralChat and retrieval plugin first by following the commands. +Please ensure the installation of requirements for NeuralChat and retrieval plugin first by the following commands. ``` git clone https://github.com/intel/intel-extension-for-transformers.git cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat From 510e801bb7d5959a6c03a2ca616ccee12e70cf3c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 19 Mar 2024 10:08:19 +0800 Subject: [PATCH 086/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/retriever/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md index 1d957bffdae..6280ffe76a3 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/README.md @@ -2,12 +2,11 @@ ## 1. Introduction We provide a script to evaluate the performance of the retrieval. We use two metrics: MRR (Mean reciprocal rank) and Hit (Hit Ratio). -* **MRR** -MRR is an internationally accepted mechanism for evaluating search algorithms. MRR emphasizes the position of ground truth in the retrieval list, the higher it is, the better. -* **Hit** -Hit emphasizes the accuracy of retrieval, that is, whether the ground truth is included in the retrieval items. The higher, the better. +* **MRR** is an internationally accepted mechanism for evaluating search algorithms. 
MRR emphasizes the position of ground truth in the retrieval list, the higher it is, the better. +* **Hit** emphasizes the accuracy of retrieval, that is, whether the ground truth is included in the retrieval items. The higher, the better. -## 2. Requirements +## 2. Installation +Please ensure the installation of requirements for NeuralChat and retrieval plugin by the following commands. ``` git clone https://github.com/intel/intel-extension-for-transformers.git cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat @@ -17,6 +16,7 @@ pip install -r requirements.txt ``` ## 3. Evaluate Retrieval +You can evaluate the retrieval performance by the following commands. ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever python evaluate_retrieval.py \ From dd1f37c1cb809a3f2b5964c5928004399b3048d8 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 19 Mar 2024 10:09:08 +0800 Subject: [PATCH 087/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index dddcfa9f171..4729c8abcd6 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -2,14 +2,10 @@ ## 1. Introduction [Ragas](https://github.com/explodinggradients/ragas) is a framework that helps you evaluate your Retrieval Augmented Generation (RAG) pipelines. We provide a script to use Ragas based on data files. We use four metrics: answer relevancy, faithfulness, context recall, context precision. -* **Answer relevancy** -Answer Relevancy focuses on assessing how pertinent the generated answer is to the given prompt. A lower score is assigned to answers that are incomplete or contain redundant information and higher scores indicate better relevancy. -* **Faithfulness** -Faithfulness measures the factual consistency of the generated answer against the given context. It is calculated from answer and retrieved context. The answer is scaled to (0,1) range. Higher is the better. -* **Context recall** -Context recall measures the extent to which the retrieved context aligns with the annotated answer, treated as the ground truth. It is computed based on the ground truth and the retrieved context, and the values range between 0 and 1, with higher values indicating better performance. -* **Context precision** -Context Precision is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked higher or not. Ideally all the relevant chunks must appear at the top ranks. This metric is computed using the question, ground_truth and the contexts, with values ranging between 0 and 1, where higher scores indicate better precision. +* **Answer relevancy** focuses on assessing how pertinent the generated answer is to the given prompt. A lower score is assigned to answers that are incomplete or contain redundant information and higher scores indicate better relevancy. +* **Faithfulness** measures the factual consistency of the generated answer against the given context. It is calculated from answer and retrieved context. The answer is scaled to (0,1) range. The higher, the better.
+* **Context recall** measures the extent to which the retrieved context aligns with the annotated answer, treated as the ground truth. It is computed based on the ground truth and the retrieved context, and the values range between 0 and 1, with higher values indicating better performance. +* **Context precision** is a metric that evaluates whether all of the ground-truth relevant items present in the contexts are ranked higher or not. Ideally all the relevant chunks must appear at the top ranks. This metric is computed using the question, ground_truth and the contexts, with values ranging between 0 and 1, where higher scores indicate better precision. ## 2. Installation Please install dependency using the following commands. From ed95d2d567a8eefa95d0b5a722fd4db8d5188e03 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 19 Mar 2024 10:57:45 +0800 Subject: [PATCH 088/151] Update prompt.py Signed-off-by: Liangyx2 --- .../neural_chat/prompts/prompt.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/prompts/prompt.py b/intel_extension_for_transformers/neural_chat/prompts/prompt.py index 3dc78b6b792..3f5b1dd5b32 100644 --- a/intel_extension_for_transformers/neural_chat/prompts/prompt.py +++ b/intel_extension_for_transformers/neural_chat/prompts/prompt.py @@ -335,8 +335,8 @@ def generate_sqlcoder_prompt(qurey, metadata_file): 1. 2. --- -Context: -### {context} +### Context: +{context} --- Generated questions: """ @@ -349,11 +349,11 @@ def generate_sqlcoder_prompt(qurey, metadata_file): Desired format: 1. --- -Question: -### {question} +### Question: +{question} --- -Context: -### {context} +### Context: +{context} --- Generated ground_truth: """ From e253f41cfd9b0faff27281e1e7533f46eee7ddaf Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 19 Mar 2024 11:42:11 +0800 Subject: [PATCH 089/151] Update ragas_evaluation.py Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/ragas_evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 0120e8dd387..761a53988e8 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -84,7 +84,7 @@ def ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_m langchain_llm = LangchainLLMWrapper(langchain_llm) langchain_embedding = LangchainEmbeddingsWrapper(langchain_embeddings) - score = evaluate(dataset, + score = evaluate(dataset, # pylint: disable=E1123 metrics=[answer_relevancy, faithfulness, context_recall, context_precision], llm = langchain_llm, # pylint: disable=E1123 embeddings=langchain_embedding) # pylint: disable=E1123 From fc0b6b92c4dd37f57d875f1382b9f6a692847ba4 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 20 Mar 2024 16:15:01 +0800 Subject: [PATCH 090/151] add evaluate_retrieval_auto.py Signed-off-by: Liangyx2 --- .../retriever/evaluate_retrieval_auto.py | 349 ++++++++++++++++++ 1 file changed, 349 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py new file mode 100644 index 00000000000..cc5cf8637a6 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py @@ -0,0 +1,349 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import ClassVar, Collection +from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings +from langchain.embeddings import GooglePalmEmbeddings # pylint: disable=E0611 +from langchain.text_splitter import RecursiveCharacterTextSplitter +from intel_extension_for_transformers.langchain.vectorstores import Chroma, Qdrant +import uuid +from langchain_core.documents import Document +from intel_extension_for_transformers.langchain.retrievers import ChildParentRetriever +from langchain_core.vectorstores import VectorStoreRetriever +from langchain.retrievers import BM25Retriever # pylint: disable=E0611 +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.detector.query_explainer import QueryPolisher +from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig +import jsonlines +import numpy as np +import logging +import argparse + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) + +def document_transfer(data_collection): + "Transfer the raw document into langchain supported format." + documents = [] + for data, meta in data_collection: + doc_id = str(uuid.uuid4()) + metadata = {"source": meta, "identify_id":doc_id} + doc = Document(page_content=data, metadata=metadata) + documents.append(doc) + return documents + +def document_append_id(documents): + for _doc in documents: + _doc.metadata["doc_id"] = _doc.metadata["identify_id"] + return documents + +def index_library(index_file_jsonl_path): + index_list = [] + with open(index_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=[stu["context"][0],index_file_jsonl_path] + index_list.append(passages) + return index_list + +def query_set(query_file_jsonl_path): + query_list = [] + with open(query_file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=stu["query"] + query_list.append(passages) + return query_list + +def load_list(file_jsonl_path, item): + with open(file_jsonl_path) as file: + data = [] + for stu in jsonlines.Reader(file): + content = ",".join(stu[item]) + data.append(content) + return data + +def evaluate(preds, labels, cutoffs=[1]): + """ + Evaluate MRR and Hit at cutoffs. 
+    """
+    metrics = {}
+
+    # MRR
+    mrrs = np.zeros(len(cutoffs))
+    for pred, label in zip(preds, labels):
+        jump = False
+        for i, x in enumerate(pred, 1):
+            if x in label:
+                for k, cutoff in enumerate(cutoffs):
+                    if i <= cutoff:
+                        mrrs[k] += 1 / i
+                jump = True
+            if jump:
+                break
+    mrrs /= len(preds)
+    for i, cutoff in enumerate(cutoffs):
+        mrr = mrrs[i]
+        metrics[f"MRR@{cutoff}"] = mrr
+
+    # Hit
+    hit_rate_list=[]
+    for cutoff in cutoffs:
+        hit_num = 0
+        for pred, label in zip(preds, labels):
+            hit_list=np.intersect1d(label, pred[:cutoff])
+            hit_num = hit_num+len(hit_list)
+        hit_rate = hit_num/len(labels)
+        hit_rate_list.append(hit_rate)
+    for i, cutoff in enumerate(cutoffs):
+        hit_rate = hit_rate_list[i]
+        metrics[f"Hit@{cutoff}"] = hit_rate
+
+    return metrics
+
+class Retrieval():
+    def __init__(self,
+                 vector_database="Chroma",
+                 embedding_model="BAAI/bge-large-en-v1.5",
+                 input_path = None,
+                 retrieval_type = 'default',
+                 append=True,
+                 polish=False,
+                 k=1,
+                 fetch_k=1,
+                 score_threshold=0.3,
+                 reranker_model= "BAAI/bge-reranker-large",
+                 top_n = 1,
+                 enable_rerank = False,
+                 **kwargs):
+
+        self.vector_database = vector_database
+        self.input_path = None
+        self.retrieval_type = retrieval_type
+        self.retriever = None
+        self.k = k
+        self.fetch_k = fetch_k
+        self.score_threshold = score_threshold
+        self.reranker_model= reranker_model,
+        self.top_n = top_n
+        self.enable_rerank=enable_rerank
+
+        self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \
+            if 'child_size' in kwargs else 512)
+        allowed_retrieval_type: ClassVar[Collection[str]] = (
+            "default",
+            "child_parent",
+            'bm25',
+        )
+
+        if polish:
+            self.polisher = QueryPolisher()
+        else:
+            self.polisher = None
+
+        assert self.retrieval_type in allowed_retrieval_type, "retrieval_type of {} not allowed.".format( \
+            self.retrieval_type)
+
+        self.input_path = input_path
+        assert self.input_path is not None, "Should give an input path!"
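+        # A hedged, worked illustration of evaluate() above (the doc ids are
+        # made up): with preds = [["d2", "d1"], ["d3"]], labels = [["d1"], ["d3"]]
+        # and cutoffs = [1, 2], the first relevant hit sits at rank 2 for the
+        # first query and rank 1 for the second, so MRR@1 = (0 + 1)/2 = 0.5,
+        # MRR@2 = (1/2 + 1)/2 = 0.75, Hit@1 = 1/2 = 0.5 and Hit@2 = 2/2 = 1.0.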
+ + try: + if "instruct" in embedding_model: + self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) + elif "bge" in embedding_model: + self.embeddings = HuggingFaceBgeEmbeddings( + model_name=embedding_model, + encode_kwargs={'normalize_embeddings': True}, + query_instruction="Represent this sentence for searching relevant passages:") + elif "Google" == embedding_model: + self.embeddings = GooglePalmEmbeddings() + else: + self.embeddings = HuggingFaceEmbeddings( + model_name=embedding_model, + encode_kwargs={"normalize_embeddings": True}, + ) + except Exception as e: + logging.error("Please select a proper embedding model.") + logging.error(e) + + data_collection = index_library(self.input_path) + logging.info("The parsing for the uploaded files is finished.") + + langchain_documents = document_transfer(data_collection) + logging.info("The format of parsed documents is transferred.") + + if kwargs['search_type']=="similarity": + kwargs['search_kwargs']={"k":self.k} + elif kwargs['search_type']=="mmr": + kwargs['search_kwargs']={"k":self.k, "fetch_k":self.fetch_k} + elif kwargs['search_type']=="similarity_score_threshold": + kwargs['search_kwargs']={"k":self.k, "score_threshold":self.score_threshold} + + if self.vector_database == "Chroma": + self.database = Chroma + elif self.vector_database == "Qdrant": + self.database = Qdrant + if self.retrieval_type == 'default': # Using vector store retriever + if append: + knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, + **kwargs) + else: + knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ + **kwargs) + if self.vector_database == "Qdrant" and knowledge_base.is_local(): + # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. + knowledge_base.client.close() + elif self.retrieval_type == "child_parent": # Using child-parent store retriever + child_documents = self.splitter.split_documents(langchain_documents) + langchain_documents = document_append_id(langchain_documents) + if append: + knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, + **kwargs) + child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \ + embedding=self.embeddings, **kwargs) + else: + knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) + child_knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, \ + sign='child', **kwargs) + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ + child_document_store=child_knowledge_base, **kwargs) + if self.vector_database == "Qdrant" : + # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. 
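+                # (Hedged aside) e.g. a second QdrantClient(path=...) pointed
+                # at the same local folder would fail on the storage lock until
+                # the close() calls below release it.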
+                if knowledge_base.is_local():
+                    knowledge_base.client.close()
+                if child_knowledge_base.is_local():
+                    child_knowledge_base.client.close()
+        elif self.retrieval_type == "bm25":
+            self.docs = document_append_id(langchain_documents)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type,
+                                              docs=self.docs,
+                                              reranker_model=self.reranker_model,
+                                              top_n = self.top_n,
+                                              enable_rerank = self.enable_rerank,
+                                              **kwargs)
+        logging.info("The retriever is successfully built.")
+
+    def pre_llm_inference_actions(self, model_name, query):
+        if self.polisher:
+            try:
+                query = self.polisher.polish_query(model_name, query)
+            except Exception as e:
+                logging.info(f"Polishing the user query failed: {e}")
+                raise Exception("[Retrieval ERROR] query polish failed!")
+
+        assert self.retriever is not None, "Please check the status of the retriever"
+        context = self.retriever.get_context(query)
+        return context
+
+
+class RetrieverAdapter():
+    def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \
+                 reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs):
+        self.retrieval_type = retrieval_type
+        if enable_rerank:
+            from intel_extension_for_transformers.langchain.retrievers.bge_reranker import BgeReranker
+            from FlagEmbedding import FlagReranker
+            reranker = FlagReranker(reranker_model)
+            self.reranker = BgeReranker(model = reranker, top_n=top_n)
+        else:
+            self.reranker = None
+
+        if self.retrieval_type == "default":
+            self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs)
+        elif self.retrieval_type == "bm25":
+            self.retriever = BM25Retriever.from_documents(docs, **kwargs)
+        elif self.retrieval_type == "child_parent":
+            self.retriever = ChildParentRetriever(parentstore=document_store, \
+                                                  vectorstore=child_document_store,
+                                                  **kwargs) # pylint: disable=abstract-class-instantiated
+        else:
+            logging.error('The chosen retrieval type remains outside the supported scope.')
+
+    def get_context(self, query):
+        context = []
+        retrieved_documents = self.retriever.get_relevant_documents(query)
+        if self.reranker is not None:
+            retrieved_documents = self.reranker.compress_documents(documents = retrieved_documents, query = query)
+        for doc in retrieved_documents:
+            context.append(doc.page_content)
+        return context
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--index_file_jsonl_path", type=str)
+    parser.add_argument("--query_file_jsonl_path", type=str)
+    parser.add_argument("--vector_database", type=str, default="Chroma")
+    parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5")
+    parser.add_argument("--llm_model", type=str)
+    parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large")
+
+    parser.add_argument("--retrieval_type", type=str, default='default')
+    parser.add_argument("--polish", type=bool, default=False)
+    parser.add_argument("--search_type", type=str, default="similarity")
+    parser.add_argument("--k", type=int, default=1)
+    parser.add_argument("--fetch_k", type=int)
+    parser.add_argument("--score_threshold", type=float)
+    parser.add_argument("--top_n", type=int, default=1)
+    parser.add_argument("--enable_rerank", type=bool, default=False)
+
+    args = parser.parse_args()
+
+    index_file_jsonl_path = args.index_file_jsonl_path
+    query_file_jsonl_path = args.query_file_jsonl_path
+    vector_database = args.vector_database
+    embedding_model = args.embedding_model
+    retrieval_type = args.retrieval_type
+    polish = 
args.polish + search_type = args.search_type + llm_model = args.llm_model + k = args.k + fetch_k = args.fetch_k + score_threshold = args.score_threshold + reranker_model = args.reranker_model + top_n = args.top_n + enable_rerank = args.enable_rerank + + query_list = query_set(query_file_jsonl_path) + + config = PipelineConfig(model_name_or_path=llm_model) + build_chatbot(config) + + retrieval_results=[] + for query in query_list: + context=Retrieval(input_path=index_file_jsonl_path, + vector_database=vector_database, + embedding_model=embedding_model, + retrieval_type = retrieval_type, + polish = polish, + search_type=search_type, + k=k, + fetch_k=fetch_k, + score_threshold=score_threshold, + reranker_model=reranker_model, + top_n = top_n, + enable_rerank = enable_rerank + ).pre_llm_inference_actions(model_name=llm_model, query=query) + retrieval_results.append(context) + ground_truths=load_list(query_file_jsonl_path, "pos") + metrics = evaluate(retrieval_results, ground_truths) + print(metrics) + return metrics + +if __name__ == '__main__': + main() From 6f081b5ab8acd2848c70799a35bae2d689eed414 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 20 Mar 2024 08:15:17 +0000 Subject: [PATCH 091/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../retriever/evaluate_retrieval_auto.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py index cc5cf8637a6..f3dcaa379c0 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py @@ -126,10 +126,10 @@ def __init__(self, polish=False, k=1, fetch_k=1, - score_threshold=0.3, - reranker_model= "BAAI/bge-reranker-large", - top_n = 1, - enable_rerank = False, + score_threshold=0.3, + reranker_model= "BAAI/bge-reranker-large", + top_n = 1, + enable_rerank = False, **kwargs): self.vector_database = vector_database @@ -142,7 +142,7 @@ def __init__(self, self.reranker_model= reranker_model, self.top_n = top_n self.enable_rerank=enable_rerank - + self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \ if 'child_size' in kwargs else 512) allowed_retrieval_type: ClassVar[Collection[str]] = ( @@ -231,11 +231,11 @@ def __init__(self, child_knowledge_base.client.close() elif self.retrieval_type == "bm25": self.docs = document_append_id(langchain_documents) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, - docs=self.docs, - reranker_model=self.reranker_model, - top_n = self.top_n, - enable_rerank = self.enable_rerank, + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, + docs=self.docs, + reranker_model=self.reranker_model, + top_n = self.top_n, + enable_rerank = self.enable_rerank, **kwargs) logging.info("The retriever is successfully built.") @@ -335,8 +335,8 @@ def main(): k=k, fetch_k=fetch_k, score_threshold=score_threshold, - reranker_model=reranker_model, - top_n = top_n, + reranker_model=reranker_model, + top_n = top_n, enable_rerank = enable_rerank ).pre_llm_inference_actions(model_name=llm_model, query=query) retrieval_results.append(context) From 
746adec5a8a6d1a60f0b4cb98b6e0984cd499e82 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 21 Mar 2024 16:28:58 +0800 Subject: [PATCH 092/151] Update evaluate_retrieval_auto.py Signed-off-by: Liangyx2 --- .../retriever/evaluate_retrieval_auto.py | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py index f3dcaa379c0..8d8ea935e3b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py @@ -16,17 +16,18 @@ # limitations under the License. from typing import ClassVar, Collection -from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ - HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings -from langchain.embeddings import GooglePalmEmbeddings # pylint: disable=E0611 +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings from langchain.text_splitter import RecursiveCharacterTextSplitter -from intel_extension_for_transformers.langchain.vectorstores import Chroma, Qdrant +from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant # pylint: disable=E0401, E0611 import uuid from langchain_core.documents import Document -from intel_extension_for_transformers.langchain.retrievers import ChildParentRetriever +from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever # pylint: disable=E0401, E0611 from langchain_core.vectorstores import VectorStoreRetriever -from langchain.retrievers import BM25Retriever # pylint: disable=E0611 -from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.detector.query_explainer import QueryPolisher +from langchain_community.retrievers import BM25Retriever +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.detector.query_explainer \ + import QueryPolisher from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig import jsonlines import numpy as np @@ -126,10 +127,10 @@ def __init__(self, polish=False, k=1, fetch_k=1, - score_threshold=0.3, - reranker_model= "BAAI/bge-reranker-large", - top_n = 1, - enable_rerank = False, + score_threshold=0.3, + reranker_model= "BAAI/bge-reranker-large", + top_n = 1, + enable_rerank = False, **kwargs): self.vector_database = vector_database @@ -142,7 +143,7 @@ def __init__(self, self.reranker_model= reranker_model, self.top_n = top_n self.enable_rerank=enable_rerank - + self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \ if 'child_size' in kwargs else 512) allowed_retrieval_type: ClassVar[Collection[str]] = ( @@ -231,11 +232,11 @@ def __init__(self, child_knowledge_base.client.close() elif self.retrieval_type == "bm25": self.docs = document_append_id(langchain_documents) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, - docs=self.docs, - reranker_model=self.reranker_model, - top_n = self.top_n, - enable_rerank = self.enable_rerank, + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, + 
docs=self.docs, + reranker_model=self.reranker_model, + top_n = self.top_n, + enable_rerank = self.enable_rerank, **kwargs) logging.info("The retriever is successfully built.") @@ -257,7 +258,7 @@ def __init__(self, retrieval_type='default', document_store=None, child_document reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs): self.retrieval_type = retrieval_type if enable_rerank: - from intel_extension_for_transformers.langchain.retrievers.bge_reranker import BgeReranker + from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker # pylint: disable=E0401, E0611 from FlagEmbedding import FlagReranker reranker = FlagReranker(reranker_model) self.reranker = BgeReranker(model = reranker, top_n=top_n) @@ -285,6 +286,10 @@ def get_context(self, query): return context def main(): + import os, shutil + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) + parser = argparse.ArgumentParser() parser.add_argument("--index_file_jsonl_path", type=str) parser.add_argument("--query_file_jsonl_path", type=str) @@ -335,8 +340,8 @@ def main(): k=k, fetch_k=fetch_k, score_threshold=score_threshold, - reranker_model=reranker_model, - top_n = top_n, + reranker_model=reranker_model, + top_n = top_n, enable_rerank = enable_rerank ).pre_llm_inference_actions(model_name=llm_model, query=query) retrieval_results.append(context) From 100322ed4334a55870543cdc91e2ea08a9fb31e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 21 Mar 2024 08:29:25 +0000 Subject: [PATCH 093/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../retriever/evaluate_retrieval_auto.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py index 8d8ea935e3b..3b93e8c5f3b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py @@ -127,10 +127,10 @@ def __init__(self, polish=False, k=1, fetch_k=1, - score_threshold=0.3, - reranker_model= "BAAI/bge-reranker-large", - top_n = 1, - enable_rerank = False, + score_threshold=0.3, + reranker_model= "BAAI/bge-reranker-large", + top_n = 1, + enable_rerank = False, **kwargs): self.vector_database = vector_database @@ -143,7 +143,7 @@ def __init__(self, self.reranker_model= reranker_model, self.top_n = top_n self.enable_rerank=enable_rerank - + self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \ if 'child_size' in kwargs else 512) allowed_retrieval_type: ClassVar[Collection[str]] = ( @@ -232,11 +232,11 @@ def __init__(self, child_knowledge_base.client.close() elif self.retrieval_type == "bm25": self.docs = document_append_id(langchain_documents) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, - docs=self.docs, - reranker_model=self.reranker_model, - top_n = self.top_n, - enable_rerank = self.enable_rerank, + self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, + docs=self.docs, + reranker_model=self.reranker_model, + top_n = self.top_n, + enable_rerank = self.enable_rerank, **kwargs) logging.info("The retriever is successfully 
built.") @@ -340,8 +340,8 @@ def main(): k=k, fetch_k=fetch_k, score_threshold=score_threshold, - reranker_model=reranker_model, - top_n = top_n, + reranker_model=reranker_model, + top_n = top_n, enable_rerank = enable_rerank ).pre_llm_inference_actions(model_name=llm_model, query=query) retrieval_results.append(context) From 5e07789d774d6e52c1d7af1c2abede84d94b4961 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 21 Mar 2024 16:31:42 +0800 Subject: [PATCH 094/151] Update evaluate_retrieval.py Signed-off-by: Liangyx2 --- .../evaluation/retriever/evaluate_retrieval.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py index 71a79e041ee..c4fe8303530 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py @@ -16,16 +16,16 @@ # limitations under the License. from typing import ClassVar, Collection -from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ - HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings -from langchain.embeddings import GooglePalmEmbeddings # pylint: disable=E0611 +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings from langchain.text_splitter import RecursiveCharacterTextSplitter -from intel_extension_for_transformers.langchain.vectorstores import Chroma, Qdrant +from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant # pylint: disable=E0401, E0611 import uuid from langchain_core.documents import Document -from intel_extension_for_transformers.langchain.retrievers import ChildParentRetriever +from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever # pylint: disable=E0401, E0611 from langchain_core.vectorstores import VectorStoreRetriever -from langchain.retrievers import BM25Retriever # pylint: disable=E0611 +from langchain_community.retrievers import BM25Retriever import jsonlines import numpy as np import logging @@ -224,7 +224,7 @@ def __init__(self, retrieval_type='default', document_store=None, child_document reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs): self.retrieval_type = retrieval_type if enable_rerank: - from intel_extension_for_transformers.langchain.retrievers.bge_reranker import BgeReranker + from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker # pylint: disable=E0401, E0611 from FlagEmbedding import FlagReranker reranker = FlagReranker(reranker_model) self.reranker = BgeReranker(model = reranker, top_n=top_n) From 0a2f742b48657b5e4bc31fd0d6c0c577cb7ce616 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 21 Mar 2024 16:33:09 +0800 Subject: [PATCH 095/151] Update ragas_evaluation.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 761a53988e8..744436daeec 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -22,9 +22,9 @@ context_precision, ) from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline -from intel_extension_for_transformers.langchain.embeddings import HuggingFaceEmbeddings, \ - HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings -from langchain.embeddings import GooglePalmEmbeddings # pylint: disable=E0611 +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings from ragas.llms import LangchainLLMWrapper # pylint: disable=E0611 from ragas.embeddings import LangchainEmbeddingsWrapper # pylint: disable=E0611 import pandas as pd From 17526844b731644d98f0adcd812b1cbe80a335a8 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 21 Mar 2024 16:43:00 +0800 Subject: [PATCH 096/151] Update test_evaluation.py Signed-off-by: Liangyx2 --- .../tests/nightly/tools/test_evaluation.py | 48 ++++++++++++++++--- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py index 5d3d7d578ea..f223e0f5fc5 100644 --- a/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tests/nightly/tools/test_evaluation.py @@ -41,9 +41,15 @@ def tearDown(self) -> None: return super().tearDown() def test_retrieval_dataset_construction(self): + path = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/" + if os.path.exists(path): + input_path=path + else: + input_path='../assets/docs/retrieve_multi_doc/' argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \ - '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/', \ + '--input', input_path, \ '--output', './data', \ '--range_for_sampling', '2-2', \ '--negative_number', '1'] @@ -52,24 +58,54 @@ def test_retrieval_dataset_construction(self): self.assertTrue(os.path.exists("./data/minedHN_split.jsonl")) def test_llm_generate_truth(self): + path = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl" + if os.path.exists(path): + input_path=path + else: + input_path='../tools/evaluation/data_augmentation/example.jsonl' argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ - '--input', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ + '--input', input_path, \ '--output', 'ground_truth.jsonl'] with patch('sys.argv', ['python llm_generate_truth.py'] + argv): llm_generate_truth.main() self.assertTrue(os.path.exists("ground_truth.jsonl")) def test_evaluate_retrieval(self): - argv = ['--index_file_jsonl_path', 
'/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl', \ - '--query_file_jsonl_path', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl', \ + path1 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl" + if os.path.exists(path1): + index_file_jsonl_path=path1 + else: + index_file_jsonl_path='../tools/evaluation/data_augmentation/candidate_context.jsonl' + path2 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl" + if os.path.exists(path2): + query_file_jsonl_path=path2 + else: + query_file_jsonl_path='../tools/evaluation/data_augmentation/example.jsonl' + argv = ['--index_file_jsonl_path', index_file_jsonl_path, \ + '--query_file_jsonl_path', query_file_jsonl_path, \ '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] with patch('sys.argv', ['python evaluate_retrieval.py'] + argv): result = evaluate_retrieval.main() self.assertIsNotNone(result) def test_ragas_evaluation(self): - argv = ['--answer_file', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl', \ - '--ground_truth_file', '/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl', \ + path1 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl" + if os.path.exists(path1): + answer_file_path=path1 + else: + answer_file_path='../tools/evaluation/data_augmentation/answer.jsonl' + path2 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl" + if os.path.exists(path2): + ground_truth_file_path=path2 + else: + ground_truth_file_path='../tools/evaluation/data_augmentation/ground_truth.jsonl' + argv = ['--answer_file', answer_file_path, \ + '--ground_truth_file', ground_truth_file_path, \ '--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] with patch('sys.argv', ['python ragas_evaluation.py'] + argv): From 2a2238eced2fb9b7a88d14b639026ca7a8f7ae0d Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 22 Mar 2024 14:08:08 +0800 Subject: [PATCH 097/151] Update ragas_evaluation.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py index 744436daeec..b4bccae2c14 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation.py @@ -45,7 +45,7 @@ def load_set(file_jsonl_path, item): list.append(passages) return list -def ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_model): +def ragas(answer_file, ground_truth_file, llm_model, embedding_model): question_list=load_set(answer_file, "question") answer_list=load_set(answer_file, "answer") @@ -89,7 +89,7 @@ def ragas(answer_file, 
ground_truth_file, openai_api_key, llm_model, embedding_m llm = langchain_llm, # pylint: disable=E1123 embeddings=langchain_embedding) # pylint: disable=E1123 else: - os.environ["OPENAI_API_KEY"] = openai_api_key + os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) df=score.to_pandas() @@ -100,18 +100,16 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--answer_file", type=str) parser.add_argument("--ground_truth_file", type=str) - parser.add_argument("--openai_api_key", type=str) parser.add_argument("--llm_model", type=str) parser.add_argument("--embedding_model", type=str) args = parser.parse_args() answer_file = args.answer_file ground_truth_file = args.ground_truth_file - openai_api_key = args.openai_api_key llm_model = args.llm_model embedding_model = args.embedding_model - metrics=ragas(answer_file, ground_truth_file, openai_api_key, llm_model, embedding_model) + metrics=ragas(answer_file, ground_truth_file, llm_model, embedding_model) return metrics if __name__ == '__main__': From e8f0f9cf1e3a903b068c5c94f3df3bd1d3216843 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 22 Mar 2024 14:14:07 +0800 Subject: [PATCH 098/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index 4729c8abcd6..3c69c3d2c96 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -19,11 +19,11 @@ pip install -r requirements.txt * **OpenAI** By default, ragas use OpenAI’s API to compute the score. If you’re using this metric, ensure that you set the environment key OPENAI_API_KEY with your API key. ``` +export OPENAI_API_KEY=xxx cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework python ragas_evaluation.py \ --answer_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl \ ---ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ ---openai_api_key +--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl ``` * **Langchain** You can also try other LLMs for evaluation using Langchain. @@ -51,3 +51,10 @@ The results include your input question, answer, contexts, ground_truth, as well 1 What a... The co... [Our w... The co... 0.985826 0.250000 1.0 0.0 ...... ``` +where your input question, answer, contexts, ground_truth in the first line are +``` +question: What types of platforms does the organization focus on? +answer: The organization focuses on delivering open software and hardware platforms with industry-defining standards, as well as leadership products, open and secure platforms, and resilient manufacturing. +contexts: [We aim to deliver open software and hardware platforms with industry-defining standards.] 
+ground_truth: open software and hardware platforms +``` From 8d650788bdddba6b873957ba1ea4423598acaa8c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 22 Mar 2024 14:19:30 +0800 Subject: [PATCH 099/151] Update and rename evaluate_retrieval_auto.py to evaluate_retrieval_benchmark.py Signed-off-by: Liangyx2 --- ...uate_retrieval_auto.py => evaluate_retrieval_benchmark.py} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/{evaluate_retrieval_auto.py => evaluate_retrieval_benchmark.py} (96%) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py similarity index 96% rename from intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py rename to intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py index 3b93e8c5f3b..2927fc08256 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_auto.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py @@ -302,8 +302,8 @@ def main(): parser.add_argument("--polish", type=bool, default=False) parser.add_argument("--search_type", type=str, default="similarity") parser.add_argument("--k", type=int, default=1) - parser.add_argument("--fetch_k", type=int) - parser.add_argument("--score_threshold", type=float) + parser.add_argument("--fetch_k", type=int, default=5) + parser.add_argument("--score_threshold", type=float, default=0.3) parser.add_argument("--top_n", type=int, default=1) parser.add_argument("--enable_rerank", type=bool, default=False) From a951a8950d8e8d09da0399d8d363af79e24d8716 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 25 Mar 2024 15:05:28 +0800 Subject: [PATCH 100/151] Update evaluate_retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../retriever/evaluate_retrieval_benchmark.py | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py index 2927fc08256..fccd03a576b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py @@ -115,7 +115,7 @@ def evaluate(preds, labels, cutoffs=[1]): hit_rate = hit_rate_list[i] metrics[f"Hit@{cutoff}"] = hit_rate - return metrics + return metrics["MRR@1"], metrics["Hit@1"] class Retrieval(): def __init__(self, @@ -346,9 +346,31 @@ def main(): ).pre_llm_inference_actions(model_name=llm_model, query=query) retrieval_results.append(context) ground_truths=load_list(query_file_jsonl_path, "pos") - metrics = evaluate(retrieval_results, ground_truths) - print(metrics) - return metrics + MRR, Hit = evaluate(retrieval_results, ground_truths) + + file_json_path='result.jsonl' + + if MRR and Hit: + data = { + "index_file_jsonl_path": args.index_file_jsonl_path, + "query_file_jsonl_path": args.query_file_jsonl_path, + "vector_database": args.vector_database, + "embedding_model": args.embedding_model, + "retrieval_type": args.retrieval_type, + "polish": args.polish, + 
"search_type": args.search_type, + "llm_model": args.llm_model, + "k": args.k, + "fetch_k": args.fetch_k, + "score_threshold": args.score_threshold, + "reranker_model": args.reranker_model, + "top_n": args.top_n, + "enable_rerank": args.enable_rerank, + "MRR": MRR, + "Hit": Hit, + } + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) if __name__ == '__main__': main() From 13921f638cb0b72d6630592bfa62996107d1ca75 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 25 Mar 2024 15:06:08 +0800 Subject: [PATCH 101/151] add retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../retriever/retrieval_benchmark.py | 102 ++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py new file mode 100644 index 00000000000..216a5e2946a --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py @@ -0,0 +1,102 @@ +import argparse +import os +import subprocess +import jsonlines + +def main(): + if os.path.exists("result.jsonl"): + os.remove("result.jsonl") + script_path = 'evaluate_retrieval_benchmark.py' + + parser = argparse.ArgumentParser() + parser.add_argument("--index_file_jsonl_path", type=str) + parser.add_argument("--query_file_jsonl_path", type=str) + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + args = parser.parse_args() + + arg1 = args.index_file_jsonl_path + arg2 = args.query_file_jsonl_path + arg3 = args.vector_database + arg4 = args.embedding_model + arg5 = args.retrieval_type + arg6 = args.llm_model + + arg7_list = ['default','child_parent','bm25'] + arg8_list = ['True','False'] + arg9_list = ['similarity','mmr','similarity_score_threshold'] + arg10_list = ['1', '3', '5'] + arg11_list = ['5', '10', '20'] + arg12_list = ['0.3','0.5','0.7'] + arg13_list = ['1','3', '5','10'] + arg14_list = ['True','False'] + + for arg7 in arg7_list: + print('--'*1 +'retrieval_type',arg7) + for arg8 in arg8_list: + print('--'*2 +'polish',arg8) + for arg9 in arg9_list: + print('--'*3 +'search_type',arg9) + for arg10 in arg10_list: + print('--'*4 +'k',arg10) + for arg11 in arg11_list: + print('--'*5 +'fetch_k',arg11) + for arg12 in arg12_list: + print('--'*6 +'score_threshold',arg12) + for arg13 in arg13_list: + print('--'*7 +'top_n',arg13) + for arg14 in arg14_list: + print('--'*8 +'enable_rerank',arg14) + # try: + subprocess.run(['python', + script_path, + '--index_file_jsonl_path', arg1, + '--query_file_jsonl_path', arg2, + '--vector_database', arg3, + '--embedding_model', arg4, + '--llm_model', arg5, + '--reranker_model', arg6, + '--retrieval_type', arg7, + '--polish', arg8, + '--search_type', arg9, + '--k', arg10, + '--fetch_k', arg11, + '--score_threshold', arg12, + '--top_n', arg13, + '--enable_rerank', arg14], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file_jsonl_path='result.jsonl' + + MRR_list = [] + Hit_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + MRR=stu["MRR"] + Hit=stu["Hit"] + MRR_list.append(MRR) + 
Hit_list.append(Hit) + + MRR_line_number_list = [i for i, v in enumerate(MRR_list) if v == max(MRR_list)] + Hit_line_number_list = [i for i, v in enumerate(Hit_list) if v == max(Hit_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in MRR_line_number_list: + print('max_MRR',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in Hit_line_number_list: + print('max_Hit',stu) + line+=1 + +if __name__ == '__main__': + main() \ No newline at end of file From 02c0813de15630d0782ff3a93dd81fc799d892f9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 25 Mar 2024 07:06:59 +0000 Subject: [PATCH 102/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../retriever/retrieval_benchmark.py | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py index 216a5e2946a..4ad6f25024b 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
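+#
+# (Hedged sizing note, derived from the sweep lists in this file) the nested
+# loops enumerate 3 * 2 * 3 * 3 * 3 * 3 * 4 * 2 = 3888 combinations, so one
+# full benchmark pass launches evaluate_retrieval_benchmark.py 3888 times.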
+ import argparse import os import subprocess @@ -5,7 +19,7 @@ def main(): if os.path.exists("result.jsonl"): - os.remove("result.jsonl") + os.remove("result.jsonl") script_path = 'evaluate_retrieval_benchmark.py' parser = argparse.ArgumentParser() @@ -51,20 +65,20 @@ def main(): for arg14 in arg14_list: print('--'*8 +'enable_rerank',arg14) # try: - subprocess.run(['python', - script_path, - '--index_file_jsonl_path', arg1, - '--query_file_jsonl_path', arg2, - '--vector_database', arg3, - '--embedding_model', arg4, - '--llm_model', arg5, - '--reranker_model', arg6, - '--retrieval_type', arg7, - '--polish', arg8, - '--search_type', arg9, - '--k', arg10, - '--fetch_k', arg11, - '--score_threshold', arg12, + subprocess.run(['python', + script_path, + '--index_file_jsonl_path', arg1, + '--query_file_jsonl_path', arg2, + '--vector_database', arg3, + '--embedding_model', arg4, + '--llm_model', arg5, + '--reranker_model', arg6, + '--retrieval_type', arg7, + '--polish', arg8, + '--search_type', arg9, + '--k', arg10, + '--fetch_k', arg11, + '--score_threshold', arg12, '--top_n', arg13, '--enable_rerank', arg14], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) @@ -99,4 +113,4 @@ def main(): line+=1 if __name__ == '__main__': - main() \ No newline at end of file + main() From d212d666f1dd6ae1969442885caa6bb13a9bb418 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 25 Mar 2024 15:07:56 +0800 Subject: [PATCH 103/151] Update retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../retriever/retrieval_benchmark.py | 31 +++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py index 4ad6f25024b..ec06849872e 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py @@ -1,17 +1,17 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
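+#
+# A minimal equivalent of the nested sweep below, sketched with
+# itertools.product (illustrative only; it assumes nothing beyond the
+# names already defined in this file):
+#
+#   from itertools import product
+#   for arg7, arg8, arg9, arg10, arg11, arg12, arg13, arg14 in product(
+#           arg7_list, arg8_list, arg9_list, arg10_list,
+#           arg11_list, arg12_list, arg13_list, arg14_list):
+#       subprocess.run(['python', script_path,
+#                       '--index_file_jsonl_path', arg1,
+#                       '--retrieval_type', arg7,
+#                       '--enable_rerank', arg14],  # remaining flags as below
+#                      stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+#
+# This flattens the eight loops at the cost of the per-level progress prints.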
+ import argparse import os import subprocess @@ -64,7 +64,6 @@ def main(): print('--'*7 +'top_n',arg13) for arg14 in arg14_list: print('--'*8 +'enable_rerank',arg14) - # try: subprocess.run(['python', script_path, '--index_file_jsonl_path', arg1, @@ -113,4 +112,4 @@ def main(): line+=1 if __name__ == '__main__': - main() + main() From 20529a41c8464d6e8cafb3da118e0d2715e721bd Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 26 Mar 2024 14:59:24 +0800 Subject: [PATCH 104/151] add ragas_benchmark ragas_evaluation_benchmark Signed-off-by: Liangyx2 --- .../evaluation/framework/ragas_benchmark.py | 153 +++++++++ .../framework/ragas_evaluation_benchmark.py | 295 ++++++++++++++++++ 2 files changed, 448 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py new file mode 100644 index 00000000000..71735a78168 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py @@ -0,0 +1,153 @@ +import argparse +import os +import subprocess +import jsonlines + +def main(): + if os.path.exists("result_ragas.jsonl"): + os.remove("result_ragas.jsonl") + script_path = 'ragas_evaluation_benchmark.py' + + parser = argparse.ArgumentParser() + parser.add_argument("--ground_truth_file", type=str) + parser.add_argument("--input_path", type=str) + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + args = parser.parse_args() + + arg1 = args.ground_truth_file + arg2 = args.input_path + arg3 = args.vector_database + arg4 = args.embedding_model + arg5 = args.llm_model + arg6 = args.reranker_model + + arg7_list = ['default','child_parent','bm25'] + arg8_list = ['True','False'] + arg9_list = ['similarity','mmr','similarity_score_threshold'] + arg10_list = ['1', '3', '5'] + arg11_list = ['5', '10', '20'] + arg12_list = ['0.3','0.5','0.7'] + arg13_list = ['1','3', '5','10'] + arg14_list = ['True','False'] + arg15_list = ['256','512', '768','1024'] + arg16_list = ['0.01','0.05', '0.1','0.3','0.5','0.7'] + arg17_list = ['1','3', '10','20'] + arg18_list = ['0.1','0.3', '0.5','0.7'] + arg19_list = ['1.0','1.1', '1.3','1.5','1.7'] + arg20_list = ['1','3', '10','20'] + arg21_list = ['True','False'] + + for arg7 in arg7_list: + print('--'*1 +'retrieval_type',arg7) + for arg8 in arg8_list: + print('--'*2 +'polish',arg8) + for arg9 in arg9_list: + print('--'*3 +'search_type',arg9) + for arg10 in arg10_list: + print('--'*4 +'k',arg10) + for arg11 in arg11_list: + print('--'*5 +'fetch_k',arg11) + for arg12 in arg12_list: + print('--'*6 +'score_threshold',arg12) + for arg13 in arg13_list: + print('--'*7 +'top_n',arg13) + for arg14 in arg14_list: + print('--'*8 +'enable_rerank',arg14) + for arg15 in arg15_list: + print('--'*9 +'max_chuck_size',arg15) + for arg16 in arg16_list: + print('--'*10 +'temperature',arg16) + for arg17 in arg17_list: + print('--'*11 +'top_k',arg17) + for arg18 in arg18_list: + print('--'*12 +'top_p',arg18) + for arg19 in arg19_list: + 
print('--'*13 +'repetition_penalty',arg19) + for arg20 in arg20_list: + print('--'*14 +'num_beams',arg20) + for arg21 in arg21_list: + print('--'*15 +'do_sample',arg21) + subprocess.run(['python', + script_path, + '--ground_truth_file', arg1, + '--input_path', arg2, + '--vector_database', arg3, + '--embedding_model', arg4, + '--llm_model', arg5, + '--reranker_model', arg6, + '--retrieval_type', arg7, + '--polish', arg8, + '--search_type', arg9, + '--k', arg10, + '--fetch_k', arg11, + '--score_threshold', arg12, + '--top_n', arg13, + '--enable_rerank', arg14, + '--max_chuck_size', arg15, + '--temperature', arg16, + '--top_k', arg17, + '--top_p', arg18, + '--repetition_penalty', arg19, + '--num_beams', arg20, + '--do_sample', arg21], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file_jsonl_path='result_ragas.jsonl' + + answer_relevancy_average_list = [] + faithfulness_average_list = [] + context_recall_average_list = [] + context_precision_average_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + answer_relevancy_average=stu["answer_relevancy_average"] + faithfulness_average=stu["faithfulness_average"] + context_recall_average=stu["context_recall_average"] + context_precision_average=stu["context_precision_average"] + + answer_relevancy_average_list.append(answer_relevancy_average) + faithfulness_average_list.append(faithfulness_average) + context_recall_average_list.append(context_recall_average) + context_precision_average_list.append(context_precision_average) + + answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) if v == max(answer_relevancy_average_list)] + faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) if v == max(faithfulness_average_list)] + context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) if v == max(context_recall_average_list)] + context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) if v == max(context_precision_average_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in answer_relevancy_average_line_number_list: + print('max_answer_relevancy_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in faithfulness_average_line_number_list: + print('max_faithfulness_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_recall_average_line_number_list: + print('max_context_recall_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_precision_average_line_number_list: + print('max_context_precision_average',stu) + line+=1 + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py new file mode 100644 index 00000000000..e734ac3f8a6 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -0,0 +1,295 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datasets import Dataset +import os, shutil +from ragas import evaluate # pylint: disable=E0401 +from ragas.metrics import ( # pylint: disable=E0401 + answer_relevancy, + faithfulness, + context_recall, + context_precision, +) +import pandas as pd +import jsonlines +import argparse +from intel_extension_for_transformers.neural_chat import PipelineConfig +from intel_extension_for_transformers.neural_chat import build_chatbot +from intel_extension_for_transformers.neural_chat import plugins +from intel_extension_for_transformers.neural_chat.config import GenerationConfig + +pd.set_option("display.max_rows", None) +pd.set_option("display.max_columns", None) +pd.set_option("display.width", None) +pd.set_option("display.max_colwidth", 10) + +def load_set(file_jsonl_path, item): + list = [] + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + passages=stu[item] + list.append(passages) + return list + +def ragas(answer_file, ground_truth_file): + + question_list=load_set(answer_file, "question") + answer_list=load_set(answer_file, "answer") + contexts_list=load_set(ground_truth_file, "context") + ground_truth_list=load_set(ground_truth_file, "ground_truth") + + data_samples = { + 'question': question_list, + 'answer': answer_list, + 'contexts' : contexts_list, + 'ground_truth': ground_truth_list + } + + dataset = Dataset.from_dict(data_samples) + + os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") + score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + + df=score.to_pandas() + answer_relevancy_average=df['answer_relevancy'][:].mean() + faithfulness_average=df['faithfulness'][:].mean() + context_recall_average=df['context_recall'][:].mean() + context_precision_average=df['context_precision'][:].mean() + return answer_relevancy_average, faithfulness_average, context_recall_average, context_precision_average + + +def rag(text, + input_path, + vector_database="Chroma", + embedding_model="BAAI/bge-large-en-v1.5", + retrieval_type='default', + max_chuck_size=256, + search_type="similarity", + k=1, + fetch_k=5, + score_threshold=0.3, + polish=False, + top_n=1, + enable_rerank=False, + reranker_model="BAAI/bge-reranker-large", + llm_model='intel/neural-chat-7b-v3-1', + temperature=0.01, + top_k=1, + top_p=0.1, + repetition_penalty=1.0, + num_beams=1, + do_sample=True + ): + plugins.retrieval.enable=True + plugins.retrieval.args["input_path"]=input_path + plugins.retrieval.args["vector_database"]=vector_database + plugins.retrieval.args["embedding_model"]=embedding_model + plugins.retrieval.args["retrieval_type"]=retrieval_type + plugins.retrieval.args["max_chuck_size"]=max_chuck_size + plugins.retrieval.args["search_type"]=search_type + if search_type=="similarity": + plugins.retrieval.args["search_kwargs"]={"k":k} + elif search_type=="mmr": + plugins.retrieval.args["search_kwargs"]={"k":k, "fetch_k":fetch_k} + elif search_type=="similarity_score_threshold": + plugins.retrieval.args["search_kwargs"]={"k":k, "score_threshold":score_threshold} + plugins.retrieval.args["polish"]=polish + 
plugins.retrieval.args["top_n"]=top_n + plugins.retrieval.args["enable_rerank"]=enable_rerank + plugins.retrieval.args["reranker_model"]=reranker_model + config = PipelineConfig(plugins=plugins, model_name_or_path=llm_model) + chatbot = build_chatbot(config) + response = chatbot.predict(text, + config=GenerationConfig(temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, + do_sample=do_sample)) + return response + +def result_data(ground_truth_file, + input_path, + vector_database="Chroma", + embedding_model="BAAI/bge-large-en-v1.5", + retrieval_type='default', + max_chuck_size=256, + search_type="similarity", + k=1, + fetch_k=5, + score_threshold=0.3, + polish=False, + top_n=1, + enable_rerank=False, + reranker_model="BAAI/bge-reranker-large", + llm_model='intel/neural-chat-7b-v3-1', + temperature=0.01, + top_k=1, + top_p=0.1, + repetition_penalty=1.0, + num_beams=1, + do_sample=True + ): + question_list = load_set(ground_truth_file, "question") + + result_answer_path='result_answer.jsonl' + if os.path.exists("result_answer.jsonl"): + os.remove("result_answer.jsonl") + + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) + for question in question_list: + response = rag( + question, + input_path, + vector_database, + embedding_model, + retrieval_type, + max_chuck_size, + search_type, + k, + fetch_k, + score_threshold, + polish, + top_n, + enable_rerank, + reranker_model, + llm_model, + temperature, + top_k, + top_p, + repetition_penalty, + num_beams, + do_sample + ) + data = { + "question": question, + "answer": response, + } + with jsonlines.open(result_answer_path,"a") as file_json: + file_json.write(data) + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument("--ground_truth_file", type=str) + parser.add_argument("--input_path", type=str) + + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + parser.add_argument("--retrieval_type", type=str, default='default') + parser.add_argument("--polish", type=bool, default=False) + parser.add_argument("--search_type", type=str, default="similarity") + parser.add_argument("--k", type=int, default=1) + parser.add_argument("--fetch_k", type=int, default=5) + parser.add_argument("--score_threshold", type=float, default=0.3) + parser.add_argument("--top_n", type=int, default=1) + parser.add_argument("--enable_rerank", type=bool, default=False) + + parser.add_argument("--max_chuck_size", type=int, default=256) + parser.add_argument("--temperature", type=float, default=0.01) + parser.add_argument("--top_k", type=int, default=1) + parser.add_argument("--top_p", type=float, default=0.1) + parser.add_argument("--repetition_penalty", type=float, default=1.0) + parser.add_argument("--num_beams", type=int, default=1) + parser.add_argument("--do_sample", type=bool, default=True) + + args = parser.parse_args() + + ground_truth_file = args.ground_truth_file + input_path = args.input_path + vector_database = args.vector_database + embedding_model = args.embedding_model + retrieval_type = args.retrieval_type + polish = args.polish + search_type = args.search_type + llm_model = args.llm_model + k = args.k + fetch_k = args.fetch_k + score_threshold = args.score_threshold + reranker_model = args.reranker_model + top_n = 
args.top_n + enable_rerank = args.enable_rerank + + max_chuck_size = args.max_chuck_size + temperature = args.temperature + top_k = args.top_k + top_p = args.top_p + repetition_penalty = args.repetition_penalty + num_beams = args.num_beams + do_sample = args.do_sample + + result_data(ground_truth_file, + input_path, + vector_database=vector_database, + embedding_model=embedding_model, + retrieval_type=retrieval_type, + max_chuck_size=max_chuck_size, + search_type=search_type, + k=k, + fetch_k=fetch_k, + score_threshold=score_threshold, + polish=polish, + top_n=top_n, + enable_rerank=enable_rerank, + reranker_model=reranker_model, + llm_model=llm_model, + temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, + do_sample=do_sample) + + answer_file = 'result_answer.jsonl' + answer_relevancy_average, faithfulness_average, context_recall_average, context_precision_average=ragas(answer_file, ground_truth_file) + + file_json_path='result_ragas.jsonl' + + if answer_relevancy_average and faithfulness_average and context_recall_average and context_precision_average: + data = { + "ground_truth_file": args.ground_truth_file, + "input_path": args.input_path, + "vector_database": args.vector_database, + "embedding_model": args.embedding_model, + "retrieval_type": args.retrieval_type, + "polish": args.polish, + "search_type": args.search_type, + "llm_model": args.llm_model, + "k": args.k, + "fetch_k": args.fetch_k, + "score_threshold": args.score_threshold, + "reranker_model": args.reranker_model, + "top_n": args.top_n, + "enable_rerank": args.enable_rerank, + "max_chuck_size": args.max_chuck_size, + "temperature": args.temperature, + "top_k": args.top_k, + "top_p": args.top_p, + "repetition_penalty": args.repetition_penalty, + "num_beams": args.num_beams, + "do_sample": args.do_sample, + "answer_relevancy_average": answer_relevancy_average, + "faithfulness_average": faithfulness_average, + "context_recall_average": context_recall_average, + "context_precision_average": context_precision_average, + } + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) + +if __name__ == '__main__': + main() From 5026421735111a448ec78869d68f5d895040a70b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 06:59:44 +0000 Subject: [PATCH 105/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../evaluation/framework/ragas_benchmark.py | 20 +++++- .../framework/ragas_evaluation_benchmark.py | 68 +++++++++---------- 2 files changed, 51 insertions(+), 37 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py index 71735a78168..db983251ba2 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse import os import subprocess @@ -71,8 +85,8 @@ def main(): print('--'*14 +'num_beams',arg20) for arg21 in arg21_list: print('--'*15 +'do_sample',arg21) - subprocess.run(['python', - script_path, + subprocess.run(['python', + script_path, '--ground_truth_file', arg1, '--input_path', arg2, '--vector_database', arg3, @@ -150,4 +164,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index e734ac3f8a6..6770de7749d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -85,10 +85,10 @@ def rag(text, reranker_model="BAAI/bge-reranker-large", llm_model='intel/neural-chat-7b-v3-1', temperature=0.01, - top_k=1, - top_p=0.1, - repetition_penalty=1.0, - num_beams=1, + top_k=1, + top_p=0.1, + repetition_penalty=1.0, + num_beams=1, do_sample=True ): plugins.retrieval.enable=True @@ -110,16 +110,16 @@ def rag(text, plugins.retrieval.args["reranker_model"]=reranker_model config = PipelineConfig(plugins=plugins, model_name_or_path=llm_model) chatbot = build_chatbot(config) - response = chatbot.predict(text, - config=GenerationConfig(temperature=temperature, - top_k=top_k, - top_p=top_p, - repetition_penalty=repetition_penalty, - num_beams=num_beams, + response = chatbot.predict(text, + config=GenerationConfig(temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, do_sample=do_sample)) return response -def result_data(ground_truth_file, +def result_data(ground_truth_file, input_path, vector_database="Chroma", embedding_model="BAAI/bge-large-en-v1.5", @@ -135,18 +135,18 @@ def result_data(ground_truth_file, reranker_model="BAAI/bge-reranker-large", llm_model='intel/neural-chat-7b-v3-1', temperature=0.01, - top_k=1, - top_p=0.1, - repetition_penalty=1.0, - num_beams=1, + top_k=1, + top_p=0.1, + repetition_penalty=1.0, + num_beams=1, do_sample=True ): question_list = load_set(ground_truth_file, "question") - + result_answer_path='result_answer.jsonl' if os.path.exists("result_answer.jsonl"): os.remove("result_answer.jsonl") - + if os.path.exists("output"): shutil.rmtree("output", ignore_errors=True) for question in question_list: @@ -167,10 +167,10 @@ def result_data(ground_truth_file, reranker_model, llm_model, temperature, - top_k, - top_p, - repetition_penalty, - num_beams, + top_k, + top_p, + repetition_penalty, + num_beams, do_sample ) data = { @@ -182,7 +182,7 @@ def result_data(ground_truth_file, def main(): parser = argparse.ArgumentParser() - + parser.add_argument("--ground_truth_file", type=str) parser.add_argument("--input_path", type=str) @@ -205,11 +205,11 @@ def main(): parser.add_argument("--top_k", type=int, default=1) parser.add_argument("--top_p", type=float, default=0.1) 
parser.add_argument("--repetition_penalty", type=float, default=1.0) - parser.add_argument("--num_beams", type=int, default=1) + parser.add_argument("--num_beams", type=int, default=1) parser.add_argument("--do_sample", type=bool, default=True) args = parser.parse_args() - + ground_truth_file = args.ground_truth_file input_path = args.input_path vector_database = args.vector_database @@ -232,8 +232,8 @@ def main(): repetition_penalty = args.repetition_penalty num_beams = args.num_beams do_sample = args.do_sample - - result_data(ground_truth_file, + + result_data(ground_truth_file, input_path, vector_database=vector_database, embedding_model=embedding_model, @@ -249,12 +249,12 @@ def main(): reranker_model=reranker_model, llm_model=llm_model, temperature=temperature, - top_k=top_k, - top_p=top_p, - repetition_penalty=repetition_penalty, - num_beams=num_beams, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, do_sample=do_sample) - + answer_file = 'result_answer.jsonl' answer_relevancy_average, faithfulness_average, context_recall_average, context_precision_average=ragas(answer_file, ground_truth_file) @@ -283,9 +283,9 @@ def main(): "repetition_penalty": args.repetition_penalty, "num_beams": args.num_beams, "do_sample": args.do_sample, - "answer_relevancy_average": answer_relevancy_average, - "faithfulness_average": faithfulness_average, - "context_recall_average": context_recall_average, + "answer_relevancy_average": answer_relevancy_average, + "faithfulness_average": faithfulness_average, + "context_recall_average": context_recall_average, "context_precision_average": context_precision_average, } with jsonlines.open(file_json_path,"a") as file_json: From cfa7d9c83f29e8eb3cf1d9b36d396ba16d344522 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 26 Mar 2024 15:03:17 +0800 Subject: [PATCH 106/151] Update retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../tools/evaluation/retriever/retrieval_benchmark.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py index ec06849872e..8ea0555fa1d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py @@ -18,8 +18,8 @@ import jsonlines def main(): - if os.path.exists("result.jsonl"): - os.remove("result.jsonl") + if os.path.exists("result_retrieval.jsonl"): + os.remove("result_retrieval.jsonl") script_path = 'evaluate_retrieval_benchmark.py' parser = argparse.ArgumentParser() @@ -36,8 +36,8 @@ def main(): arg2 = args.query_file_jsonl_path arg3 = args.vector_database arg4 = args.embedding_model - arg5 = args.retrieval_type - arg6 = args.llm_model + arg5 = args.llm_model + arg6 = args.reranker_model arg7_list = ['default','child_parent','bm25'] arg8_list = ['True','False'] From 8d1215e5823512b17d7b6ad3e4d4ad64de5ec1d3 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 26 Mar 2024 15:04:11 +0800 Subject: [PATCH 107/151] Update evaluate_retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../tools/evaluation/retriever/evaluate_retrieval_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py index fccd03a576b..d80a0afc5db 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py @@ -348,7 +348,7 @@ def main(): ground_truths=load_list(query_file_jsonl_path, "pos") MRR, Hit = evaluate(retrieval_results, ground_truths) - file_json_path='result.jsonl' + file_json_path='result_retrieval.jsonl' if MRR and Hit: data = { From 3458a8e210a6445d98172e7b5de58ff471aa96d1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 26 Mar 2024 15:05:15 +0800 Subject: [PATCH 108/151] Update retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../tools/evaluation/retriever/retrieval_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py index 8ea0555fa1d..24083bb933f 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py @@ -82,7 +82,7 @@ def main(): '--enable_rerank', arg14], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - file_jsonl_path='result.jsonl' + file_jsonl_path='result_retrieval.jsonl' MRR_list = [] Hit_list = [] From 4effd375b51adff2110cbd51f827b7539878d094 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 26 Mar 2024 15:31:21 +0800 Subject: [PATCH 109/151] Update ragas_evaluation_benchmark.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation_benchmark.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index 6770de7749d..7e6d4b0c676 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -256,7 +256,8 @@ def main(): do_sample=do_sample) answer_file = 'result_answer.jsonl' - answer_relevancy_average, faithfulness_average, context_recall_average, context_precision_average=ragas(answer_file, ground_truth_file) + answer_relevancy_average,faithfulness_average,context_recall_average,context_precision_average=ragas(answer_file, + ground_truth_file) file_json_path='result_ragas.jsonl' From 3c38ae68f78af790bb40391d738d9a9c0223b7a6 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 26 Mar 2024 15:34:43 +0800 Subject: [PATCH 110/151] Update ragas_benchmark.py Signed-off-by: Liangyx2 --- .../evaluation/framework/ragas_benchmark.py | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py index db983251ba2..c2eb5dbe4cd 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py @@ -1,17 +1,17 @@ -# 
Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse import os import subprocess @@ -108,7 +108,8 @@ def main(): '--repetition_penalty', arg19, '--num_beams', arg20, '--do_sample', arg21], - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) file_jsonl_path='result_ragas.jsonl' @@ -129,10 +130,14 @@ def main(): context_recall_average_list.append(context_recall_average) context_precision_average_list.append(context_precision_average) - answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) if v == max(answer_relevancy_average_list)] - faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) if v == max(faithfulness_average_list)] - context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) if v == max(context_recall_average_list)] - context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) if v == max(context_precision_average_list)] + answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) \ + if v == max(answer_relevancy_average_list)] + faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) \ + if v == max(faithfulness_average_list)] + context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) \ + if v == max(context_recall_average_list)] + context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) \ + if v == max(context_precision_average_list)] line=0 with open(file_jsonl_path) as file: @@ -164,4 +169,4 @@ def main(): if __name__ == '__main__': - main() + main() From b02da074096a8e433391a1b17e435f29e065126b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 07:35:05 +0000 Subject: [PATCH 111/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/tools/evaluation/framework/ragas_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py index c2eb5dbe4cd..797e6f80845 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py @@ -108,7 +108,7 @@ def main(): '--repetition_penalty', arg19, '--num_beams', arg20, '--do_sample', arg21], - stdout=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) file_jsonl_path='result_ragas.jsonl' From a2a7de1ec404f34e9e355759546958a52572558c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Tue, 26 Mar 2024 16:32:18 +0800 Subject: [PATCH 112/151] Update ragas_evaluation_benchmark.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation_benchmark.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index 7e6d4b0c676..696d2dd8558 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -256,8 +256,9 @@ def main(): do_sample=do_sample) answer_file = 'result_answer.jsonl' - answer_relevancy_average,faithfulness_average,context_recall_average,context_precision_average=ragas(answer_file, - ground_truth_file) + answer_relevancy_average,faithfulness_average,context_recall_average,context_precision_average=ragas( + answer_file, + ground_truth_file) file_json_path='result_ragas.jsonl' From 4191f4b8934499b76536eb4ac0e7e3d1882e8451 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 26 Mar 2024 08:32:52 +0000 Subject: [PATCH 113/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tools/evaluation/framework/ragas_evaluation_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index 696d2dd8558..2365535052a 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -257,7 +257,7 @@ def main(): answer_file = 'result_answer.jsonl' answer_relevancy_average,faithfulness_average,context_recall_average,context_precision_average=ragas( - answer_file, + answer_file, ground_truth_file) file_json_path='result_ragas.jsonl' From 35b2d7dfe9ba3c4721d70d04250086c4a3f58010 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 27 Mar 2024 10:27:55 +0800 Subject: [PATCH 114/151] Update evaluate_retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../tools/evaluation/retriever/evaluate_retrieval_benchmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py index d80a0afc5db..26c22b2a5db 100644 --- 
a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py @@ -369,6 +369,7 @@ def main(): "MRR": MRR, "Hit": Hit, } + print(data) with jsonlines.open(file_json_path,"a") as file_json: file_json.write(data) From 56037b925ff6836fe53abf7d08cf2c8022ed5d24 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 27 Mar 2024 10:28:21 +0800 Subject: [PATCH 115/151] Update ragas_evaluation_benchmark.py Signed-off-by: Liangyx2 --- .../tools/evaluation/framework/ragas_evaluation_benchmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index 2365535052a..003271502a9 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -290,6 +290,7 @@ def main(): "context_recall_average": context_recall_average, "context_precision_average": context_precision_average, } + print(data) with jsonlines.open(file_json_path,"a") as file_json: file_json.write(data) From de44f0db2796cfd17083b893ab18ce62cae2139d Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 27 Mar 2024 10:29:07 +0800 Subject: [PATCH 116/151] add retrieval_benchmark.sh Signed-off-by: Liangyx2 --- .../retriever/retrieval_benchmark.sh | 93 +++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh new file mode 100644 index 00000000000..162237bc26c --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh @@ -0,0 +1,93 @@ +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + retrieval_type='default' + polish=False + search_type="similarity" + k=1 + fetch_k=5 + score_threshold=0.3 + top_n=1 + enable_rerank=False + + for var in "$@" + do + case $var in + --index_file_jsonl_path=*) + index_file_jsonl_path=$(echo $var |cut -f2 -d=) + ;; + --query_file_jsonl_path=*) + query_file_jsonl_path=$(echo $var |cut -f2 -d=) + ;; + --vector_database=*) + vector_database=$(echo $var |cut -f2 -d=) + ;; + --embedding_model=*) + embedding_model=$(echo $var |cut -f2 -d=) + ;; + --llm_model=*) + llm_model=$(echo $var |cut -f2 -d=) + ;; + --reranker_model=*) + reranker_model=$(echo ${var} |cut -f2 -d=) + ;; + --retrieval_type=*) + retrieval_type=$(echo $var |cut -f2 -d=) + ;; + --polish=*) + polish=$(echo $var |cut -f2 -d=) + ;; + --search_type=*) + search_type=$(echo $var |cut -f2 -d=) + ;; + --k=*) + k=$(echo $var |cut -f2 -d=) + ;; + --fetch_k=*) + fetch_k=$(echo $var |cut -f2 -d=) + ;; + --score_threshold=*) + score_threshold=$(echo ${var} |cut -f2 -d=) + ;; + --top_n=*) + top_n=$(echo ${var} |cut -f2 -d=) + ;; + --enable_rerank=*) + enable_rerank=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + +# run_benchmark +function run_benchmark { + + python -u ./evaluate_retrieval_benchmark.py \ + --index_file_jsonl_path 
${index_file_jsonl_path} \ + --query_file_jsonl_path ${query_file_jsonl_path} \ + --vector_database ${vector_database} \ + --embedding_model ${embedding_model} \ + --llm_model ${llm_model} \ + --reranker_model ${reranker_model} \ + --retrieval_type ${retrieval_type} \ + --polish ${polish} \ + --search_type ${search_type} \ + --k ${k} \ + --fetch_k ${fetch_k} \ + --score_threshold ${score_threshold} \ + --top_n ${top_n} \ + --enable_rerank ${enable_rerank} + +} + +main "$@" \ No newline at end of file From 67456e42f6eb6e9b39ccac7bf896d15402edccd2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 27 Mar 2024 02:29:28 +0000 Subject: [PATCH 117/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../evaluation/retriever/retrieval_benchmark.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh index 162237bc26c..40365ad1825 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ set -x function main { From 2a913360410ba48d601de5df375865744325cfa0 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 27 Mar 2024 10:45:08 +0800 Subject: [PATCH 118/151] add ragas_benchmark.sh Signed-off-by: Liangyx2 --- .../evaluation/framework/ragas_benchmark.sh | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh new file mode 100644 index 00000000000..0ff6e7c0b41 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh @@ -0,0 +1,128 @@ +set -x + +function main { + + init_params "$@" + run_benchmark + +} + +# init params +function init_params { + retrieval_type='default' + polish=False + search_type="similarity" + k=1 + fetch_k=5 + score_threshold=0.3 + top_n=1 + enable_rerank=False + max_chuck_size=256 + temperature=0.01 + top_k=1 + top_p=0.1 + repetition_penalty=1.0 + num_beams=1 + do_sample=True + + for var in "$@" + do + case $var in + --ground_truth_file=*) + ground_truth_file=$(echo $var |cut -f2 -d=) + ;; + --input_path=*) + input_path=$(echo $var |cut -f2 -d=) + ;; + --vector_database=*) + vector_database=$(echo $var |cut -f2 -d=) + ;; + --embedding_model=*) + embedding_model=$(echo $var |cut -f2 -d=) + ;; + --llm_model=*) + llm_model=$(echo $var |cut -f2 -d=) + ;; + --reranker_model=*) + reranker_model=$(echo ${var} |cut -f2 -d=) + ;; + --retrieval_type=*) + retrieval_type=$(echo $var |cut -f2 -d=) + ;; + --polish=*) + polish=$(echo $var |cut -f2 -d=) + ;; + --search_type=*) + search_type=$(echo $var |cut -f2 -d=) + ;; + --k=*) + k=$(echo $var |cut -f2 -d=) + ;; + --fetch_k=*) + fetch_k=$(echo $var |cut -f2 -d=) + ;; + --score_threshold=*) + score_threshold=$(echo ${var} |cut -f2 -d=) + ;; + --top_n=*) + top_n=$(echo ${var} |cut -f2 -d=) + ;; + --enable_rerank=*) + enable_rerank=$(echo $var |cut -f2 -d=) + ;; + --max_chuck_size=*) + max_chuck_size=$(echo $var |cut -f2 -d=) + ;; + --temperature=*) + temperature=$(echo $var |cut -f2 -d=) + ;; + --top_k=*) + top_k=$(echo $var |cut -f2 -d=) + ;; + --top_p=*) + top_p=$(echo $var |cut -f2 -d=) + ;; + --repetition_penalty=*) + repetition_penalty=$(echo ${var} |cut -f2 -d=) + ;; + --num_beams=*) + num_beams=$(echo ${var} |cut -f2 -d=) + ;; + --do_sample=*) + do_sample=$(echo $var |cut -f2 -d=) + ;; + esac + done + +} + + + +# run_benchmark +function run_benchmark { + + python -u ./ragas_evaluation_benchmark.py \ + --ground_truth_file ${ground_truth_file} \ + --input_path ${input_path} \ + --vector_database ${vector_database} \ + --embedding_model ${embedding_model} \ + --llm_model ${llm_model} \ + --reranker_model ${reranker_model} \ + --retrieval_type ${retrieval_type} \ + --polish ${polish} \ + --search_type ${search_type} \ + --k ${k} \ + --fetch_k ${fetch_k} \ + --score_threshold ${score_threshold} \ + --top_n ${top_n} \ + --enable_rerank ${enable_rerank} + --max_chuck_size ${max_chuck_size} \ + --temperature ${temperature} \ + --top_k ${top_k} \ + --top_p ${top_p} \ + --repetition_penalty ${repetition_penalty} \ + --num_beams ${num_beams} \ + --do_sample ${do_sample} +} + +main "$@" \ No newline at end of file From 8f05a34cf663953fae312458b3b6f6fff1bda867 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: 
Wed, 27 Mar 2024 02:45:29 +0000 Subject: [PATCH 119/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../tools/evaluation/framework/ragas_benchmark.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh index 0ff6e7c0b41..f2038b1bf17 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + set -x function main { From c64ca3c24be671d23278752f37c5ebd49b3594d1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 27 Mar 2024 10:51:02 +0800 Subject: [PATCH 120/151] add data.txt Signed-off-by: Liangyx2 --- .../tools/evaluation/data_augmentation/data.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt new file mode 100644 index 00000000000..46f0e982af6 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt @@ -0,0 +1,10 @@ +We aim to deliver open software and hardware platforms with industry-defining standards. +Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet. +We invest in public and private companies and do not always realize a return on our investments. +The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors. +With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success. +The COVID-19 pandemic could materially adversely affect our financial condition and results of operations. +Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings. +We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all. +We receive a significant portion of our revenue from a limited number of customers. +Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years. 
\ No newline at end of file From fbef1f6f22428b16826b3ac2496ea75bce603933 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Wed, 27 Mar 2024 10:54:23 +0800 Subject: [PATCH 121/151] Update ragas_benchmark.sh Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/ragas_benchmark.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh index f2038b1bf17..ec40501fadf 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh @@ -129,7 +129,7 @@ function run_benchmark { --fetch_k ${fetch_k} \ --score_threshold ${score_threshold} \ --top_n ${top_n} \ - --enable_rerank ${enable_rerank} + --enable_rerank ${enable_rerank} \ --max_chuck_size ${max_chuck_size} \ --temperature ${temperature} \ --top_k ${top_k} \ @@ -139,4 +139,4 @@ function run_benchmark { --do_sample ${do_sample} } -main "$@" \ No newline at end of file +main "$@" From f50aeb40321306db822c261718236ca8ba0ff936 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 09:53:47 +0800 Subject: [PATCH 122/151] Update ragas_evaluation_benchmark.py Signed-off-by: Liangyx2 --- .../framework/ragas_evaluation_benchmark.py | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index 003271502a9..82cb6dabce7 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -21,6 +21,12 @@ context_recall, context_precision, ) +from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline +from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ + HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 +from langchain_community.embeddings import GooglePalmEmbeddings +from ragas.llms import LangchainLLMWrapper # pylint: disable=E0611 +from ragas.embeddings import LangchainEmbeddingsWrapper # pylint: disable=E0611 import pandas as pd import jsonlines import argparse @@ -42,7 +48,7 @@ def load_set(file_jsonl_path, item): list.append(passages) return list -def ragas(answer_file, ground_truth_file): +def ragas(answer_file, ground_truth_file, llm_model, embedding_model, use_openai_key): question_list=load_set(answer_file, "question") answer_list=load_set(answer_file, "answer") @@ -58,8 +64,36 @@ def ragas(answer_file, ground_truth_file): dataset = Dataset.from_dict(data_samples) - os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") - score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + if use_openai_key: + os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY") + score = evaluate(dataset,metrics=[answer_relevancy, faithfulness, context_recall, context_precision]) + else: + langchain_llm = HuggingFacePipeline.from_model_id( + model_id=llm_model, + task="text-generation", + pipeline_kwargs={"max_new_tokens": 128}, + ) + if "instruct" in embedding_model: + 
langchain_embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) + elif "bge" in embedding_model: + langchain_embeddings = HuggingFaceBgeEmbeddings( + model_name=embedding_model, + encode_kwargs={'normalize_embeddings': True}, + query_instruction="Represent this sentence for searching relevant passages:") + elif "Google" == embedding_model: + langchain_embeddings = GooglePalmEmbeddings() + else: + langchain_embeddings = HuggingFaceEmbeddings( + model_name=embedding_model, + encode_kwargs={"normalize_embeddings": True}, + ) + + langchain_llm = LangchainLLMWrapper(langchain_llm) + langchain_embedding = LangchainEmbeddingsWrapper(langchain_embeddings) + score = evaluate(dataset, # pylint: disable=E1123 + metrics=[answer_relevancy, faithfulness, context_recall, context_precision], + llm = langchain_llm, # pylint: disable=E1123 + embeddings=langchain_embedding) # pylint: disable=E1123 df=score.to_pandas() answer_relevancy_average=df['answer_relevancy'][:].mean() @@ -185,6 +219,7 @@ def main(): parser.add_argument("--ground_truth_file", type=str) parser.add_argument("--input_path", type=str) + parser.add_argument("--use_openai_key", default=False, action='store_true') parser.add_argument("--vector_database", type=str, default="Chroma") parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") @@ -192,13 +227,13 @@ def main(): parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") parser.add_argument("--retrieval_type", type=str, default='default') - parser.add_argument("--polish", type=bool, default=False) + parser.add_argument("--polish", default=False, action='store_true') parser.add_argument("--search_type", type=str, default="similarity") parser.add_argument("--k", type=int, default=1) parser.add_argument("--fetch_k", type=int, default=5) parser.add_argument("--score_threshold", type=float, default=0.3) parser.add_argument("--top_n", type=int, default=1) - parser.add_argument("--enable_rerank", type=bool, default=False) + parser.add_argument("--enable_rerank", default=False, action='store_true') parser.add_argument("--max_chuck_size", type=int, default=256) parser.add_argument("--temperature", type=float, default=0.01) @@ -206,12 +241,14 @@ def main(): parser.add_argument("--top_p", type=float, default=0.1) parser.add_argument("--repetition_penalty", type=float, default=1.0) parser.add_argument("--num_beams", type=int, default=1) - parser.add_argument("--do_sample", type=bool, default=True) + parser.add_argument("--do_sample", default=False, action='store_true') args = parser.parse_args() ground_truth_file = args.ground_truth_file input_path = args.input_path + use_openai_key = args.use_openai_key + vector_database = args.vector_database embedding_model = args.embedding_model retrieval_type = args.retrieval_type @@ -233,6 +270,7 @@ def main(): num_beams = args.num_beams do_sample = args.do_sample + result_data(ground_truth_file, input_path, vector_database=vector_database, @@ -258,7 +296,10 @@ def main(): answer_file = 'result_answer.jsonl' answer_relevancy_average,faithfulness_average,context_recall_average,context_precision_average=ragas( answer_file, - ground_truth_file) + ground_truth_file, + llm_model, + embedding_model, + use_openai_key) file_json_path='result_ragas.jsonl' @@ -266,6 +307,7 @@ def main(): data = { "ground_truth_file": args.ground_truth_file, "input_path": args.input_path, + "use_openai_key": args.use_openai_key, "vector_database": args.vector_database, "embedding_model": args.embedding_model, 
"retrieval_type": args.retrieval_type, From 84aea7c8008be37d65c36222c64889f0be5988a0 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 09:54:11 +0800 Subject: [PATCH 123/151] Update ragas_benchmark.sh Signed-off-by: Liangyx2 --- .../evaluation/framework/ragas_benchmark.sh | 48 +++++++++++-------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh index ec40501fadf..6d3e49c6b6a 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh @@ -1,17 +1,3 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - set -x function main { @@ -24,20 +10,17 @@ function main { # init params function init_params { retrieval_type='default' - polish=False search_type="similarity" k=1 fetch_k=5 score_threshold=0.3 top_n=1 - enable_rerank=False max_chuck_size=256 temperature=0.01 top_k=1 top_p=0.1 repetition_penalty=1.0 num_beams=1 - do_sample=True for var in "$@" do @@ -48,6 +31,9 @@ function init_params { --input_path=*) input_path=$(echo $var |cut -f2 -d=) ;; + --use_openai_key=*) + use_openai_key=$(echo $var |cut -f2 -d=) + ;; --vector_database=*) vector_database=$(echo $var |cut -f2 -d=) ;; @@ -115,28 +101,50 @@ function init_params { # run_benchmark function run_benchmark { + if [[ ${use_openai_key} == True ]]; then + use_openai_key="--use_openai_key" + else + use_openai_key="" + fi + if [[ ${polish} == True ]]; then + polish="--polish" + else + polish="" + fi + if [[ ${enable_rerank} == True ]]; then + enable_rerank="--enable_rerank" + else + enable_rerank="" + fi + if [[ ${do_sample} == True ]]; then + do_sample="--do_sample" + else + do_sample="" + fi + python -u ./ragas_evaluation_benchmark.py \ --ground_truth_file ${ground_truth_file} \ --input_path ${input_path} \ + ${use_openai_key} \ --vector_database ${vector_database} \ --embedding_model ${embedding_model} \ --llm_model ${llm_model} \ --reranker_model ${reranker_model} \ --retrieval_type ${retrieval_type} \ - --polish ${polish} \ + ${polish} \ --search_type ${search_type} \ --k ${k} \ --fetch_k ${fetch_k} \ --score_threshold ${score_threshold} \ --top_n ${top_n} \ - --enable_rerank ${enable_rerank} \ + ${enable_rerank} \ --max_chuck_size ${max_chuck_size} \ --temperature ${temperature} \ --top_k ${top_k} \ --top_p ${top_p} \ --repetition_penalty ${repetition_penalty} \ --num_beams ${num_beams} \ - --do_sample ${do_sample} + ${do_sample} } main "$@" From ad1814ab68973a2a29b89780a6b6ee175ab6b5ea Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Mar 2024 01:54:37 +0000 Subject: [PATCH 124/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
.../tools/evaluation/framework/ragas_benchmark.sh | 14 ++++++++++++++ .../framework/ragas_evaluation_benchmark.py | 4 ++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh index 6d3e49c6b6a..a717f23cfdf 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.sh @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + set -x function main { diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index 82cb6dabce7..7abf65bc6c9 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -248,7 +248,7 @@ def main(): ground_truth_file = args.ground_truth_file input_path = args.input_path use_openai_key = args.use_openai_key - + vector_database = args.vector_database embedding_model = args.embedding_model retrieval_type = args.retrieval_type @@ -297,7 +297,7 @@ def main(): answer_relevancy_average,faithfulness_average,context_recall_average,context_precision_average=ragas( answer_file, ground_truth_file, - llm_model, + llm_model, embedding_model, use_openai_key) From 932562d665f49ae093e0e9fb584bf4876383b7a8 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 09:54:50 +0800 Subject: [PATCH 125/151] Update and rename ragas_benchmark.py to ragas_superbenchmark.py Signed-off-by: Liangyx2 --- ...s_benchmark.py => ragas_superbenchmark.py} | 151 +++++++++--------- 1 file changed, 77 insertions(+), 74 deletions(-) rename intel_extension_for_transformers/neural_chat/tools/evaluation/framework/{ragas_benchmark.py => ragas_superbenchmark.py} (66%) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py similarity index 66% rename from intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py rename to intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py index 797e6f80845..ba63c8bbb5d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py @@ -20,11 +20,12 @@ def main(): if os.path.exists("result_ragas.jsonl"): os.remove("result_ragas.jsonl") - script_path = 'ragas_evaluation_benchmark.py' + script_path = 'ragas_benchmark.sh' 
parser = argparse.ArgumentParser() parser.add_argument("--ground_truth_file", type=str) parser.add_argument("--input_path", type=str) + parser.add_argument("--use_openai_key", default=False, action='store_true') parser.add_argument("--vector_database", type=str, default="Chroma") parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") parser.add_argument("--llm_model", type=str) @@ -34,80 +35,82 @@ def main(): arg1 = args.ground_truth_file arg2 = args.input_path - arg3 = args.vector_database - arg4 = args.embedding_model - arg5 = args.llm_model - arg6 = args.reranker_model - - arg7_list = ['default','child_parent','bm25'] - arg8_list = ['True','False'] - arg9_list = ['similarity','mmr','similarity_score_threshold'] - arg10_list = ['1', '3', '5'] - arg11_list = ['5', '10', '20'] - arg12_list = ['0.3','0.5','0.7'] - arg13_list = ['1','3', '5','10'] - arg14_list = ['True','False'] - arg15_list = ['256','512', '768','1024'] - arg16_list = ['0.01','0.05', '0.1','0.3','0.5','0.7'] - arg17_list = ['1','3', '10','20'] - arg18_list = ['0.1','0.3', '0.5','0.7'] - arg19_list = ['1.0','1.1', '1.3','1.5','1.7'] - arg20_list = ['1','3', '10','20'] - arg21_list = ['True','False'] - - for arg7 in arg7_list: - print('--'*1 +'retrieval_type',arg7) - for arg8 in arg8_list: - print('--'*2 +'polish',arg8) - for arg9 in arg9_list: - print('--'*3 +'search_type',arg9) - for arg10 in arg10_list: - print('--'*4 +'k',arg10) - for arg11 in arg11_list: - print('--'*5 +'fetch_k',arg11) - for arg12 in arg12_list: - print('--'*6 +'score_threshold',arg12) - for arg13 in arg13_list: - print('--'*7 +'top_n',arg13) - for arg14 in arg14_list: - print('--'*8 +'enable_rerank',arg14) - for arg15 in arg15_list: - print('--'*9 +'max_chuck_size',arg15) - for arg16 in arg16_list: - print('--'*10 +'temperature',arg16) - for arg17 in arg17_list: - print('--'*11 +'top_k',arg17) - for arg18 in arg18_list: - print('--'*12 +'top_p',arg18) - for arg19 in arg19_list: - print('--'*13 +'repetition_penalty',arg19) - for arg20 in arg20_list: - print('--'*14 +'num_beams',arg20) - for arg21 in arg21_list: - print('--'*15 +'do_sample',arg21) - subprocess.run(['python', + arg3 = str(args.use_openai_key) + arg4 = args.vector_database + arg5 = args.embedding_model + arg6 = args.llm_model + arg7 = args.reranker_model + + arg8_list = ['default','child_parent','bm25'] + arg9_list = ['True','False'] + arg10_list = ['similarity','mmr','similarity_score_threshold'] + arg11_list = ['1', '3', '5'] + arg12_list = ['5', '10', '20'] + arg13_list = ['0.3','0.5','0.7'] + arg14_list = ['1','3', '5','10'] + arg15_list = ['True','False'] + arg16_list = ['256','512', '768','1024'] + arg17_list = ['0.01','0.05', '0.1','0.3','0.5','0.7'] + arg18_list = ['1','3', '10','20'] + arg19_list = ['0.1','0.3', '0.5','0.7'] + arg20_list = ['1.0','1.1', '1.3','1.5','1.7'] + arg21_list = ['1','3', '10','20'] + arg22_list = ['True','False'] + + for arg8 in arg8_list: + print('--'*1 +'retrieval_type',arg8) + for arg9 in arg9_list: + print('--'*2 +'polish',arg9) + for arg10 in arg10_list: + print('--'*3 +'search_type',arg10) + for arg11 in arg11_list: + print('--'*4 +'k',arg11) + for arg12 in arg12_list: + print('--'*5 +'fetch_k',arg12) + for arg13 in arg13_list: + print('--'*6 +'score_threshold',arg13) + for arg14 in arg14_list: + print('--'*7 +'top_n',arg14) + for arg15 in arg15_list: + print('--'*8 +'enable_rerank',arg15) + for arg16 in arg16_list: + print('--'*9 +'max_chuck_size',arg16) + for arg17 in arg17_list: + print('--'*10 +'temperature',arg17) 
+ for arg18 in arg18_list: + print('--'*11 +'top_k',arg18) + for arg19 in arg19_list: + print('--'*12 +'top_p',arg19) + for arg20 in arg20_list: + print('--'*13 +'repetition_penalty',arg20) + for arg21 in arg21_list: + print('--'*14 +'num_beams',arg21) + for arg22 in arg22_list: + print('--'*15 +'do_sample',arg22) + subprocess.run(['bash', script_path, - '--ground_truth_file', arg1, - '--input_path', arg2, - '--vector_database', arg3, - '--embedding_model', arg4, - '--llm_model', arg5, - '--reranker_model', arg6, - '--retrieval_type', arg7, - '--polish', arg8, - '--search_type', arg9, - '--k', arg10, - '--fetch_k', arg11, - '--score_threshold', arg12, - '--top_n', arg13, - '--enable_rerank', arg14, - '--max_chuck_size', arg15, - '--temperature', arg16, - '--top_k', arg17, - '--top_p', arg18, - '--repetition_penalty', arg19, - '--num_beams', arg20, - '--do_sample', arg21], + '--ground_truth_file='+arg1, + '--input_path='+arg2, + '--use_openai_key='+arg3, + '--vector_database='+arg4, + '--embedding_model='+arg5, + '--llm_model='+arg6, + '--reranker_model='+arg7, + '--retrieval_type='+arg8, + '--polish='+arg9, + '--search_type='+arg10, + '--k='+arg11, + '--fetch_k='+arg12, + '--score_threshold='+arg13, + '--top_n='+arg14, + '--enable_rerank='+arg15, + '--max_chuck_size='+arg16, + '--temperature='+arg17, + '--top_k='+arg18, + '--top_p='+arg19, + '--repetition_penalty='+arg20, + '--num_beams='+arg21, + '--do_sample='+arg22], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) From 50d8c8373a58dede2bda7cea28e5ff80eabbdfd4 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 10:17:01 +0800 Subject: [PATCH 126/151] Update evaluate_retrieval_benchmark.py Signed-off-by: Liangyx2 --- .../evaluation/retriever/evaluate_retrieval_benchmark.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py index 26c22b2a5db..ac00952180f 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py @@ -299,13 +299,13 @@ def main(): parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") parser.add_argument("--retrieval_type", type=str, default='default') - parser.add_argument("--polish", type=bool, default=False) + parser.add_argument("--polish", default=False, action='store_true') parser.add_argument("--search_type", type=str, default="similarity") parser.add_argument("--k", type=int, default=1) parser.add_argument("--fetch_k", type=int, default=5) parser.add_argument("--score_threshold", type=float, default=0.3) parser.add_argument("--top_n", type=int, default=1) - parser.add_argument("--enable_rerank", type=bool, default=False) + parser.add_argument("--enable_rerank", default=False, action='store_true') args = parser.parse_args() From a4ea5dd0d71361b81f2e57fa849d5c5e5dd157da Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 10:17:27 +0800 Subject: [PATCH 127/151] Update retrieval_benchmark.sh Signed-off-by: Liangyx2 --- .../retriever/retrieval_benchmark.sh | 33 ++++++++----------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh
index 40365ad1825..c9ded476025 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh
@@ -1,17 +1,3 @@
-# Copyright (c) 2024 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
 set -x
 
 function main {
@@ -24,13 +10,11 @@ function main {
 # init params
 function init_params {
   retrieval_type='default'
-  polish=False
   search_type="similarity"
   k=1
   fetch_k=5
   score_threshold=0.3
   top_n=1
-  enable_rerank=False
 
   for var in "$@"
   do
@@ -86,6 +70,17 @@ function init_params {
 
 # run_benchmark
 function run_benchmark {
+    if [[ ${polish} == True ]]; then
+        polish="--polish"
+    else
+        polish=""
+    fi
+    if [[ ${enable_rerank} == True ]]; then
+        enable_rerank="--enable_rerank"
+    else
+        enable_rerank=""
+    fi
+
     python -u ./evaluate_retrieval_benchmark.py \
         --index_file_jsonl_path ${index_file_jsonl_path} \
         --query_file_jsonl_path ${query_file_jsonl_path} \
@@ -94,14 +89,14 @@ function run_benchmark {
         --llm_model ${llm_model} \
        --reranker_model ${reranker_model} \
         --retrieval_type ${retrieval_type} \
-        --polish ${polish} \
+        ${polish} \
         --search_type ${search_type} \
         --k ${k} \
         --fetch_k ${fetch_k} \
         --score_threshold ${score_threshold} \
         --top_n ${top_n} \
-        --enable_rerank ${enable_rerank}
+        ${enable_rerank}
 
 }
 
-main "$@"
\ No newline at end of file
+main "$@"

From 6e29d4304e5b1e877aeeb1361c4a7ec27596a45c Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 28 Mar 2024 02:17:52 +0000
Subject: [PATCH 128/151] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../evaluation/retriever/retrieval_benchmark.sh | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh
index c9ded476025..903f1cf9f4e 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.sh
@@ -1,3 +1,17 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ set -x function main { From 702f9a9b22f2de660cb85ad743585fd6208ee2dd Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 10:18:54 +0800 Subject: [PATCH 129/151] Update and rename retrieval_benchmark.py to retrieval_superbenchmark.py Signed-off-by: Liangyx2 --- ...nchmark.py => retrieval_superbenchmark.py} | 49 +++++++------------ 1 file changed, 18 insertions(+), 31 deletions(-) rename intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/{retrieval_benchmark.py => retrieval_superbenchmark.py} (74%) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py similarity index 74% rename from intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py rename to intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py index 24083bb933f..d66d932bb88 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py @@ -1,17 +1,3 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- import argparse import os import subprocess @@ -20,7 +6,7 @@ def main(): if os.path.exists("result_retrieval.jsonl"): os.remove("result_retrieval.jsonl") - script_path = 'evaluate_retrieval_benchmark.py' + script_path = 'retrieval_benchmark.sh' parser = argparse.ArgumentParser() parser.add_argument("--index_file_jsonl_path", type=str) @@ -64,22 +50,23 @@ def main(): print('--'*7 +'top_n',arg13) for arg14 in arg14_list: print('--'*8 +'enable_rerank',arg14) - subprocess.run(['python', - script_path, - '--index_file_jsonl_path', arg1, - '--query_file_jsonl_path', arg2, - '--vector_database', arg3, - '--embedding_model', arg4, - '--llm_model', arg5, - '--reranker_model', arg6, - '--retrieval_type', arg7, - '--polish', arg8, - '--search_type', arg9, - '--k', arg10, - '--fetch_k', arg11, - '--score_threshold', arg12, - '--top_n', arg13, - '--enable_rerank', arg14], + # try: + subprocess.run(['bash', + script_path, + '--index_file_jsonl_path='+arg1, + '--query_file_jsonl_path='+arg2, + '--vector_database='+arg3, + '--embedding_model='+arg4, + '--llm_model='+arg5, + '--reranker_model='+arg6, + '--retrieval_type='+arg7, + '--polish='+arg8, + '--search_type='+arg9, + '--k='+arg10, + '--fetch_k='+arg11, + '--score_threshold='+arg12, + '--top_n='+arg13, + '--enable_rerank='+arg14], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) file_jsonl_path='result_retrieval.jsonl' From 0452526d2f2fd7935976b61a44352091c10e555b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Mar 2024 02:19:17 +0000 Subject: [PATCH 130/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../retriever/retrieval_superbenchmark.py | 42 ++++++++++++------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py index d66d932bb88..edcaa85c8c1 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
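+
+# Overview: this driver enumerates the full cross product of the retrieval
+# parameter lists (retrieval_type, polish, search_type, k, fetch_k,
+# score_threshold, top_n, enable_rerank) and launches retrieval_benchmark.sh
+# once per combination, passing every setting as a single '--key=value' token.
+# Illustrative call (values are placeholders):
+#   bash retrieval_benchmark.sh --retrieval_type=default --k=1 --polish=True ...
+# Each run writes its scores into result_retrieval.jsonl, which is read back
+# afterwards to report the parameter sets with the highest MRR and Hit.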
+ import argparse import os import subprocess @@ -51,20 +65,20 @@ def main(): for arg14 in arg14_list: print('--'*8 +'enable_rerank',arg14) # try: - subprocess.run(['bash', - script_path, - '--index_file_jsonl_path='+arg1, - '--query_file_jsonl_path='+arg2, - '--vector_database='+arg3, - '--embedding_model='+arg4, - '--llm_model='+arg5, - '--reranker_model='+arg6, - '--retrieval_type='+arg7, - '--polish='+arg8, - '--search_type='+arg9, - '--k='+arg10, - '--fetch_k='+arg11, - '--score_threshold='+arg12, + subprocess.run(['bash', + script_path, + '--index_file_jsonl_path='+arg1, + '--query_file_jsonl_path='+arg2, + '--vector_database='+arg3, + '--embedding_model='+arg4, + '--llm_model='+arg5, + '--reranker_model='+arg6, + '--retrieval_type='+arg7, + '--polish='+arg8, + '--search_type='+arg9, + '--k='+arg10, + '--fetch_k='+arg11, + '--score_threshold='+arg12, '--top_n='+arg13, '--enable_rerank='+arg14], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) From 008a892e72821e39870bb7bbd0cd1728bfac9881 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 15:12:02 +0800 Subject: [PATCH 131/151] add README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 199 ++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/README.md diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md new file mode 100644 index 00000000000..1a1b362a8af --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -0,0 +1,199 @@ +# Retrieval and Rag Benchmark + +## 1. Introduction +We provide scripts of the benchmark of Retrieval and Rag. For data augmentation, please goto [Retrieval Data Augmentation](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation) + +## 2. Retrieval Benchmark +### Installation +Please ensure the installation of requirements for NeuralChat and retrieval plugin by the following commands. +``` +git clone https://github.com/intel/intel-extension-for-transformers.git +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat +pip install -r requirements.txt +cd pipeline/plugins/retrieval +pip install -r requirements.txt +``` + +### Benchmark +You can run retrieval benchmark by the following commands. +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever +bash retrieval_benchmark.sh \ +--index_file_jsonl_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl \ +--query_file_jsonl_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl +--vector_database=Chroma \ +--embedding_model= \ +--llm_model= \ +--reranker_model= +``` +**Some Important Arguments**: +- `index_file_jsonl_path`: path of JSON data including candidate context where each line is a dict like this:```{"context": List[str]}```. See [candidate_context.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl) for a data file. 
+- `query_file_jsonl_path`: path of JSON data including queries and positives where each line is a dict like this:```{"query": str, "pos": List[str]}```. See [example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl) for a data file. +- `vector_database`: The vector database for constructing the knowledge base. +- `embedding_model`: The name or path for the text embedding model. The default value is "BAAI/bge-base-en-v1.5". Other options are "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". +- `llm_model`: The name or path for the LLM model. +- `reranker_model`: The name or path for the reranker model. +- `retrieval_type`: The type of the retriever. The default value is "default". The other options are "child_parent" and "bm25". +- `polish`: Whether to polish the input query before processing. The default value is False. +- `search_type`: Type of search to perform. The default value is "similarity". The other options are "mmr" and "similarity_score_threshold". +- `k`: The number of the returned most similar documents. The default value is 1. +- `fetch_k`: The number of Documents to fetch to pass to MMR algorithm. The default value is 5. +- `score_threshold`: The similar score threshold for the retrieved documents. The default value is 0.3. +- `top_n`: The return number of the reranker model. The default value is 1. +- `enable_rerank`: Whether to enable retrieval then rerank pipeline. The default value is False. + +**Result**: +The result will include all parameter values and MRR (Mean reciprocal rank) and Hit (Hit Ratio) values. +``` +{'index_file_jsonl_path': '/path/to/candidate_context.jsonl', 'query_file_jsonl_path': '/path/to/example.jsonl', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': False, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1/', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': False, 'MRR': 0.8, 'Hit': 0.8} +``` + +### SuperBenchmark +You can run retrieval superbenchmark by the following commands. +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever +python retrieval_superbenchmark.py \ +--index_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl \ +--query_file_jsonl_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl \ +--vector_database Chroma \ +--embedding_model \ +--llm_model \ +--reranker_model +``` + +This will run benckmark multiple times based on the following different parameter values and output the parameter values that achieve the maximum MRR and Hit. 
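+
+Concretely, the sweep is the full cross product of the value lists shown below, so the number of runs multiplies with every list. A minimal sketch of how such a grid expands (a standalone illustration over a hypothetical subset of the values, not code taken from the script itself):
+
+```
+import itertools
+
+# Hypothetical subset of the swept lists; the complete lists appear below.
+grid = {
+    "retrieval_type": ["default", "child_parent", "bm25"],
+    "k": [1, 3, 5],
+    "enable_rerank": [True, False],
+}
+
+# 3 * 3 * 2 = 18 combinations, each mapping to one
+# `bash retrieval_benchmark.sh --key=value ...` invocation.
+for combo in itertools.product(*grid.values()):
+    print(dict(zip(grid.keys(), combo)))
+```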
+ +**Adjustable Parameters**: +- `retrieval_type`: ['default','child_parent','bm25'] +- `polish`: [True, False] +- `search_type`: ['similarity','mmr','similarity_score_threshold'] +- `k`: [1, 3, 5] +- `fetch_k`: [5, 10, 20] +- `score_threshold`: [0.3, 0.5, 0.7] +- `top_n`: [1, 3, 5, 10] +- `enable_rerank`: [True, False] + +**Result**: +``` +max_MRR {'index_file_jsonl_path': '/path/to/candidate_context.jsonl', 'query_file_jsonl_path': '/path/to/example.jsonl', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1/', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'MRR': 0.7, 'Hit': 0.7} +... +max_Hit {'index_file_jsonl_path': '/path/to/candidate_context.jsonl', 'query_file_jsonl_path': '/path/to/example.jsonl', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1/', 'k': 1, 'fetch_k': 20, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 3, 'enable_rerank': True, 'MRR': 0.7, 'Hit': 0.7} +... +``` + +## 3. Rag Benchmark +### Installation +Please ensure the installation of requirements for NeuralChat and retrieval plugin first by the following commands. +``` +git clone https://github.com/intel/intel-extension-for-transformers.git +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat +pip install -r requirements.txt +cd pipeline/plugins/retrieval +pip install -r requirements.txt +``` +After that, please install dependency using the following commands. +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +pip install -r requirements.txt +``` + +### Benchmark +You can run rag benchmark by the following commands. +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +bash ragas_benchmark.sh \ +--ground_truth_file=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ +--input_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt \ +--vector_database=Chroma \ +--embedding_model= \ +--llm_model= \ +--reranker_model= +``` + +**Some Important Arguments**: +- `ground_truth_file`: The path of JSON data including question, context, and ground_truth, where each line is a dict like this:```{"question": str, "context": List[str], "ground_truth": str}```. See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for a data file. The `"question"` of `answer_file` and `ground_truth_file` should correspond one-to-one. +- `input_path`: The path of the file/folder/link of the content. +- `use_openai_key`: Whether to utilize OpenAI for running ragas to compute the score. If you’re using openai, ensure you have your OpenAI key ready and available in your environment by `export OPENAI_API_KEY=xxx`. The default value is False. +- `vector_database`: The vector database for constructing the knowledge base. +- `embedding_model`: The name or path for the text embedding model. 
The default value is "BAAI/bge-base-en-v1.5". Other options are "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". +- `llm_model`: The name or path for the LLM model. +- `reranker_model`: The name or path for the reranker model. +- `retrieval_type`: The type of the retriever. The default value is "default". The other options are "child_parent" and "bm25". +- `polish`: Whether to polish the input query before processing. The default value is False. +- `search_type`: Type of search to perform. The default value is "similarity". The other options are "mmr" and "similarity_score_threshold". +- `k`: The number of the returned most similar documents. The default value is 1. +- `fetch_k`: The number of Documents to fetch to pass to MMR algorithm. The default value is 5. +- `score_threshold`: The similar score threshold for the retrieved documents. The default value is 0.3. +- `top_n`: The return number of the reranker model. The default value is 1. +- `enable_rerank`: Whether to enable retrieval then rerank pipeline. The default value is False. +- `max_chuck_size`: The max token length for a single chuck in the knowledge base. The default value is 256. +- `temperature`: The value is used to modulate the next token probabilities, and will influence the distribution of similarity scores. The default value is 0.01. +- `top_k`: The number of highest probability vocabulary tokens to keep for top-k-filtering. The default value is 1. +- `top_p`: If set to float < 1, only the smallest set of most probable tokens with probabilities that add up to top_p or higher are kept for generation. The default value is 0.1. +- `repetition_penalty`: The parameter for repetition penalty. 1.0 means no penalty. The default value is 1.0. +- `num_beams`: Number of beams for beam search. 1 means no beam search. The default value is 1. +- `do_sample`: Whether or not to use sampling; use greedy decoding otherwise. The default value is False. + +**Result**: +The result will include all parameter values and values of Average Answer Relevancy, Average Faithfulness, Average Context Recall, Average Context Precision. +``` +{"ground_truth_file": "ground_truth.jsonl", "input_path": "data.txt", "vector_database": "Chroma", "embedding_model": "/path/to/bge-large-en-v1.5", "retrieval_type": "default", "polish": true, "search_type": "similarity", "llm_model": "/path/to/neural-chat-7b-v3-1/", "k": 1, "fetch_k": 5, "score_threshold": 0.3, "reranker_model": "/path/to/bge-reranker-large", "top_n": 1, "enable_rerank": true, "max_chuck_size": 256, "temperature": 0.01, "top_k": 1, "top_p": 0.1, "repetition_penalty": 1.0, "num_beams": 1, "do_sample": true, "answer_relevancy_average": 0.937748267362332, "faithfulness_average": 0.5833333333333333, "context_recall_average": 1.0, "context_precision_average": 0.49999999995} +``` + +### SuperBenchmark +You can run rag superbenchmark by the following commands. 
+``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +python ragas_benchmark.py \ +--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ +--input_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt \ +--vector_database Chroma \ +--embedding_model \ +--llm_model \ +--reranker_model +``` + +If you utilize OpenAI for running ragas, ensure you have your OpenAI key ready and available in your environment. This will make multiple calls to the OpenAI API, please be aware of your costs. +``` +cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework +export OPENAI_API_KEY=xxx +python ragas_benchmark.py \ +--ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ +--input_path /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt \ +--use_openai_key \ +--vector_database Chroma \ +--embedding_model \ +--llm_model \ +--reranker_model +``` + +This will run benckmark multiple times based on the following different parameter values and output the parameter values that achieve the maximum Average Answer Relevancy, Average Faithfulness, Average Context Recall, Average Context Precision. + +**Adjustable Parameters**: +- `retrieval_type`: ['default','child_parent','bm25'] +- `polish`: [True, False] +- `search_type`: ['similarity','mmr','similarity_score_threshold'] +- `k`: [1, 3, 5] +- `fetch_k`: [5, 10, 20] +- `score_threshold`: [0.3, 0.5, 0.7] +- `top_n`: [1, 3, 5, 10] +- `enable_rerank`: [True, False] +- `max_chuck_size`: [256, 512, 768, 1024] +- `temperature`: [0.01, 0.05, 0.1, 0.3, 0.5, 0.7] +- `top_k`: [1, 3, 10, 20] +- `top_p`: [0.1, 0.3, 0.5, 0.7] +- `repetition_penalty`: [1.0, 1.1, 1.3, 1.5, 1.7] +- `num_beams`: [1, 3, 10, 20] +- `do_sample`: [True, False] + +**Result**: +``` +max_answer_relevancy_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.0, 'num_beams': 20, 'do_sample': True, 'answer_relevancy_average': 0.9533325665270252, 'faithfulness_average': 0.5083333333333333, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +... 
+max_faithfulness_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.0, 'num_beams': 1, 'do_sample': True, 'answer_relevancy_average': 0.9354267206448277, 'faithfulness_average': 0.675, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +... +max_context_recall_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.0, 'num_beams': 1, 'do_sample': True, 'answer_relevancy_average': 0.9354267206448277, 'faithfulness_average': 0.675, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +... +max_context_precision_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.1, 'num_beams': 1, 'do_sample': True, 'answer_relevancy_average': 0.7429146997306499, 'faithfulness_average': 0.6666666666666667, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +... +``` \ No newline at end of file From 53038375e8fa5ec83bf5ab7a4a70d5e0dcf2112c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 28 Mar 2024 07:12:24 +0000 Subject: [PATCH 132/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/tools/evaluation/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 1a1b362a8af..0eca8f43d70 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -61,7 +61,7 @@ python retrieval_superbenchmark.py \ --reranker_model ``` -This will run benckmark multiple times based on the following different parameter values and output the parameter values that achieve the maximum MRR and Hit. +This will run benchmark multiple times based on the following different parameter values and output the parameter values that achieve the maximum MRR and Hit. 
**Adjustable Parameters**: - `retrieval_type`: ['default','child_parent','bm25'] @@ -167,7 +167,7 @@ python ragas_benchmark.py \ --reranker_model ``` -This will run benckmark multiple times based on the following different parameter values and output the parameter values that achieve the maximum Average Answer Relevancy, Average Faithfulness, Average Context Recall, Average Context Precision. +This will run benchmark multiple times based on the following different parameter values and output the parameter values that achieve the maximum Average Answer Relevancy, Average Faithfulness, Average Context Recall, Average Context Precision. **Adjustable Parameters**: - `retrieval_type`: ['default','child_parent','bm25'] @@ -196,4 +196,4 @@ max_context_recall_average {'ground_truth_file': 'ground_truth.jsonl', 'input_pa ... max_context_precision_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.1, 'num_beams': 1, 'do_sample': True, 'answer_relevancy_average': 0.7429146997306499, 'faithfulness_average': 0.6666666666666667, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} ... -``` \ No newline at end of file +``` From 8957b189759775da833c2d1bc4c54a0896f2eec4 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Thu, 28 Mar 2024 15:13:18 +0800 Subject: [PATCH 133/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 0eca8f43d70..5080c29c655 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -1,7 +1,7 @@ # Retrieval and Rag Benchmark ## 1. Introduction -We provide scripts of the benchmark of Retrieval and Rag. For data augmentation, please goto [Retrieval Data Augmentation](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation) +We provide scripts of the benchmark of Retrieval and Rag. For data augmentation, please go to [Retrieval Data Augmentation](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation). ## 2. 
Retrieval Benchmark ### Installation From 96f477c4ea609720b9beb2fe190d7c6eafa04075 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 29 Mar 2024 09:48:29 +0800 Subject: [PATCH 134/151] Update README.md Signed-off-by: Liangyx2 --- .../evaluation/data_augmentation/README.md | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md index 9d2668937c0..f5c03008230 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/README.md @@ -68,24 +68,24 @@ The effect is to generate several specific open-ended questions based on the con ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation python -m data_augmentation.retrieval_dataset_construction \ ---llm_model \ ---embedding_model \ ---input +--llm_model \ +--embedding_model \ +--input ``` * **On CUDA** ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation python -m data_augmentation.retrieval_dataset_construction \ ---llm_model \ ---embedding_model \ ---input \ +--llm_model \ +--embedding_model \ +--input \ --use_gpu_for_searching True ``` **Some Important Arguments**: -- `llm_model`: The path for the LLM model. -- `embedding_model`: The path for the text embedding model. +- `llm_model`: The name or path for the LLM model. +- `embedding_model`: The name or path for the text embedding model. - `input`: The path of the file/folder/link of the content. - `output`: The path of output files. The default value is './data'. The default output files are './data/raw.jsonl', './data/minedHN.jsonl', './data/minedHN_split.jsonl'. - `temperature`: The value is used to modulate the next token probabilities, and will influence the distribution of similarity scores. The default value is 0.8. @@ -93,7 +93,7 @@ python -m data_augmentation.retrieval_dataset_construction \ - `top_k`: The number of highest probability vocabulary tokens to keep for top-k-filtering. The default value is 40. - `repetition_penalty`: The parameter for repetition penalty. 1.0 means no penalty. The default value is 2.0. - `max_new_tokens`: The maximum numbers of tokens to generate, ignoring the number of tokens in the prompt. The default value is 48. -- `do_sample`: Whether or not to use sampling ; use greedy decoding otherwise. The default value is True. +- `do_sample`: Whether or not to use sampling; use greedy decoding otherwise. The default value is True. - `num_beams`: Number of beams for beam search. 1 means no beam search. The default value is 2. - `num_return_sequences`: The number of independently computed returned sequences for each element in the batch. The default value is 2. - `use_cache`: Whether or not the model should use the past last key/values attentions (if applicable to the model) to speed up decoding. The default value is True. @@ -116,13 +116,13 @@ The effect is to generate the right answer based on the context and question pro ``` cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation python llm_generate_truth.py \ ---llm_model \ +--llm_model \ --input example.jsonl \ --output ground_truth.jsonl ``` **Some Important Arguments**: -- `llm_model`: The path for the LLM model. 
+- `llm_model`: The name or path for the LLM model. - `input`: The path of JSON data including queries and positives where each line is a dict like this:```{"query": str, "pos": List[str]}```. See [example.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl) for a data file. - `output`: The path of the output JSON data. - `temperature`: The value is used to modulate the next token probabilities, and will influence the distribution of similarity scores. The default value is 0.8. From c99856da7bda2b7823026613902e303f68a43276 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 29 Mar 2024 09:49:05 +0800 Subject: [PATCH 135/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/framework/README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md index 3c69c3d2c96..3b95a39310c 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/README.md @@ -32,16 +32,15 @@ cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat python ragas_evaluation.py \ --answer_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl \ --ground_truth_file /path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl \ ---llm_model \ ---embedding_model +--llm_model \ +--embedding_model ``` **Some Important Arguments**: - `answer_file`: The path of JSON data including question and answer, where each line is a dict like this:```{"question": str, "answer": str}```. See [answer.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl) for a data file. - `ground_truth_file`: The path of JSON data including question, context, and ground_truth, where each line is a dict like this:```{"question": str, "context": List[str], "ground_truth": str}```. See [ground_truth.jsonl](https://github.com/intel/intel-extension-for-transformers/blob/master/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl) for a data file. The `"question"` of `answer_file` and `ground_truth_file` should correspond one-to-one. -- `openai_api_key`: If you utilize OpenAI for running ragas, ensure you have your OpenAI key ready and available in your environment. -- `llm_model`: If you utilize Langchain for running ragas, you should input the path for the LLM model. -- `embedding_model`: If you utilize Langchain for running ragas, you should input the path for the text embedding model. You can use "BAAI/bge-base-en-v1.5", "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". +- `llm_model`: If you utilize Langchain for running ragas, you should input the name or path for the LLM model. +- `embedding_model`: If you utilize Langchain for running ragas, you should input the name or path for the text embedding model. 
You can use "BAAI/bge-base-en-v1.5", "BAAI/bge-large-en-v1.5", "thenlper/gte-large", "infgrad/stella-base-en-v2", "thenlper/gte-base", "intfloat/e5-large-v2", "hkunlp/instructor-xl", and "hkunlp/instructor-large". ## 4. Result The results include your input question, answer, contexts, ground_truth, as well as output answer relevancy, faithfulness, context recall, context precision. From 19dfb93e99d5e56d3440b9e74363b35626339d97 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 10:59:58 +0800 Subject: [PATCH 136/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 5080c29c655..69a9b67838d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -44,9 +44,22 @@ bash retrieval_benchmark.sh \ **Result**: The result will include all parameter values and MRR (Mean reciprocal rank) and Hit (Hit Ratio) values. -``` -{'index_file_jsonl_path': '/path/to/candidate_context.jsonl', 'query_file_jsonl_path': '/path/to/example.jsonl', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': False, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1/', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': False, 'MRR': 0.8, 'Hit': 0.8} -``` +| 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | +| 'query_file_jsonl_path' | '/path/to/example.jsonl' | +| 'vector_database' | 'Chroma'| +| 'embedding_model' | '/path/to/bge-large-en-v1.5' | +| 'retrieval_type' | 'default' | +| 'polish' | False | +| 'search_type' | 'similarity' | +| 'llm_model' | '/path/to/neural-chat-7b-v3-1/' | +| 'k' | 1 | +| 'fetch_k' | 5 | +| 'score_threshold' | 0.3 | +| 'reranker_model' | '/path/to/bge-reranker-large' | +| 'top_n' | 1 | +| 'enable_rerank' | False | +| 'MRR' | 0.8 | +| 'Hit' | 0.8 | ### SuperBenchmark You can run retrieval superbenchmark by the following commands. From 99940f3a61106cdd51544bb4902b8adcb8b4fd34 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 11:02:05 +0800 Subject: [PATCH 137/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 69a9b67838d..9c38994fbd0 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -44,6 +44,8 @@ bash retrieval_benchmark.sh \ **Result**: The result will include all parameter values and MRR (Mean reciprocal rank) and Hit (Hit Ratio) values. 
+| Parameter & Result | Value | +| :----: | :----: | | 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | | 'query_file_jsonl_path' | '/path/to/example.jsonl' | | 'vector_database' | 'Chroma'| From 464d52bc5335462234e8d8f25329e12a3be716e2 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 11:19:35 +0800 Subject: [PATCH 138/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 192 ++++++++++++++++-- 1 file changed, 179 insertions(+), 13 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 9c38994fbd0..09297fcf9a8 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -89,12 +89,46 @@ This will run benchmark multiple times based on the following different paramete - `enable_rerank`: [True, False] **Result**: -``` -max_MRR {'index_file_jsonl_path': '/path/to/candidate_context.jsonl', 'query_file_jsonl_path': '/path/to/example.jsonl', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1/', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'MRR': 0.7, 'Hit': 0.7} +max_MRR +| Parameter & Result | Value | +| :----: | :----: | +| 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | +| 'query_file_jsonl_path' | '/path/to/example.jsonl' | +| 'vector_database' | 'Chroma'| +| 'embedding_model' | '/path/to/bge-large-en-v1.5' | +| 'retrieval_type' | 'default' | +| 'polish' | True | +| 'search_type' | 'similarity' | +| 'llm_model' | '/path/to/neural-chat-7b-v3-1/' | +| 'k' | 1 | +| 'fetch_k' | 5 | +| 'score_threshold' | 0.3 | +| 'reranker_model' | '/path/to/bge-reranker-large' | +| 'top_n' | 1 | +| 'enable_rerank' | True | +| 'MRR' | 0.7 | +| 'Hit' | 0.7 | ... -max_Hit {'index_file_jsonl_path': '/path/to/candidate_context.jsonl', 'query_file_jsonl_path': '/path/to/example.jsonl', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1/', 'k': 1, 'fetch_k': 20, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 3, 'enable_rerank': True, 'MRR': 0.7, 'Hit': 0.7} +max_Hit +| Parameter & Result | Value | +| :----: | :----: | +| 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | +| 'query_file_jsonl_path' | '/path/to/example.jsonl' | +| 'vector_database' | 'Chroma'| +| 'embedding_model' | '/path/to/bge-large-en-v1.5' | +| 'retrieval_type' | 'default' | +| 'polish' | True | +| 'search_type' | 'similarity' | +| 'llm_model' | '/path/to/neural-chat-7b-v3-1/' | +| 'k' | 1 | +| 'fetch_k' | 20 | +| 'score_threshold' | 0.3 | +| 'reranker_model' | '/path/to/bge-reranker-large' | +| 'top_n' | 3 | +| 'enable_rerank' | True | +| 'MRR' | 0.7 | +| 'Hit' | 0.7 | ... -``` ## 3. Rag Benchmark ### Installation @@ -151,9 +185,35 @@ bash ragas_benchmark.sh \ **Result**: The result will include all parameter values and values of Average Answer Relevancy, Average Faithfulness, Average Context Recall, Average Context Precision. 
-``` -{"ground_truth_file": "ground_truth.jsonl", "input_path": "data.txt", "vector_database": "Chroma", "embedding_model": "/path/to/bge-large-en-v1.5", "retrieval_type": "default", "polish": true, "search_type": "similarity", "llm_model": "/path/to/neural-chat-7b-v3-1/", "k": 1, "fetch_k": 5, "score_threshold": 0.3, "reranker_model": "/path/to/bge-reranker-large", "top_n": 1, "enable_rerank": true, "max_chuck_size": 256, "temperature": 0.01, "top_k": 1, "top_p": 0.1, "repetition_penalty": 1.0, "num_beams": 1, "do_sample": true, "answer_relevancy_average": 0.937748267362332, "faithfulness_average": 0.5833333333333333, "context_recall_average": 1.0, "context_precision_average": 0.49999999995} -``` + +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.0 | +| "num_beams" | 1 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.937748267362332 | +| "faithfulness_average" | 0.5833333333333333 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | + ### SuperBenchmark You can run rag superbenchmark by the following commands. @@ -202,13 +262,119 @@ This will run benchmark multiple times based on the following different paramete - `do_sample`: [True, False] **Result**: -``` -max_answer_relevancy_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.0, 'num_beams': 20, 'do_sample': True, 'answer_relevancy_average': 0.9533325665270252, 'faithfulness_average': 0.5083333333333333, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +max_answer_relevancy_average +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.0 | +| "num_beams" | 20 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.9533325665270252 | +| "faithfulness_average" | 0.5083333333333333 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | ... 
-max_faithfulness_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.0, 'num_beams': 1, 'do_sample': True, 'answer_relevancy_average': 0.9354267206448277, 'faithfulness_average': 0.675, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +max_faithfulness_average +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.0 | +| "num_beams" | 1 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.9354267206448277 | +| "faithfulness_average" | 0.675 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | ... -max_context_recall_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.0, 'num_beams': 1, 'do_sample': True, 'answer_relevancy_average': 0.9354267206448277, 'faithfulness_average': 0.675, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +max_context_recall_average +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.0 | +| "num_beams" | 1 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.9354267206448277 | +| "faithfulness_average" | 0.675 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | ... 
-max_context_precision_average {'ground_truth_file': 'ground_truth.jsonl', 'input_path': 'data.txt', 'vector_database': 'Chroma', 'embedding_model': '/path/to/bge-large-en-v1.5', 'retrieval_type': 'default', 'polish': True, 'search_type': 'similarity', 'llm_model': '/path/to/neural-chat-7b-v3-1', 'k': 1, 'fetch_k': 5, 'score_threshold': 0.3, 'reranker_model': '/path/to/bge-reranker-large', 'top_n': 1, 'enable_rerank': True, 'max_chuck_size': 256, 'temperature': 0.01, 'top_k': 1, 'top_p': 0.1, 'repetition_penalty': 1.1, 'num_beams': 1, 'do_sample': True, 'answer_relevancy_average': 0.7429146997306499, 'faithfulness_average': 0.6666666666666667, 'context_recall_average': 1.0, 'context_precision_average': 0.49999999995} +max_context_precision_average +| Parameter & Result | Value | +| :----: | :----: | +| "ground_truth_file" | "ground_truth.jsonl" | +| "input_path" | "data.txt" | +| "vector_database" | "Chroma" | +| "embedding_model" | "/path/to/bge-large-en-v1.5" | +| "retrieval_type" | "default" | +| "polish" | True | +| "search_type" | "similarity" | +| "llm_model" | "/path/to/neural-chat-7b-v3-1/" | +| "k" | 1 | +| "fetch_k" | 5 | +| "score_threshold" | 0.3 | +| "reranker_model" | "/path/to/bge-reranker-large" | +| "top_n" | 1 | +| "enable_rerank" | True | +| "max_chuck_size" | 256 | +| "temperature" | 0.01 | +| "top_k" | 1 | +| "top_p" | 0.1 | +| "repetition_penalty" | 1.1 | +| "num_beams" | 1 | +| "do_sample" | True | +| "answer_relevancy_average" | 0.7429146997306499 | +| "faithfulness_average" | 0.6666666666666667 | +| "context_recall_average" | 1.0 | +| "context_precision_average" | 0.49999999995 | ... -``` From da2e8292a8e0dd04d93172b33e719b5b64aac3f2 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 11:22:04 +0800 Subject: [PATCH 139/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 09297fcf9a8..efa02e9fbc3 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -44,6 +44,7 @@ bash retrieval_benchmark.sh \ **Result**: The result will include all parameter values and MRR (Mean reciprocal rank) and Hit (Hit Ratio) values. +``` | Parameter & Result | Value | | :----: | :----: | | 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | @@ -62,6 +63,7 @@ The result will include all parameter values and MRR (Mean reciprocal rank) and | 'enable_rerank' | False | | 'MRR' | 0.8 | | 'Hit' | 0.8 | +``` ### SuperBenchmark You can run retrieval superbenchmark by the following commands. @@ -90,6 +92,7 @@ This will run benchmark multiple times based on the following different paramete **Result**: max_MRR +``` | Parameter & Result | Value | | :----: | :----: | | 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | @@ -108,8 +111,10 @@ max_MRR | 'enable_rerank' | True | | 'MRR' | 0.7 | | 'Hit' | 0.7 | +``` ... max_Hit +``` | Parameter & Result | Value | | :----: | :----: | | 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | @@ -128,6 +133,7 @@ max_Hit | 'enable_rerank' | True | | 'MRR' | 0.7 | | 'Hit' | 0.7 | +``` ... ## 3. 
Rag Benchmark @@ -185,7 +191,7 @@ bash ragas_benchmark.sh \ **Result**: The result will include all parameter values and values of Average Answer Relevancy, Average Faithfulness, Average Context Recall, Average Context Precision. - +``` | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -213,7 +219,7 @@ The result will include all parameter values and values of Average Answer Releva | "faithfulness_average" | 0.5833333333333333 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | - +``` ### SuperBenchmark You can run rag superbenchmark by the following commands. @@ -263,6 +269,7 @@ This will run benchmark multiple times based on the following different paramete **Result**: max_answer_relevancy_average +``` | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -290,8 +297,10 @@ max_answer_relevancy_average | "faithfulness_average" | 0.5083333333333333 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | +``` ... max_faithfulness_average +``` | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -319,8 +328,10 @@ max_faithfulness_average | "faithfulness_average" | 0.675 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | +``` ... max_context_recall_average +``` | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -348,8 +359,10 @@ max_context_recall_average | "faithfulness_average" | 0.675 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | +``` ... max_context_precision_average +``` | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -377,4 +390,5 @@ max_context_precision_average | "faithfulness_average" | 0.6666666666666667 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | +``` ... From 3ce2cb20477b85e8cb29f73b9f2b9fc554fb4174 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 11:24:37 +0800 Subject: [PATCH 140/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index efa02e9fbc3..bc66378e5fa 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -44,7 +44,6 @@ bash retrieval_benchmark.sh \ **Result**: The result will include all parameter values and MRR (Mean reciprocal rank) and Hit (Hit Ratio) values. -``` | Parameter & Result | Value | | :----: | :----: | | 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | @@ -63,7 +62,6 @@ The result will include all parameter values and MRR (Mean reciprocal rank) and | 'enable_rerank' | False | | 'MRR' | 0.8 | | 'Hit' | 0.8 | -``` ### SuperBenchmark You can run retrieval superbenchmark by the following commands. @@ -91,6 +89,7 @@ This will run benchmark multiple times based on the following different paramete - `enable_rerank`: [True, False] **Result**: +``` max_MRR ``` | Parameter & Result | Value | @@ -133,8 +132,8 @@ max_Hit | 'enable_rerank' | True | | 'MRR' | 0.7 | | 'Hit' | 0.7 | -``` ... +``` ## 3. 
Rag Benchmark ### Installation @@ -191,7 +190,6 @@ bash ragas_benchmark.sh \ **Result**: The result will include all parameter values and values of Average Answer Relevancy, Average Faithfulness, Average Context Recall, Average Context Precision. -``` | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -219,7 +217,6 @@ The result will include all parameter values and values of Average Answer Releva | "faithfulness_average" | 0.5833333333333333 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | -``` ### SuperBenchmark You can run rag superbenchmark by the following commands. @@ -268,6 +265,7 @@ This will run benchmark multiple times based on the following different paramete - `do_sample`: [True, False] **Result**: +``` max_answer_relevancy_average ``` | Parameter & Result | Value | @@ -392,3 +390,4 @@ max_context_precision_average | "context_precision_average" | 0.49999999995 | ``` ... +``` From 268d89cec0efe0e3ae3b096d894b5beaa234aa98 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 11:29:39 +0800 Subject: [PATCH 141/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 29 +++++-------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index bc66378e5fa..acc59fb7ab4 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -89,9 +89,7 @@ This will run benchmark multiple times based on the following different paramete - `enable_rerank`: [True, False] **Result**: -``` -max_MRR -``` +***max_MRR***: | Parameter & Result | Value | | :----: | :----: | | 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | @@ -110,10 +108,8 @@ max_MRR | 'enable_rerank' | True | | 'MRR' | 0.7 | | 'Hit' | 0.7 | -``` -... -max_Hit -``` +...... +***max_Hit***: | Parameter & Result | Value | | :----: | :----: | | 'index_file_jsonl_path' | '/path/to/candidate_context.jsonl' | @@ -133,7 +129,6 @@ max_Hit | 'MRR' | 0.7 | | 'Hit' | 0.7 | ... -``` ## 3. Rag Benchmark ### Installation @@ -265,9 +260,7 @@ This will run benchmark multiple times based on the following different paramete - `do_sample`: [True, False] **Result**: -``` -max_answer_relevancy_average -``` +***max_answer_relevancy_average***: | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -295,10 +288,8 @@ max_answer_relevancy_average | "faithfulness_average" | 0.5083333333333333 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | -``` ... -max_faithfulness_average -``` +***max_faithfulness_average***: | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -326,10 +317,8 @@ max_faithfulness_average | "faithfulness_average" | 0.675 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | -``` ... -max_context_recall_average -``` +***max_context_recall_average***: | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -357,10 +346,8 @@ max_context_recall_average | "faithfulness_average" | 0.675 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | -``` ... 
-max_context_precision_average -``` +***max_context_precision_average***: | Parameter & Result | Value | | :----: | :----: | | "ground_truth_file" | "ground_truth.jsonl" | @@ -388,6 +375,4 @@ max_context_precision_average | "faithfulness_average" | 0.6666666666666667 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | -``` ... -``` From 40fc2e9cc3cc5b8ebf53f7c22af997fbb0aa2996 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 11:32:41 +0800 Subject: [PATCH 142/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index acc59fb7ab4..67bdaebb146 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -109,6 +109,7 @@ This will run benchmark multiple times based on the following different paramete | 'MRR' | 0.7 | | 'Hit' | 0.7 | ...... + ***max_Hit***: | Parameter & Result | Value | | :----: | :----: | @@ -289,6 +290,7 @@ This will run benchmark multiple times based on the following different paramete | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | ... + ***max_faithfulness_average***: | Parameter & Result | Value | | :----: | :----: | @@ -318,6 +320,7 @@ This will run benchmark multiple times based on the following different paramete | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | ... + ***max_context_recall_average***: | Parameter & Result | Value | | :----: | :----: | @@ -347,6 +350,7 @@ This will run benchmark multiple times based on the following different paramete | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | ... + ***max_context_precision_average***: | Parameter & Result | Value | | :----: | :----: | From 13bb3b8691b382967b4f20a733bb492229e42afc Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 1 Apr 2024 11:34:08 +0800 Subject: [PATCH 143/151] Update README.md Signed-off-by: Liangyx2 --- .../neural_chat/tools/evaluation/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 67bdaebb146..04f1d251d34 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -108,7 +108,8 @@ This will run benchmark multiple times based on the following different paramete | 'enable_rerank' | True | | 'MRR' | 0.7 | | 'Hit' | 0.7 | -...... + +... ***max_Hit***: | Parameter & Result | Value | @@ -129,6 +130,7 @@ This will run benchmark multiple times based on the following different paramete | 'enable_rerank' | True | | 'MRR' | 0.7 | | 'Hit' | 0.7 | + ... ## 3. Rag Benchmark @@ -289,6 +291,7 @@ This will run benchmark multiple times based on the following different paramete | "faithfulness_average" | 0.5083333333333333 | | "context_recall_average" | 1.0 | | "context_precision_average" | 0.49999999995 | + ... 
 ***max_faithfulness_average***:
@@ -319,6 +322,7 @@ This will run benchmark multiple times based on the following different paramete
 | "faithfulness_average" | 0.675 |
 | "context_recall_average" | 1.0 |
 | "context_precision_average" | 0.49999999995 |
+
 ...
 
 ***max_context_recall_average***:
@@ -349,6 +353,7 @@ This will run benchmark multiple times based on the following different paramete
 | "faithfulness_average" | 0.675 |
 | "context_recall_average" | 1.0 |
 | "context_precision_average" | 0.49999999995 |
+
 ...
 
 ***max_context_precision_average***:
@@ -379,4 +384,5 @@ This will run benchmark multiple times based on the following different paramete
 | "faithfulness_average" | 0.6666666666666667 |
 | "context_recall_average" | 1.0 |
 | "context_precision_average" | 0.49999999995 |
+
 ...

From 763bd1daac6f453949f140bae6654f8c821bae37 Mon Sep 17 00:00:00 2001
From: xmx-521 <1426356297@qq.com>
Date: Wed, 10 Apr 2024 17:36:04 +0800
Subject: [PATCH 144/151] add config file for rag evaluation

Signed-off-by: xmx-521 <1426356297@qq.com>
---
 .../tools/evaluation/framework/config.yaml    |  22 +++
 .../ragas_config_evaluation_benchmark.py      | 176 ++++++++++++++++++
 2 files changed, 198 insertions(+)
 create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml
 create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_evaluation_benchmark.py

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml
new file mode 100644
index 00000000000..7f919dcca2f
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml
@@ -0,0 +1,22 @@
+ground_truth_file: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl
+input_path: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt
+use_openai_key: false
+vector_database: Chroma
+embedding_model: facebook/opt-125m
+llm_model: facebook/opt-125m
+reranker_model: facebook/opt-125m
+retrieval_type: [default]
+polish: [true]
+search_type: [similarity, mmr]
+k: [1]
+fetch_k: [5]
+score_threshold: [0.3]
+top_n: [1]
+enable_rerank: [true]
+max_chuck_size: [256]
+temperature: [0.01]
+top_k: [1, 3, 5]
+top_p: [0.1]
+repetition_penalty: [1.0]
+num_beams: [1]
+do_sample: [true]
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_evaluation_benchmark.py
new file mode 100644
index 00000000000..91c3ace236b
--- /dev/null
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_evaluation_benchmark.py
@@ -0,0 +1,176 @@
+# Copyright (c) 2024 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess +import jsonlines +import yaml + +def main(): + if os.path.exists("result_ragas.jsonl"): + os.remove("result_ragas.jsonl") + script_path = 'ragas_benchmark.sh' + + parser = argparse.ArgumentParser() + parser.add_argument("--config_path", type=str, default="config.yaml") + args = parser.parse_args() + + data = read_yaml_file(args.config_path) + data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()} + arg1 = data['ground_truth_file'] + arg2 = data['input_path'] + arg3 = data['use_openai_key'] + arg4 = data['vector_database'] + arg5 = data['embedding_model'] + arg6 = data['llm_model'] + arg7 = data['reranker_model'] + arg8_list = data['retrieval_type'] + arg9_list = data['polish'] + arg10_list = data['search_type'] + arg11_list = data['k'] + arg12_list = data['fetch_k'] + arg13_list = data['score_threshold'] + arg14_list = data['top_n'] + arg15_list = data['enable_rerank'] + arg16_list = data['max_chuck_size'] + arg17_list = data['temperature'] + arg18_list = data['top_k'] + arg19_list = data['top_p'] + arg20_list = data['repetition_penalty'] + arg21_list = data['num_beams'] + arg22_list = data['do_sample'] + + for arg8 in arg8_list: + print('--'*1 +'retrieval_type',arg8) + for arg9 in arg9_list: + print('--'*2 +'polish',arg9) + for arg10 in arg10_list: + print('--'*3 +'search_type',arg10) + for arg11 in arg11_list: + print('--'*4 +'k',arg11) + for arg12 in arg12_list: + print('--'*5 +'fetch_k',arg12) + for arg13 in arg13_list: + print('--'*6 +'score_threshold',arg13) + for arg14 in arg14_list: + print('--'*7 +'top_n',arg14) + for arg15 in arg15_list: + print('--'*8 +'enable_rerank',arg15) + for arg16 in arg16_list: + print('--'*9 +'max_chuck_size',arg16) + for arg17 in arg17_list: + print('--'*10 +'temperature',arg17) + for arg18 in arg18_list: + print('--'*11 +'top_k',arg18) + for arg19 in arg19_list: + print('--'*12 +'top_p',arg19) + for arg20 in arg20_list: + print('--'*13 +'repetition_penalty',arg20) + for arg21 in arg21_list: + print('--'*14 +'num_beams',arg21) + for arg22 in arg22_list: + print('--'*15 +'do_sample',arg22) + subprocess.run(['bash', + script_path, + '--ground_truth_file='+arg1, + '--input_path='+arg2, + '--use_openai_key='+arg3, + '--vector_database='+arg4, + '--embedding_model='+arg5, + '--llm_model='+arg6, + '--reranker_model='+arg7, + '--retrieval_type='+arg8, + '--polish='+arg9, + '--search_type='+arg10, + '--k='+arg11, + '--fetch_k='+arg12, + '--score_threshold='+arg13, + '--top_n='+arg14, + '--enable_rerank='+arg15, + '--max_chuck_size='+arg16, + '--temperature='+arg17, + '--top_k='+arg18, + '--top_p='+arg19, + '--repetition_penalty='+arg20, + '--num_beams='+arg21, + '--do_sample='+arg22], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + + file_jsonl_path='result_ragas.jsonl' + + answer_relevancy_average_list = [] + faithfulness_average_list = [] + context_recall_average_list = [] + context_precision_average_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + answer_relevancy_average=stu["answer_relevancy_average"] + faithfulness_average=stu["faithfulness_average"] + context_recall_average=stu["context_recall_average"] + context_precision_average=stu["context_precision_average"] + + answer_relevancy_average_list.append(answer_relevancy_average) + faithfulness_average_list.append(faithfulness_average) + 
context_recall_average_list.append(context_recall_average)
+            context_precision_average_list.append(context_precision_average)
+
+    answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) \
+        if v == max(answer_relevancy_average_list)]
+    faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) \
+        if v == max(faithfulness_average_list)]
+    context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) \
+        if v == max(context_recall_average_list)]
+    context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) \
+        if v == max(context_precision_average_list)]
+
+    line=0
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            if line in answer_relevancy_average_line_number_list:
+                print('max_answer_relevancy_average',stu)
+            line+=1
+
+    line=0
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            if line in faithfulness_average_line_number_list:
+                print('max_faithfulness_average',stu)
+            line+=1
+
+    line=0
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            if line in context_recall_average_line_number_list:
+                print('max_context_recall_average',stu)
+            line+=1
+
+    line=0
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            if line in context_precision_average_line_number_list:
+                print('max_context_precision_average',stu)
+            line+=1
+
+def read_yaml_file(file_path):
+    with open(file_path, 'r') as stream:
+        try:
+            return yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            print(exc)
+
+if __name__ == '__main__':
+    main()

From 092e951b4e13fc9ae31bc661c4724322af69e5d8 Mon Sep 17 00:00:00 2001
From: lvliang-intel <1426356297@qq.com>
Date: Mon, 15 Apr 2024 14:19:38 +0800
Subject: [PATCH 145/151] complete config superbenchmark

Signed-off-by: lvliang-intel <1426356297@qq.com>
---
 .../neural_chat/tools/evaluation/README.md    |  56 ++++++++
 ...mark.py => ragas_config_superbenchmark.py} |   0
 .../evaluation/framework/requirements.txt     |   1 +
 .../tools/evaluation/retriever/config.yaml    |  14 ++
 .../retrieval_config_superbenchmark.py        | 124 ++++++++++++++++++
 5 files changed, 195 insertions(+)
 rename intel_extension_for_transformers/neural_chat/tools/evaluation/framework/{ragas_config_evaluation_benchmark.py => ragas_config_superbenchmark.py} (100%)
 create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml
 create mode 100644 intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py

diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md
index 04f1d251d34..3649dc42c05 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md
@@ -133,6 +133,30 @@ This will run benchmark multiple times based on the following different paramete
 
 ...
 
+### Config SuperBenchmark
+You can also run retrieval superbenchmark with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever
+python retrieval_config_superbenchmark.py --config_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml
+```
+You can set the parameters in config.yaml.
+``` yaml
+index_file_jsonl_path: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl
+query_file_jsonl_path: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl
+vector_database: Chroma
+embedding_model:
+llm_model:
+reranker_model:
+retrieval_type: ['default','child_parent','bm25']
+polish: [True, False]
+search_type: ['similarity','mmr','similarity_score_threshold']
+k: [1, 3, 5]
+fetch_k: [5, 10, 20]
+score_threshold: [0.3, 0.5, 0.7]
+top_n: [1, 3, 5, 10]
+enable_rerank: [True, False]
+```
+
 ## 3. Rag Benchmark
 ### Installation
@@ -410,3 +434,35 @@ This will run benchmark multiple times based on the following different paramete
 | "context_precision_average" | 0.49999999995 |
 
 ...
+
+### Config SuperBenchmark
+You can also run rag superbenchmark with the following commands.
+```
+cd intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework
+python ragas_config_superbenchmark.py --config_path=/path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml
+```
+You can set the parameters in config.yaml.
+``` yaml
+ground_truth_file: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl
+input_path: path/to/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt
+use_openai_key: false
+vector_database: Chroma
+embedding_model:
+llm_model:
+reranker_model:
+retrieval_type: ['default','child_parent','bm25']
+polish: [True, False]
+search_type: ['similarity','mmr','similarity_score_threshold']
+k: [1, 3, 5]
+fetch_k: [5, 10, 20]
+score_threshold: [0.3, 0.5, 0.7]
+top_n: [1, 3, 5, 10]
+enable_rerank: [True, False]
+max_chuck_size: [256, 512, 768, 1024]
+temperature: [0.01, 0.05, 0.1, 0.3, 0.5, 0.7]
+top_k: [1, 3, 10, 20]
+top_p: [0.1, 0.3, 0.5, 0.7]
+repetition_penalty: [1.0, 1.1, 1.3, 1.5, 1.7]
+num_beams: [1, 3, 10, 20]
+do_sample: [True, False]
+```
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py
similarity index 100%
rename from intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_evaluation_benchmark.py
rename to intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt
index 6a2850d42eb..8ef137f5450 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt
@@ -3,3 +3,4 @@ intel-extension-for-transformers
 jsonlines
 ragas
 sentence-transformers==2.3.1
+pyyaml
\ No newline at end of file
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml
b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml new file mode 100644 index 00000000000..15c2e6411f2 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml @@ -0,0 +1,14 @@ +index_file_jsonl_path: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl +query_file_jsonl_path: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl +vector_database: Chroma +embedding_model: facebook/opt-125m +llm_model: facebook/opt-125m +reranker_model: facebook/opt-125m +retrieval_type: [default] +polish: [true] +search_type: [similarity, mmr] +k: [1] +fetch_k: [5] +score_threshold: [0.3] +top_n: [1] +enable_rerank: [true] diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py new file mode 100644 index 00000000000..6e603586adc --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py @@ -0,0 +1,124 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
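+#
+# Overview (descriptive comment): this script sweeps retrieval benchmark
+# settings. It reads fixed inputs (index/query files, vector database, model
+# names) and list-valued parameter grids from a YAML config, runs
+# retrieval_benchmark.sh once per parameter combination, then scans
+# result_retrieval.jsonl and prints the combinations with the highest MRR and Hit.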
+ +import argparse +import os +import subprocess +import jsonlines +import yaml + +def main(): + if os.path.exists("result_retrieval.jsonl"): + os.remove("result_retrieval.jsonl") + script_path = 'retrieval_benchmark.sh' + + parser = argparse.ArgumentParser() + parser.add_argument("--config_path", type=str, default="config.yaml") + args = parser.parse_args() + + data = read_yaml_file(args.config_path) + data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()} + args = parser.parse_args() + + data = read_yaml_file(args.config_path) + data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()} + arg1 = data['index_file_jsonl_path'] + arg2 = data['query_file_jsonl_path'] + arg3 = data['vector_database'] + arg4 = data['embedding_model'] + arg5 = data['llm_model'] + arg6 = data['reranker_model'] + arg7_list = data['retrieval_type'] + arg8_list = data['polish'] + arg9_list = data['search_type'] + arg10_list = data['k'] + arg11_list = data['fetch_k'] + arg12_list = data['score_threshold'] + arg13_list = data['top_n'] + arg14_list = data['enable_rerank'] + + for arg7 in arg7_list: + print('--'*1 +'retrieval_type',arg7) + for arg8 in arg8_list: + print('--'*2 +'polish',arg8) + for arg9 in arg9_list: + print('--'*3 +'search_type',arg9) + for arg10 in arg10_list: + print('--'*4 +'k',arg10) + for arg11 in arg11_list: + print('--'*5 +'fetch_k',arg11) + for arg12 in arg12_list: + print('--'*6 +'score_threshold',arg12) + for arg13 in arg13_list: + print('--'*7 +'top_n',arg13) + for arg14 in arg14_list: + print('--'*8 +'enable_rerank',arg14) + # try: + subprocess.run(['bash', + script_path, + '--index_file_jsonl_path='+arg1, + '--query_file_jsonl_path='+arg2, + '--vector_database='+arg3, + '--embedding_model='+arg4, + '--llm_model='+arg5, + '--reranker_model='+arg6, + '--retrieval_type='+arg7, + '--polish='+arg8, + '--search_type='+arg9, + '--k='+arg10, + '--fetch_k='+arg11, + '--score_threshold='+arg12, + '--top_n='+arg13, + '--enable_rerank='+arg14], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file_jsonl_path='result_retrieval.jsonl' + + MRR_list = [] + Hit_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + MRR=stu["MRR"] + Hit=stu["Hit"] + MRR_list.append(MRR) + Hit_list.append(Hit) + + MRR_line_number_list = [i for i, v in enumerate(MRR_list) if v == max(MRR_list)] + Hit_line_number_list = [i for i, v in enumerate(Hit_list) if v == max(Hit_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in MRR_line_number_list: + print('max_MRR',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in Hit_line_number_list: + print('max_Hit',stu) + line+=1 + +def read_yaml_file(file_path): + with open(file_path, 'r') as stream: + try: + return yaml.safe_load(stream) + except yaml.YAMLError as exc: + print(exc) + +if __name__ == '__main__': + main() + From e931143a67c927d0fb85a4fbe232e49ccb9ee0c0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 06:31:45 +0000 Subject: [PATCH 146/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/tools/evaluation/README.md | 2 +- .../tools/evaluation/framework/config.yaml | 18 ++++++++++-- .../framework/ragas_config_superbenchmark.py | 2 +- .../evaluation/framework/requirements.txt | 2 +- 
.../tools/evaluation/retriever/config.yaml | 14 ++++++++++ .../retriever/evaluate_retrieval.py | 4 +-- .../retriever/evaluate_retrieval_benchmark.py | 4 +-- .../retrieval_config_superbenchmark.py | 5 ++-- .../retriever/retrieval_superbenchmark.py | 28 +++++++++---------- 9 files changed, 51 insertions(+), 28 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md index 3649dc42c05..84f931f8458 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/README.md @@ -441,4 +441,4 @@ top_p: [0.1, 0.3, 0.5, 0.7] repetition_penalty: [1.0, 1.1, 1.3, 1.5, 1.7] num_beams: [1, 3, 10, 20] do_sample: [True, False] -``` \ No newline at end of file +``` diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml index 7f919dcca2f..a97ec88bfe4 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/config.yaml @@ -1,10 +1,24 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
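+#
+# Sweep configuration consumed by ragas_config_superbenchmark.py: the file
+# paths and model names are fixed inputs, while every list-valued key below
+# is treated as a parameter grid and benchmarked in all combinations.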
+ ground_truth_file: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl input_path: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt use_openai_key: false vector_database: Chroma embedding_model: facebook/opt-125m llm_model: facebook/opt-125m -reranker_model: facebook/opt-125m +reranker_model: facebook/opt-125m retrieval_type: [default] polish: [true] search_type: [similarity, mmr] @@ -19,4 +33,4 @@ top_k: [1, 3, 5] top_p: [0.1] repetition_penalty: [1.0] num_beams: [1] -do_sample: [true] +do_sample: [true] diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py index 91c3ace236b..5d7a72cfa18 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_config_superbenchmark.py @@ -22,7 +22,7 @@ def main(): if os.path.exists("result_ragas.jsonl"): os.remove("result_ragas.jsonl") script_path = 'ragas_benchmark.sh' - + parser = argparse.ArgumentParser() parser.add_argument("--config_path", type=str, default="config.yaml") args = parser.parse_args() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt index 8ef137f5450..e8bf3fd1350 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/requirements.txt @@ -1,6 +1,6 @@ InstructorEmbedding intel-extension-for-transformers jsonlines +pyyaml ragas sentence-transformers==2.3.1 -pyyaml \ No newline at end of file diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml index 15c2e6411f2..7b293c3e442 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/config.yaml @@ -1,3 +1,17 @@ +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
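+#
+# Sweep configuration consumed by retrieval_config_superbenchmark.py: the
+# index/query paths and model names are fixed inputs; each list-valued key
+# below is a parameter grid evaluated in all combinations for MRR and Hit.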
+ index_file_jsonl_path: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl query_file_jsonl_path: /home/itrex/manxin/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl vector_database: Chroma diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py index c4fe8303530..4a8349bbde7 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py @@ -77,9 +77,7 @@ def load_list(file_jsonl_path, item): return data def evaluate(preds, labels, cutoffs=[1,5]): - """ - Evaluate MRR and Hit at cutoffs. - """ + """Evaluate MRR and Hit at cutoffs.""" metrics = {} # MRR diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py index ac00952180f..ebf2fd2e9d5 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py @@ -80,9 +80,7 @@ def load_list(file_jsonl_path, item): return data def evaluate(preds, labels, cutoffs=[1]): - """ - Evaluate MRR and Hit at cutoffs. - """ + """Evaluate MRR and Hit at cutoffs.""" metrics = {} # MRR diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py index 6e603586adc..50c17777ff3 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_config_superbenchmark.py @@ -26,11 +26,11 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("--config_path", type=str, default="config.yaml") args = parser.parse_args() - + data = read_yaml_file(args.config_path) data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()} args = parser.parse_args() - + data = read_yaml_file(args.config_path) data = {k: [str(item) for item in v] if isinstance(v, list) else str(v) for k, v in data.items()} arg1 = data['index_file_jsonl_path'] @@ -121,4 +121,3 @@ def read_yaml_file(file_path): if __name__ == '__main__': main() - diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py index edcaa85c8c1..5a5d3067a6e 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py @@ -1,17 +1,17 @@ -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import argparse import os import subprocess From 895075beb2a2a64d56fe3934abcfdf8a0dda1d25 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 01:41:51 +0000 Subject: [PATCH 147/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../neural_chat/tests/requirements.txt | 2 +- .../evaluation/data_augmentation/data.txt | 20 +- .../evaluation/data_augmentation/hn_mine.py | 198 ++--- .../llm_generate_raw_data.py | 230 +++--- .../data_augmentation/llm_generate_truth.py | 294 +++---- .../mine_hard_negatives_check_similarity.py | 132 +-- .../data_augmentation/requirements_cpu.txt | 4 +- .../retrieval_dataset_construction.py | 300 +++---- .../framework/ragas_superbenchmark.py | 350 ++++---- .../retriever/evaluate_retrieval.py | 568 ++++++------- .../retriever/evaluate_retrieval_benchmark.py | 750 +++++++++--------- .../retriever/retrieval_superbenchmark.py | 232 +++--- 12 files changed, 1540 insertions(+), 1540 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/requirements.txt b/intel_extension_for_transformers/neural_chat/tests/requirements.txt index b65b965b35f..4f5177133b9 100644 --- a/intel_extension_for_transformers/neural_chat/tests/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tests/requirements.txt @@ -64,8 +64,8 @@ pypinyin python-docx python-multipart pyyaml -ragas qdrant-client==1.8.2 +ragas rank_bm25 resampy==0.3.1 rouge_score diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt index 46f0e982af6..b468c8b9399 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/data.txt @@ -1,10 +1,10 @@ -We aim to deliver open software and hardware platforms with industry-defining standards. -Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet. -We invest in public and private companies and do not always realize a return on our investments. -The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors. 
-With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success. -The COVID-19 pandemic could materially adversely affect our financial condition and results of operations. -Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings. -We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all. -We receive a significant portion of our revenue from a limited number of customers. -Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years. \ No newline at end of file +We aim to deliver open software and hardware platforms with industry-defining standards. +Our world-class talent is at the heart of everything we do. Together we strive to have a positive effect on business, society, and the planet. +We invest in public and private companies and do not always realize a return on our investments. +The past several years demonstrated just how much technology is increasingly central to every aspect of our lives, all of which depends on semiconductors. +With our focus on delivering leadership products, open and secure platforms and resilient manufacturing, Intel has the right strategy in place to enable this global digitalization and fuel customer success. +The COVID-19 pandemic could materially adversely affect our financial condition and results of operations. +Theft, loss, or misuse of personal data about our employees, customers, or other third parties could increase our expenses, damage our reputation, or result in legal or regulatory proceedings. +We rely on access to third-party IP, which may not be available to us on commercially reasonable terms or at all. +We receive a significant portion of our revenue from a limited number of customers. +Intel plans to regain transistor performance and power performance leadership by 2025, and we remain on track to deliver on our goal of five manufacturing technology nodes in four years. diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py index 73f49914396..8aa51306950 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/hn_mine.py @@ -1,99 +1,99 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import json -import random -import numpy as np -import faiss -from tqdm import tqdm - -def create_index(embeddings, use_gpu): - index = faiss.IndexFlatIP(len(embeddings[0])) - embeddings = np.asarray(embeddings, dtype=np.float32) - if use_gpu: - co = faiss.GpuMultipleClonerOptions() # pylint: disable=E1101 - co.shard = True - co.useFloat16 = True - index = faiss.index_cpu_to_all_gpus(index, co=co) - else: - pass - index.add(embeddings) - return index - -def batch_search(index, - query, - topk: int = 200, - batch_size: int = 64): - all_scores, all_inxs = [], [] - for start_index in tqdm(range(0, len(query), batch_size), desc="Batches", disable=len(query) < 256): - batch_query = query[start_index:start_index + batch_size] - batch_scores, batch_inxs = index.search(np.asarray(batch_query, dtype=np.float32), k=topk) - all_scores.extend(batch_scores.tolist()) - all_inxs.extend(batch_inxs.tolist()) - return all_scores, all_inxs - -def get_corpus(candidate_pool): - corpus = [] - for line in open(candidate_pool): - line = json.loads(line.strip()) - corpus.append(line['text']) - return corpus - -def find_knn_neg(model, input_file, candidate_pool, output_file, sample_range, negative_number, use_gpu): - corpus = [] - queries = [] - train_data = [] - for line in open(input_file): - line = json.loads(line.strip()) - train_data.append(line) - corpus.extend(line['pos']) - if 'neg' in line: - corpus.extend(line['neg']) - queries.append(line['query']) - - if candidate_pool is not None: - if not isinstance(candidate_pool, list): - candidate_pool = get_corpus(candidate_pool) - corpus = list(set(candidate_pool)) - else: - corpus = list(set(corpus)) - - p_vecs = model.encode(corpus, batch_size=256) - q_vecs = model.encode(queries, batch_size=256) - - index = create_index(p_vecs, use_gpu=use_gpu) - _, all_inxs = batch_search(index, q_vecs, topk=sample_range[-1]) - assert len(all_inxs) == len(train_data) - - for i, data in enumerate(train_data): - query = data['query'] - inxs = all_inxs[i][sample_range[0]:sample_range[1]] - filtered_inx = [] - for inx in inxs: - if inx == -1: break - if corpus[inx] not in data['pos'] and corpus[inx] != query: - filtered_inx.append(inx) - - if len(filtered_inx) > negative_number: - filtered_inx = random.sample(filtered_inx, negative_number) - data['neg'] = [corpus[inx] for inx in filtered_inx] - - with open(output_file, 'w') as f: - for data in train_data: - if len(data['neg']) < negative_number: - data['neg'].extend(random.sample(corpus, negative_number - len(data['neg']))) - f.write(json.dumps(data, ensure_ascii=False) + '\n') +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
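+#
+# Hard-negative mining utilities: encode the corpus and queries with a
+# SentenceTransformer model, build a FAISS inner-product index, and for each
+# query sample negatives from a rank window of its nearest neighbors while
+# skipping its labeled positives and the query text itself.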
+ +import json +import random +import numpy as np +import faiss +from tqdm import tqdm + +def create_index(embeddings, use_gpu): + index = faiss.IndexFlatIP(len(embeddings[0])) + embeddings = np.asarray(embeddings, dtype=np.float32) + if use_gpu: + co = faiss.GpuMultipleClonerOptions() # pylint: disable=E1101 + co.shard = True + co.useFloat16 = True + index = faiss.index_cpu_to_all_gpus(index, co=co) + else: + pass + index.add(embeddings) + return index + +def batch_search(index, + query, + topk: int = 200, + batch_size: int = 64): + all_scores, all_inxs = [], [] + for start_index in tqdm(range(0, len(query), batch_size), desc="Batches", disable=len(query) < 256): + batch_query = query[start_index:start_index + batch_size] + batch_scores, batch_inxs = index.search(np.asarray(batch_query, dtype=np.float32), k=topk) + all_scores.extend(batch_scores.tolist()) + all_inxs.extend(batch_inxs.tolist()) + return all_scores, all_inxs + +def get_corpus(candidate_pool): + corpus = [] + for line in open(candidate_pool): + line = json.loads(line.strip()) + corpus.append(line['text']) + return corpus + +def find_knn_neg(model, input_file, candidate_pool, output_file, sample_range, negative_number, use_gpu): + corpus = [] + queries = [] + train_data = [] + for line in open(input_file): + line = json.loads(line.strip()) + train_data.append(line) + corpus.extend(line['pos']) + if 'neg' in line: + corpus.extend(line['neg']) + queries.append(line['query']) + + if candidate_pool is not None: + if not isinstance(candidate_pool, list): + candidate_pool = get_corpus(candidate_pool) + corpus = list(set(candidate_pool)) + else: + corpus = list(set(corpus)) + + p_vecs = model.encode(corpus, batch_size=256) + q_vecs = model.encode(queries, batch_size=256) + + index = create_index(p_vecs, use_gpu=use_gpu) + _, all_inxs = batch_search(index, q_vecs, topk=sample_range[-1]) + assert len(all_inxs) == len(train_data) + + for i, data in enumerate(train_data): + query = data['query'] + inxs = all_inxs[i][sample_range[0]:sample_range[1]] + filtered_inx = [] + for inx in inxs: + if inx == -1: break + if corpus[inx] not in data['pos'] and corpus[inx] != query: + filtered_inx.append(inx) + + if len(filtered_inx) > negative_number: + filtered_inx = random.sample(filtered_inx, negative_number) + data['neg'] = [corpus[inx] for inx in filtered_inx] + + with open(output_file, 'w') as f: + for data in train_data: + if len(data['neg']) < negative_number: + data['neg'].extend(random.sample(corpus, negative_number - len(data['neg']))) + f.write(json.dumps(data, ensure_ascii=False) + '\n') diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py index 7f33eecbabd..c0a613577ea 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_raw_data.py @@ -1,115 +1,115 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import os -import torch -from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401 -import jsonlines -import os, re -from typing import List -from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser -import logging -from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT -from transformers import GenerationConfig - -logging.basicConfig( - format="%(asctime)s %(name)s:%(levelname)s:%(message)s", - datefmt="%d-%M-%Y %H:%M:%S", - level=logging.INFO -) - -device = "cuda" if torch.cuda.is_available() else "cpu" - -def document_append(data_collection): - documents = [] - for data, metadata in data_collection: - if len(data) < 5: - continue - documents.append(data) - return documents - -def raw_data_generate(model_id, - input_path, - file_json_path, - temperature, - top_p, - top_k, - repetition_penalty, - max_new_tokens, - do_sample, - num_beams, - num_return_sequences, - use_cache): - tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) - data_collection = DocumentParser().load(input=input_path) - documents = document_append(data_collection) - - generation_config = GenerationConfig( - temperature = temperature, - top_p = top_p, - top_k = top_k, - repetition_penalty = repetition_penalty, - max_new_tokens = max_new_tokens, - do_sample = do_sample, - num_beams = num_beams, - num_return_sequences = num_return_sequences, - use_cache = use_cache, - pad_token_id=tokenizer.eos_token_id - ) - - for i in range(len(documents)): - context = documents[i] - - if context: - input = QUERYGENERATE_PROMPT.format(context=context) - if device=="cpu": - model_input = tokenizer(input, return_tensors="pt") - elif device=="cuda": - model_input = tokenizer(input, return_tensors="pt").to("cuda") - model.eval() - result = [] - - for j in range(5): - with torch.no_grad(): - res = model.generate(**model_input, generation_config=generation_config)[0] - res=tokenizer.decode(res, skip_special_tokens=True) - - res = res[res.find('Generated questions:') :] - res = re.sub('Generated questions:', '', res) - res = re.sub('---', '', res) - - res = res.split("?")[0:2] - for r in res: - r = r.replace('1.', "").replace('2.', "") - r = r.replace('Evaluation:', "") - r = r.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() - r = r + '?' - result.append(r) - - result_str='' - result_set = list(set(result)) - for k in range(len(result_set)): - result_str = result_str + str(k) + '. '+ result_set[k] - - if result_str and result_str.isspace()==False: - data = { - "query": result_str, - "pos": [context], - } - with jsonlines.open(file_json_path,"a") as file_json: - file_json.write(data) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import torch +from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401 +import jsonlines +import os, re +from typing import List +from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.parser.parser import DocumentParser +import logging +from intel_extension_for_transformers.neural_chat.prompts.prompt import QUERYGENERATE_PROMPT +from transformers import GenerationConfig + +logging.basicConfig( + format="%(asctime)s %(name)s:%(levelname)s:%(message)s", + datefmt="%d-%M-%Y %H:%M:%S", + level=logging.INFO +) + +device = "cuda" if torch.cuda.is_available() else "cpu" + +def document_append(data_collection): + documents = [] + for data, metadata in data_collection: + if len(data) < 5: + continue + documents.append(data) + return documents + +def raw_data_generate(model_id, + input_path, + file_json_path, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache): + tokenizer = AutoTokenizer.from_pretrained(model_id) + model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) + data_collection = DocumentParser().load(input=input_path) + documents = document_append(data_collection) + + generation_config = GenerationConfig( + temperature = temperature, + top_p = top_p, + top_k = top_k, + repetition_penalty = repetition_penalty, + max_new_tokens = max_new_tokens, + do_sample = do_sample, + num_beams = num_beams, + num_return_sequences = num_return_sequences, + use_cache = use_cache, + pad_token_id=tokenizer.eos_token_id + ) + + for i in range(len(documents)): + context = documents[i] + + if context: + input = QUERYGENERATE_PROMPT.format(context=context) + if device=="cpu": + model_input = tokenizer(input, return_tensors="pt") + elif device=="cuda": + model_input = tokenizer(input, return_tensors="pt").to("cuda") + model.eval() + result = [] + + for j in range(5): + with torch.no_grad(): + res = model.generate(**model_input, generation_config=generation_config)[0] + res=tokenizer.decode(res, skip_special_tokens=True) + + res = res[res.find('Generated questions:') :] + res = re.sub('Generated questions:', '', res) + res = re.sub('---', '', res) + + res = res.split("?")[0:2] + for r in res: + r = r.replace('1.', "").replace('2.', "") + r = r.replace('Evaluation:', "") + r = r.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip() + r = r + '?' + result.append(r) + + result_str='' + result_set = list(set(result)) + for k in range(len(result_set)): + result_str = result_str + str(k) + '. 
'+ result_set[k] + + if result_str and result_str.isspace()==False: + data = { + "query": result_str, + "pos": [context], + } + with jsonlines.open(file_json_path,"a") as file_json: + file_json.write(data) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py index 89e6c985c10..6edd4942796 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/llm_generate_truth.py @@ -1,147 +1,147 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import torch -from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401 -import jsonlines -import re -import logging -from intel_extension_for_transformers.neural_chat.prompts.prompt import TRUTHGENERATE_PROMPT -from transformers import GenerationConfig -import argparse - -logging.basicConfig( - format="%(asctime)s %(name)s:%(levelname)s:%(message)s", - datefmt="%d-%M-%Y %H:%M:%S", - level=logging.INFO -) - -device = "cuda" if torch.cuda.is_available() else "cpu" - -def document_set(document_file_jsonl_path): - document_list = [] - with open(document_file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - passages=[stu["query"],stu["pos"][0]] - document_list.append(passages) - return document_list - -def raw_data_generate(model_id, - base_dir, - file_json_path, - temperature, - top_p, - top_k, - repetition_penalty, - max_new_tokens, - do_sample, - num_beams, - num_return_sequences, - use_cache): - tokenizer = AutoTokenizer.from_pretrained(model_id) - model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16) - documents = document_set(base_dir) - generation_config = GenerationConfig( - temperature = temperature, - top_p = top_p, - top_k = top_k, - repetition_penalty = repetition_penalty, - max_new_tokens = max_new_tokens, - do_sample = do_sample, - num_beams = num_beams, - num_return_sequences = num_return_sequences, - use_cache = use_cache, - pad_token_id=tokenizer.eos_token_id - ) - - for i in range(len(documents)): - [question, context] = documents[i] - - if context: - input = TRUTHGENERATE_PROMPT.format(question=question,context=context) - if device=="cpu": - model_input = tokenizer(input, return_tensors="pt") - elif device=="cuda": - model_input = tokenizer(input, return_tensors="pt").to("cuda") - model.eval() - - with torch.no_grad(): - res = model.generate(**model_input, generation_config=generation_config)[0] - res=tokenizer.decode(res, skip_special_tokens=True) - - res = res[res.find('Generated ground_truth:') :] - res = re.sub('Generated ground_truth:', '', res) - res = re.sub('---', '', res) - - result_str=res.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' 
').strip() - - if result_str and result_str.isspace()==False: - data = { - "question": question, - "context": [context], - "ground_truth": result_str, - } - with jsonlines.open(file_json_path,"a") as file_json: - file_json.write(data) - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--llm_model", type=str) - parser.add_argument("--input", type=str) - parser.add_argument("--output", type=str) - - parser.add_argument("--temperature", type=float, default=0.8) - parser.add_argument("--top_p", type=float, default=0.9) - parser.add_argument("--top_k", type=int, default=40) - parser.add_argument("--repetition_penalty", type=float, default=2.0) - parser.add_argument("--max_new_tokens", type=int, default=48) - parser.add_argument("--do_sample", type=bool, default=True) - parser.add_argument("--num_beams", type=int, default=2) - parser.add_argument("--num_return_sequences", type=int, default=2) - parser.add_argument("--use_cache", type=bool, default=True) - - args = parser.parse_args() - - llm_model = args.llm_model - input = args.input - output = args.output - - temperature = args.temperature - top_p = args.top_p - top_k = args.top_k - repetition_penalty = args.repetition_penalty - max_new_tokens = args.max_new_tokens - do_sample = args.do_sample - num_beams = args.num_beams - num_return_sequences = args.num_return_sequences - use_cache = args.use_cache - - raw_data_generate(llm_model, - input, - output, - temperature, - top_p, - top_k, - repetition_penalty, - max_new_tokens, - do_sample, - num_beams, - num_return_sequences, - use_cache) - -if __name__ == '__main__': - main() +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
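+#
+# Ground-truth generation: for each (query, positive context) pair in the
+# input JSONL file, prompt the LLM with TRUTHGENERATE_PROMPT and write the
+# resulting {question, context, ground_truth} record to the output JSONL file.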
+
+import torch
+from modelscope import AutoModelForCausalLM, AutoTokenizer # pylint: disable=E0401
+import jsonlines
+import re
+import logging
+from intel_extension_for_transformers.neural_chat.prompts.prompt import TRUTHGENERATE_PROMPT
+from transformers import GenerationConfig
+import argparse
+
+logging.basicConfig(
+    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
+    datefmt="%d-%m-%Y %H:%M:%S",
+    level=logging.INFO
+)
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+def document_set(document_file_jsonl_path):
+    document_list = []
+    with open(document_file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            passages=[stu["query"],stu["pos"][0]]
+            document_list.append(passages)
+    return document_list
+
+def raw_data_generate(model_id,
+                      base_dir,
+                      file_json_path,
+                      temperature,
+                      top_p,
+                      top_k,
+                      repetition_penalty,
+                      max_new_tokens,
+                      do_sample,
+                      num_beams,
+                      num_return_sequences,
+                      use_cache):
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map='auto', torch_dtype=torch.float16)
+    documents = document_set(base_dir)
+    generation_config = GenerationConfig(
+        temperature = temperature,
+        top_p = top_p,
+        top_k = top_k,
+        repetition_penalty = repetition_penalty,
+        max_new_tokens = max_new_tokens,
+        do_sample = do_sample,
+        num_beams = num_beams,
+        num_return_sequences = num_return_sequences,
+        use_cache = use_cache,
+        pad_token_id=tokenizer.eos_token_id
+    )
+
+    for i in range(len(documents)):
+        [question, context] = documents[i]
+
+        if context:
+            input = TRUTHGENERATE_PROMPT.format(question=question,context=context)
+            if device=="cpu":
+                model_input = tokenizer(input, return_tensors="pt")
+            elif device=="cuda":
+                model_input = tokenizer(input, return_tensors="pt").to("cuda")
+            model.eval()
+
+            with torch.no_grad():
+                res = model.generate(**model_input, generation_config=generation_config)[0]
+            res=tokenizer.decode(res, skip_special_tokens=True)
+
+            res = res[res.find('Generated ground_truth:') :]
+            res = re.sub('Generated ground_truth:', '', res)
+            res = re.sub('---', '', res)
+
+            result_str=res.replace('#', " ").replace(r'\t', " ").replace('\n', ' ').replace('\n\n', ' ').strip()
+
+            if result_str and not result_str.isspace():
+                data = {
+                        "question": question,
+                        "context": [context],
+                        "ground_truth": result_str,
+                }
+                with jsonlines.open(file_json_path,"a") as file_json:
+                    file_json.write(data)
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--llm_model", type=str)
+    parser.add_argument("--input", type=str)
+    parser.add_argument("--output", type=str)
+
+    parser.add_argument("--temperature", type=float, default=0.8)
+    parser.add_argument("--top_p", type=float, default=0.9)
+    parser.add_argument("--top_k", type=int, default=40)
+    parser.add_argument("--repetition_penalty", type=float, default=2.0)
+    parser.add_argument("--max_new_tokens", type=int, default=48)
+    # argparse's type=bool treats any non-empty string (including "False") as
+    # True, so parse boolean flags explicitly.
+    parser.add_argument("--do_sample", type=lambda x: str(x).lower() == "true", default=True)
+    parser.add_argument("--num_beams", type=int, default=2)
+    parser.add_argument("--num_return_sequences", type=int, default=2)
+    parser.add_argument("--use_cache", type=lambda x: str(x).lower() == "true", default=True)
+
+    args = parser.parse_args()
+
+    llm_model = args.llm_model
+    input = args.input
+    output = args.output
+
+    temperature = args.temperature
+    top_p = args.top_p
+    top_k = args.top_k
+    repetition_penalty = args.repetition_penalty
+    max_new_tokens = args.max_new_tokens
+    do_sample = args.do_sample
+    num_beams = args.num_beams
+    num_return_sequences = 
args.num_return_sequences + use_cache = args.use_cache + + raw_data_generate(llm_model, + input, + output, + temperature, + top_p, + top_k, + repetition_penalty, + max_new_tokens, + do_sample, + num_beams, + num_return_sequences, + use_cache) + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py index 78848a1fb19..eaba94528a2 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/mine_hard_negatives_check_similarity.py @@ -1,66 +1,66 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import jsonlines -from .hn_mine import find_knn_neg -from sentence_transformers import SentenceTransformer - -def mine_hard_negatives(model_name_or_path, - input_file, - output_file, - range_for_sampling, - negative_number, - use_gpu_for_searching): - candidate_pool=None - - sample_range = range_for_sampling.split('-') - sample_range = [int(x) for x in sample_range] - - model = SentenceTransformer(model_name_or_path) - - find_knn_neg(model, - input_file=input_file, - candidate_pool=candidate_pool, - output_file=output_file, - sample_range=sample_range, - negative_number=negative_number, - use_gpu=use_gpu_for_searching) - -def similarity_score(queries,passages,model_name_or_path): - queries = [queries] - passages = passages - instruction = "" - model = SentenceTransformer(model_name_or_path) - q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True) - p_embeddings = model.encode(passages, normalize_embeddings=True) - similarity_score = q_embeddings @ p_embeddings.T - return similarity_score - -def similarity_check(file_jsonl_path,file_json_split_path,model_name_or_path, similarity_threshold): - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - stu["query"]=stu["query"].split("?")[:-1] - for i in range(len(stu["query"])): - stu["query"][i]=stu["query"][i].lstrip('0123456789-. ')+ '?' - if similarity_score(stu["query"][i],stu["pos"],model_name_or_path) >= similarity_threshold: - data = { - "query": stu["query"][i], - "pos": stu["pos"], - "neg": stu["neg"], - } - with jsonlines.open(file_json_split_path,"a") as file_json: - file_json.write(data) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import jsonlines
+from .hn_mine import find_knn_neg
+from sentence_transformers import SentenceTransformer
+
+def mine_hard_negatives(model_name_or_path,
+                        input_file,
+                        output_file,
+                        range_for_sampling,
+                        negative_number,
+                        use_gpu_for_searching):
+    candidate_pool=None
+
+    sample_range = range_for_sampling.split('-')
+    sample_range = [int(x) for x in sample_range]
+
+    model = SentenceTransformer(model_name_or_path)
+
+    find_knn_neg(model,
+                 input_file=input_file,
+                 candidate_pool=candidate_pool,
+                 output_file=output_file,
+                 sample_range=sample_range,
+                 negative_number=negative_number,
+                 use_gpu=use_gpu_for_searching)
+
+def similarity_score(queries,passages,model_name_or_path):
+    queries = [queries]
+    instruction = ""
+    model = SentenceTransformer(model_name_or_path)
+    q_embeddings = model.encode([instruction+q for q in queries], normalize_embeddings=True)
+    p_embeddings = model.encode(passages, normalize_embeddings=True)
+    scores = q_embeddings @ p_embeddings.T
+    return scores
+
+def similarity_check(file_jsonl_path,file_json_split_path,model_name_or_path, similarity_threshold):
+    with open(file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            stu["query"]=stu["query"].split("?")[:-1]
+            for i in range(len(stu["query"])):
+                stu["query"][i]=stu["query"][i].lstrip('0123456789-. ')+ '?'
+                if similarity_score(stu["query"][i],stu["pos"],model_name_or_path) >= similarity_threshold:
+                    data = {
+                        "query": stu["query"][i],
+                        "pos": stu["pos"],
+                        "neg": stu["neg"],
+                    }
+                    with jsonlines.open(file_json_split_path,"a") as file_json:
+                        file_json.write(data)
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt
index e0841115212..85afbe99ffa 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/requirements_cpu.txt
@@ -1,2 +1,2 @@
-faiss-cpu
-modelscope
+faiss-cpu
+modelscope
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py
index 3c9a5424c61..095a5aff944 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/retrieval_dataset_construction.py
@@ -1,150 +1,150 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from .llm_generate_raw_data import raw_data_generate -from .mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check -import argparse -import os - -def construct_retrieval_dataset( - llm_model, - embedding_model, - input, - output, - temperature, - top_p, - top_k, - repetition_penalty, - max_new_tokens, - do_sample, - num_beams, - num_return_sequences, - use_cache, - range_for_sampling, - negative_number, - use_gpu_for_searching, - similarity_threshold): - - output_path=output+'/raw.jsonl' - raw_data_generate(llm_model, - input, - output_path, - temperature, - top_p, - top_k, - repetition_penalty, - max_new_tokens, - do_sample, - num_beams, - num_return_sequences, - use_cache) - - output_hn_path=output+'/minedHN.jsonl' - mine_hard_negatives(embedding_model, - output_path, - output_hn_path, - range_for_sampling, - negative_number, - use_gpu_for_searching) - - output_json_split_path = output+"/minedHN_split.jsonl" - similarity_check(output_hn_path, - output_json_split_path, - embedding_model, - similarity_threshold) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--llm_model", type=str) - parser.add_argument("--embedding_model", type=str) - parser.add_argument("--input", type=str) - parser.add_argument("--output", type=str, default='./data') - - parser.add_argument("--temperature", type=float, default=0.8) - parser.add_argument("--top_p", type=float, default=0.9) - parser.add_argument("--top_k", type=int, default=40) - parser.add_argument("--repetition_penalty", type=float, default=2.0) - parser.add_argument("--max_new_tokens", type=int, default=48) - parser.add_argument("--do_sample", type=bool, default=True) - parser.add_argument("--num_beams", type=int, default=2) - parser.add_argument("--num_return_sequences", type=int, default=2) - parser.add_argument("--use_cache", type=bool, default=True) - - parser.add_argument("--range_for_sampling", type=str, default='2-10') - parser.add_argument("--negative_number", type=int, default=5) - parser.add_argument("--use_gpu_for_searching", type=bool, default=False) - - parser.add_argument("--similarity_threshold", type=float, default=0.6) - - args = parser.parse_args() - - llm_model = args.llm_model - embedding_model = args.embedding_model - input = args.input - output = args.output - - temperature = args.temperature - top_p = args.top_p - top_k = args.top_k - repetition_penalty = args.repetition_penalty - max_new_tokens = args.max_new_tokens - do_sample = args.do_sample - num_beams = args.num_beams - num_return_sequences = args.num_return_sequences - use_cache = args.use_cache - - range_for_sampling=args.range_for_sampling - negative_number=args.negative_number - use_gpu_for_searching=args.use_gpu_for_searching - - similarity_threshold=args.similarity_threshold - - try: - if os.path.exists(output) == False: - os.mkdir(output) - else: - if os.path.exists(output+'/raw.jsonl'): - os.remove(output+'/raw.jsonl') - if os.path.exists(output+'/minedHN.jsonl'): - os.remove(output+'/minedHN.jsonl') - if os.path.exists(output+'/minedHN_split.jsonl'): - os.remove(output+'/minedHN_split.jsonl') - 
except:
-        pass
-
-    construct_retrieval_dataset(
-                        llm_model,
-                        embedding_model,
-                        input,
-                        output,
-                        temperature,
-                        top_p,
-                        top_k,
-                        repetition_penalty,
-                        max_new_tokens,
-                        do_sample,
-                        num_beams,
-                        num_return_sequences,
-                        use_cache,
-                        range_for_sampling,
-                        negative_number,
-                        use_gpu_for_searching,
-                        similarity_threshold)
-
-if __name__ == '__main__':
-    main()
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2023 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .llm_generate_raw_data import raw_data_generate
+from .mine_hard_negatives_check_similarity import mine_hard_negatives, similarity_check
+import argparse
+import os
+
+def construct_retrieval_dataset(
+        llm_model,
+        embedding_model,
+        input,
+        output,
+        temperature,
+        top_p,
+        top_k,
+        repetition_penalty,
+        max_new_tokens,
+        do_sample,
+        num_beams,
+        num_return_sequences,
+        use_cache,
+        range_for_sampling,
+        negative_number,
+        use_gpu_for_searching,
+        similarity_threshold):
+
+    output_path=output+'/raw.jsonl'
+    raw_data_generate(llm_model,
+                      input,
+                      output_path,
+                      temperature,
+                      top_p,
+                      top_k,
+                      repetition_penalty,
+                      max_new_tokens,
+                      do_sample,
+                      num_beams,
+                      num_return_sequences,
+                      use_cache)
+
+    output_hn_path=output+'/minedHN.jsonl'
+    mine_hard_negatives(embedding_model,
+                        output_path,
+                        output_hn_path,
+                        range_for_sampling,
+                        negative_number,
+                        use_gpu_for_searching)
+
+    output_json_split_path = output+"/minedHN_split.jsonl"
+    similarity_check(output_hn_path,
+                     output_json_split_path,
+                     embedding_model,
+                     similarity_threshold)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--llm_model", type=str)
+    parser.add_argument("--embedding_model", type=str)
+    parser.add_argument("--input", type=str)
+    parser.add_argument("--output", type=str, default='./data')
+
+    parser.add_argument("--temperature", type=float, default=0.8)
+    parser.add_argument("--top_p", type=float, default=0.9)
+    parser.add_argument("--top_k", type=int, default=40)
+    parser.add_argument("--repetition_penalty", type=float, default=2.0)
+    parser.add_argument("--max_new_tokens", type=int, default=48)
+    # argparse's type=bool treats any non-empty string (including "False") as
+    # True, so parse boolean flags explicitly.
+    parser.add_argument("--do_sample", type=lambda x: str(x).lower() == "true", default=True)
+    parser.add_argument("--num_beams", type=int, default=2)
+    parser.add_argument("--num_return_sequences", type=int, default=2)
+    parser.add_argument("--use_cache", type=lambda x: str(x).lower() == "true", default=True)
+
+    parser.add_argument("--range_for_sampling", type=str, default='2-10')
+    parser.add_argument("--negative_number", type=int, default=5)
+    parser.add_argument("--use_gpu_for_searching", type=lambda x: str(x).lower() == "true", default=False)
+
+    parser.add_argument("--similarity_threshold", type=float, default=0.6)
+
+    args = parser.parse_args()
+
+    llm_model = args.llm_model
+    embedding_model = args.embedding_model
+    input = args.input
+    output = args.output
+
+    temperature = args.temperature
+    top_p = args.top_p
+    top_k = args.top_k
+    repetition_penalty = args.repetition_penalty
+    max_new_tokens = args.max_new_tokens
+    do_sample = args.do_sample
+    num_beams = args.num_beams
+    
num_return_sequences = args.num_return_sequences
+    use_cache = args.use_cache
+
+    range_for_sampling=args.range_for_sampling
+    negative_number=args.negative_number
+    use_gpu_for_searching=args.use_gpu_for_searching
+
+    similarity_threshold=args.similarity_threshold
+
+    try:
+        if not os.path.exists(output):
+            os.mkdir(output)
+        else:
+            if os.path.exists(output+'/raw.jsonl'):
+                os.remove(output+'/raw.jsonl')
+            if os.path.exists(output+'/minedHN.jsonl'):
+                os.remove(output+'/minedHN.jsonl')
+            if os.path.exists(output+'/minedHN_split.jsonl'):
+                os.remove(output+'/minedHN_split.jsonl')
+    except OSError:
+        # Only filesystem errors are expected here; a bare except would also
+        # swallow KeyboardInterrupt and SystemExit.
+        pass
+
+    construct_retrieval_dataset(
+                        llm_model,
+                        embedding_model,
+                        input,
+                        output,
+                        temperature,
+                        top_p,
+                        top_k,
+                        repetition_penalty,
+                        max_new_tokens,
+                        do_sample,
+                        num_beams,
+                        num_return_sequences,
+                        use_cache,
+                        range_for_sampling,
+                        negative_number,
+                        use_gpu_for_searching,
+                        similarity_threshold)
+
+if __name__ == '__main__':
+    main()
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py
index ba63c8bbb5d..999e712871d 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_superbenchmark.py
@@ -1,175 +1,175 @@
-# Copyright (c) 2024 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import argparse -import os -import subprocess -import jsonlines - -def main(): - if os.path.exists("result_ragas.jsonl"): - os.remove("result_ragas.jsonl") - script_path = 'ragas_benchmark.sh' - - parser = argparse.ArgumentParser() - parser.add_argument("--ground_truth_file", type=str) - parser.add_argument("--input_path", type=str) - parser.add_argument("--use_openai_key", default=False, action='store_true') - parser.add_argument("--vector_database", type=str, default="Chroma") - parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") - parser.add_argument("--llm_model", type=str) - parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") - - args = parser.parse_args() - - arg1 = args.ground_truth_file - arg2 = args.input_path - arg3 = str(args.use_openai_key) - arg4 = args.vector_database - arg5 = args.embedding_model - arg6 = args.llm_model - arg7 = args.reranker_model - - arg8_list = ['default','child_parent','bm25'] - arg9_list = ['True','False'] - arg10_list = ['similarity','mmr','similarity_score_threshold'] - arg11_list = ['1', '3', '5'] - arg12_list = ['5', '10', '20'] - arg13_list = ['0.3','0.5','0.7'] - arg14_list = ['1','3', '5','10'] - arg15_list = ['True','False'] - arg16_list = ['256','512', '768','1024'] - arg17_list = ['0.01','0.05', '0.1','0.3','0.5','0.7'] - arg18_list = ['1','3', '10','20'] - arg19_list = ['0.1','0.3', '0.5','0.7'] - arg20_list = ['1.0','1.1', '1.3','1.5','1.7'] - arg21_list = ['1','3', '10','20'] - arg22_list = ['True','False'] - - for arg8 in arg8_list: - print('--'*1 +'retrieval_type',arg8) - for arg9 in arg9_list: - print('--'*2 +'polish',arg9) - for arg10 in arg10_list: - print('--'*3 +'search_type',arg10) - for arg11 in arg11_list: - print('--'*4 +'k',arg11) - for arg12 in arg12_list: - print('--'*5 +'fetch_k',arg12) - for arg13 in arg13_list: - print('--'*6 +'score_threshold',arg13) - for arg14 in arg14_list: - print('--'*7 +'top_n',arg14) - for arg15 in arg15_list: - print('--'*8 +'enable_rerank',arg15) - for arg16 in arg16_list: - print('--'*9 +'max_chuck_size',arg16) - for arg17 in arg17_list: - print('--'*10 +'temperature',arg17) - for arg18 in arg18_list: - print('--'*11 +'top_k',arg18) - for arg19 in arg19_list: - print('--'*12 +'top_p',arg19) - for arg20 in arg20_list: - print('--'*13 +'repetition_penalty',arg20) - for arg21 in arg21_list: - print('--'*14 +'num_beams',arg21) - for arg22 in arg22_list: - print('--'*15 +'do_sample',arg22) - subprocess.run(['bash', - script_path, - '--ground_truth_file='+arg1, - '--input_path='+arg2, - '--use_openai_key='+arg3, - '--vector_database='+arg4, - '--embedding_model='+arg5, - '--llm_model='+arg6, - '--reranker_model='+arg7, - '--retrieval_type='+arg8, - '--polish='+arg9, - '--search_type='+arg10, - '--k='+arg11, - '--fetch_k='+arg12, - '--score_threshold='+arg13, - '--top_n='+arg14, - '--enable_rerank='+arg15, - '--max_chuck_size='+arg16, - '--temperature='+arg17, - '--top_k='+arg18, - '--top_p='+arg19, - '--repetition_penalty='+arg20, - '--num_beams='+arg21, - '--do_sample='+arg22], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - - file_jsonl_path='result_ragas.jsonl' - - answer_relevancy_average_list = [] - faithfulness_average_list = [] - context_recall_average_list = [] - context_precision_average_list = [] - - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - answer_relevancy_average=stu["answer_relevancy_average"] - faithfulness_average=stu["faithfulness_average"] - 
context_recall_average=stu["context_recall_average"] - context_precision_average=stu["context_precision_average"] - - answer_relevancy_average_list.append(answer_relevancy_average) - faithfulness_average_list.append(faithfulness_average) - context_recall_average_list.append(context_recall_average) - context_precision_average_list.append(context_precision_average) - - answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) \ - if v == max(answer_relevancy_average_list)] - faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) \ - if v == max(faithfulness_average_list)] - context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) \ - if v == max(context_recall_average_list)] - context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) \ - if v == max(context_precision_average_list)] - - line=0 - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - if line in answer_relevancy_average_line_number_list: - print('max_answer_relevancy_average',stu) - line+=1 - - line=0 - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - if line in faithfulness_average_line_number_list: - print('max_faithfulness_average',stu) - line+=1 - - line=0 - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - if line in context_recall_average_line_number_list: - print('max_context_recall_average',stu) - line+=1 - - line=0 - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - if line in context_precision_average_line_number_list: - print('max_context_precision_average',stu) - line+=1 - - -if __name__ == '__main__': - main() +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
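+
+# This driver grid-searches RAG hyperparameters: for every combination drawn
+# from the parameter lists below it shells out to ragas_benchmark.sh, then
+# scans result_ragas.jsonl for the settings that maximize each ragas metric
+# (answer relevancy, faithfulness, context recall, context precision). Note
+# that the full cross product of the 15 lists is roughly 6e7 combinations, so
+# in practice you would prune the lists to a small subset before launching a
+# real sweep. A sketch of an invocation (paths and model id are placeholders,
+# not part of this patch):
+#
+#   python ragas_superbenchmark.py \
+#       --ground_truth_file data/ground_truth.jsonl \
+#       --input_path docs.txt \
+#       --llm_model <model_id_or_path>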
+ +import argparse +import os +import subprocess +import jsonlines + +def main(): + if os.path.exists("result_ragas.jsonl"): + os.remove("result_ragas.jsonl") + script_path = 'ragas_benchmark.sh' + + parser = argparse.ArgumentParser() + parser.add_argument("--ground_truth_file", type=str) + parser.add_argument("--input_path", type=str) + parser.add_argument("--use_openai_key", default=False, action='store_true') + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + args = parser.parse_args() + + arg1 = args.ground_truth_file + arg2 = args.input_path + arg3 = str(args.use_openai_key) + arg4 = args.vector_database + arg5 = args.embedding_model + arg6 = args.llm_model + arg7 = args.reranker_model + + arg8_list = ['default','child_parent','bm25'] + arg9_list = ['True','False'] + arg10_list = ['similarity','mmr','similarity_score_threshold'] + arg11_list = ['1', '3', '5'] + arg12_list = ['5', '10', '20'] + arg13_list = ['0.3','0.5','0.7'] + arg14_list = ['1','3', '5','10'] + arg15_list = ['True','False'] + arg16_list = ['256','512', '768','1024'] + arg17_list = ['0.01','0.05', '0.1','0.3','0.5','0.7'] + arg18_list = ['1','3', '10','20'] + arg19_list = ['0.1','0.3', '0.5','0.7'] + arg20_list = ['1.0','1.1', '1.3','1.5','1.7'] + arg21_list = ['1','3', '10','20'] + arg22_list = ['True','False'] + + for arg8 in arg8_list: + print('--'*1 +'retrieval_type',arg8) + for arg9 in arg9_list: + print('--'*2 +'polish',arg9) + for arg10 in arg10_list: + print('--'*3 +'search_type',arg10) + for arg11 in arg11_list: + print('--'*4 +'k',arg11) + for arg12 in arg12_list: + print('--'*5 +'fetch_k',arg12) + for arg13 in arg13_list: + print('--'*6 +'score_threshold',arg13) + for arg14 in arg14_list: + print('--'*7 +'top_n',arg14) + for arg15 in arg15_list: + print('--'*8 +'enable_rerank',arg15) + for arg16 in arg16_list: + print('--'*9 +'max_chuck_size',arg16) + for arg17 in arg17_list: + print('--'*10 +'temperature',arg17) + for arg18 in arg18_list: + print('--'*11 +'top_k',arg18) + for arg19 in arg19_list: + print('--'*12 +'top_p',arg19) + for arg20 in arg20_list: + print('--'*13 +'repetition_penalty',arg20) + for arg21 in arg21_list: + print('--'*14 +'num_beams',arg21) + for arg22 in arg22_list: + print('--'*15 +'do_sample',arg22) + subprocess.run(['bash', + script_path, + '--ground_truth_file='+arg1, + '--input_path='+arg2, + '--use_openai_key='+arg3, + '--vector_database='+arg4, + '--embedding_model='+arg5, + '--llm_model='+arg6, + '--reranker_model='+arg7, + '--retrieval_type='+arg8, + '--polish='+arg9, + '--search_type='+arg10, + '--k='+arg11, + '--fetch_k='+arg12, + '--score_threshold='+arg13, + '--top_n='+arg14, + '--enable_rerank='+arg15, + '--max_chuck_size='+arg16, + '--temperature='+arg17, + '--top_k='+arg18, + '--top_p='+arg19, + '--repetition_penalty='+arg20, + '--num_beams='+arg21, + '--do_sample='+arg22], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL) + + file_jsonl_path='result_ragas.jsonl' + + answer_relevancy_average_list = [] + faithfulness_average_list = [] + context_recall_average_list = [] + context_precision_average_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + answer_relevancy_average=stu["answer_relevancy_average"] + faithfulness_average=stu["faithfulness_average"] + 
context_recall_average=stu["context_recall_average"] + context_precision_average=stu["context_precision_average"] + + answer_relevancy_average_list.append(answer_relevancy_average) + faithfulness_average_list.append(faithfulness_average) + context_recall_average_list.append(context_recall_average) + context_precision_average_list.append(context_precision_average) + + answer_relevancy_average_line_number_list = [i for i, v in enumerate(answer_relevancy_average_list) \ + if v == max(answer_relevancy_average_list)] + faithfulness_average_line_number_list = [i for i, v in enumerate(faithfulness_average_list) \ + if v == max(faithfulness_average_list)] + context_recall_average_line_number_list = [i for i, v in enumerate(context_recall_average_list) \ + if v == max(context_recall_average_list)] + context_precision_average_line_number_list = [i for i, v in enumerate(context_precision_average_list) \ + if v == max(context_precision_average_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in answer_relevancy_average_line_number_list: + print('max_answer_relevancy_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in faithfulness_average_line_number_list: + print('max_faithfulness_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_recall_average_line_number_list: + print('max_context_recall_average',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in context_precision_average_line_number_list: + print('max_context_precision_average',stu) + line+=1 + + +if __name__ == '__main__': + main() diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py index 4a8349bbde7..010add7914d 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval.py @@ -1,284 +1,284 @@ -# !/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from typing import ClassVar, Collection -from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \ - HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611 -from langchain_community.embeddings import GooglePalmEmbeddings -from langchain.text_splitter import RecursiveCharacterTextSplitter -from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant # pylint: disable=E0401, E0611 -import uuid -from langchain_core.documents import Document -from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever # pylint: disable=E0401, E0611 -from langchain_core.vectorstores import VectorStoreRetriever -from langchain_community.retrievers import BM25Retriever -import jsonlines -import numpy as np -import logging -import argparse - -logging.basicConfig( - format="%(asctime)s %(name)s:%(levelname)s:%(message)s", - datefmt="%d-%M-%Y %H:%M:%S", - level=logging.INFO -) - -def document_transfer(data_collection): - "Transfer the raw document into langchain supported format." - documents = [] - for data, meta in data_collection: - doc_id = str(uuid.uuid4()) - metadata = {"source": meta, "identify_id":doc_id} - doc = Document(page_content=data, metadata=metadata) - documents.append(doc) - return documents - -def document_append_id(documents): - for _doc in documents: - _doc.metadata["doc_id"] = _doc.metadata["identify_id"] - return documents - -def index_library(index_file_jsonl_path): - index_list = [] - with open(index_file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - passages=[stu["context"][0],index_file_jsonl_path] - index_list.append(passages) - return index_list - -def query_set(query_file_jsonl_path): - query_list = [] - with open(query_file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - passages=stu["query"] - query_list.append(passages) - return query_list - -def load_list(file_jsonl_path, item): - with open(file_jsonl_path) as file: - data = [] - for stu in jsonlines.Reader(file): - content = ",".join(stu[item]) - data.append(content) - return data - -def evaluate(preds, labels, cutoffs=[1,5]): - """Evaluate MRR and Hit at cutoffs.""" - metrics = {} - - # MRR - mrrs = np.zeros(len(cutoffs)) - for pred, label in zip(preds, labels): - jump = False - for i, x in enumerate(pred, 1): - if x in label: - for k, cutoff in enumerate(cutoffs): - if i <= cutoff: - mrrs[k] += 1 / i - jump = True - if jump: - break - mrrs /= len(preds) - for i, cutoff in enumerate(cutoffs): - mrr = mrrs[i] - metrics[f"MRR@{cutoff}"] = mrr - - # Hit - hit_rate_list=[] - for cutoff in cutoffs: - hit_num = 0 - for pred, label in zip(preds, labels): - hit_list=np.intersect1d(label, pred[:cutoff]) - hit_num = hit_num+len(hit_list) - hit_rate = hit_num/len(labels) - hit_rate_list.append(hit_rate) - for i, cutoff in enumerate(cutoffs): - hit_rate = hit_rate_list[i] - metrics[f"Hit@{cutoff}"] = hit_rate - - return metrics - -class Retrieval(): - def __init__(self, - vector_database="Chroma", - embedding_model="BAAI/bge-base-en-v1.5", - input_path = None, - retrieval_type = 'default', - append=True, - **kwargs): - - self.vector_database = vector_database - self.input_path = None - self.retrieval_type = retrieval_type - self.retriever = None - self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \ - if 'child_size' in kwargs else 512) - allowed_retrieval_type: ClassVar[Collection[str]] = ( - "default", - "child_parent", - 'bm25', - ) - - 
assert self.retrieval_type in allowed_retrieval_type, "search_type of {} not allowed.".format( \ - self.retrieval_type) - - self.input_path = input_path - assert self.input_path != None, "Should gave an input path!" - - try: - if "instruct" in embedding_model: - self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) - elif "bge" in embedding_model: - self.embeddings = HuggingFaceBgeEmbeddings( - model_name=embedding_model, - encode_kwargs={'normalize_embeddings': True}, - query_instruction="Represent this sentence for searching relevant passages:") - elif "Google" == embedding_model: - self.embeddings = GooglePalmEmbeddings() - else: - self.embeddings = HuggingFaceEmbeddings( - model_name=embedding_model, - encode_kwargs={"normalize_embeddings": True}, - ) - except Exception as e: - logging.error("Please select a proper embedding model.") - logging.error(e) - - data_collection = index_library(self.input_path) - logging.info("The parsing for the uploaded files is finished.") - - langchain_documents = document_transfer(data_collection) - logging.info("The format of parsed documents is transferred.") - - if kwargs['search_type']=="similarity": - kwargs['search_kwargs']={"k":5} - elif kwargs['search_type']=="mmr": - kwargs['search_kwargs']={"k":5} - elif kwargs['search_type']=="similarity_score_threshold": - kwargs['search_kwargs']={"k":5, "score_threshold":0.6} - - if self.vector_database == "Chroma": - self.database = Chroma - elif self.vector_database == "Qdrant": - self.database = Qdrant - if self.retrieval_type == 'default': # Using vector store retriever - if append: - knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, - **kwargs) - else: - knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ - **kwargs) - if self.vector_database == "Qdrant" and knowledge_base.is_local(): - # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. - knowledge_base.client.close() - elif self.retrieval_type == "child_parent": # Using child-parent store retriever - child_documents = self.splitter.split_documents(langchain_documents) - langchain_documents = document_append_id(langchain_documents) - if append: - knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, - **kwargs) - child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \ - embedding=self.embeddings, **kwargs) - else: - knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) - child_knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, \ - sign='child', **kwargs) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ - child_document_store=child_knowledge_base, **kwargs) - if self.vector_database == "Qdrant" : - # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. 
- if knowledge_base.is_local(): - knowledge_base.client.close() - if child_knowledge_base.is_local(): - child_knowledge_base.client.close() - elif self.retrieval_type == "bm25": - self.docs = document_append_id(langchain_documents) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, docs=self.docs, **kwargs) - logging.info("The retriever is successfully built.") - - def pre_llm_inference_actions(self, query): - assert self.retriever is not None, logging.info("Please check the status of retriever") - context = self.retriever.get_context(query) - return context - - -class RetrieverAdapter(): - def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \ - reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs): - self.retrieval_type = retrieval_type - if enable_rerank: - from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker # pylint: disable=E0401, E0611 - from FlagEmbedding import FlagReranker - reranker = FlagReranker(reranker_model) - self.reranker = BgeReranker(model = reranker, top_n=top_n) - else: - self.reranker = None - - if self.retrieval_type == "default": - self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs) - elif self.retrieval_type == "bm25": - self.retriever = BM25Retriever.from_documents(docs, **kwargs) - elif self.retrieval_type == "child_parent": - self.retriever = ChildParentRetriever(parentstore=document_store, \ - vectorstore=child_document_store, - **kwargs) # pylint: disable=abstract-class-instantiated - else: - logging.error('The chosen retrieval type remains outside the supported scope.') - - def get_context(self, query): - context = [] - retrieved_documents = self.retriever.get_relevant_documents(query) - if self.reranker is not None: - retrieved_documents = self.reranker.compress_documents(documents = retrieved_documents, query = query) - for doc in retrieved_documents: - context.append(doc.page_content) - return context - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument("--index_file_jsonl_path", type=str) - parser.add_argument("--query_file_jsonl_path", type=str) - parser.add_argument("--vector_database", type=str, default="Chroma") - parser.add_argument("--embedding_model", type=str, default="BAAI/bge-base-en-v1.5") - parser.add_argument("--retrieval_type", type=str, default='default') - parser.add_argument("--search_type", type=str, default="similarity") - args = parser.parse_args() - - index_file_jsonl_path = args.index_file_jsonl_path - query_file_jsonl_path = args.query_file_jsonl_path - vector_database = args.vector_database - embedding_model = args.embedding_model - retrieval_type = args.retrieval_type - search_type = args.search_type - - query_list = query_set(query_file_jsonl_path) - retrieval_results=[] - for query in query_list: - context=Retrieval(input_path=index_file_jsonl_path, - vector_database=vector_database, - embedding_model=embedding_model, - retrieval_type = retrieval_type, - search_type=search_type).pre_llm_inference_actions(query=query) - retrieval_results.append(context) - ground_truths=load_list(query_file_jsonl_path, "pos") - metrics = evaluate(retrieval_results, ground_truths) - print(metrics) - return metrics - -if __name__ == '__main__': - main() +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in 
compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import ClassVar, Collection
+from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \
+    HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611
+from langchain_community.embeddings import GooglePalmEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant # pylint: disable=E0401, E0611
+import uuid
+from langchain_core.documents import Document
+from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever # pylint: disable=E0401, E0611
+from langchain_core.vectorstores import VectorStoreRetriever
+from langchain_community.retrievers import BM25Retriever
+import jsonlines
+import numpy as np
+import logging
+import argparse
+
+logging.basicConfig(
+    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
+    datefmt="%d-%m-%Y %H:%M:%S",
+    level=logging.INFO
+)
+
+def document_transfer(data_collection):
+    "Transfer the raw documents into the langchain-supported format."
+    documents = []
+    for data, meta in data_collection:
+        doc_id = str(uuid.uuid4())
+        metadata = {"source": meta, "identify_id":doc_id}
+        doc = Document(page_content=data, metadata=metadata)
+        documents.append(doc)
+    return documents
+
+def document_append_id(documents):
+    for _doc in documents:
+        _doc.metadata["doc_id"] = _doc.metadata["identify_id"]
+    return documents
+
+def index_library(index_file_jsonl_path):
+    index_list = []
+    with open(index_file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            passages=[stu["context"][0],index_file_jsonl_path]
+            index_list.append(passages)
+    return index_list
+
+def query_set(query_file_jsonl_path):
+    query_list = []
+    with open(query_file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            passages=stu["query"]
+            query_list.append(passages)
+    return query_list
+
+def load_list(file_jsonl_path, item):
+    with open(file_jsonl_path) as file:
+        data = []
+        for stu in jsonlines.Reader(file):
+            content = ",".join(stu[item])
+            data.append(content)
+    return data
+
+def evaluate(preds, labels, cutoffs=[1,5]):
+    """Evaluate MRR and Hit at cutoffs."""
+    metrics = {}
+
+    # MRR
+    mrrs = np.zeros(len(cutoffs))
+    for pred, label in zip(preds, labels):
+        jump = False
+        for i, x in enumerate(pred, 1):
+            if x in label:
+                for k, cutoff in enumerate(cutoffs):
+                    if i <= cutoff:
+                        mrrs[k] += 1 / i
+                jump = True
+            if jump:
+                break
+    mrrs /= len(preds)
+    for i, cutoff in enumerate(cutoffs):
+        mrr = mrrs[i]
+        metrics[f"MRR@{cutoff}"] = mrr
+
+    # Hit
+    hit_rate_list=[]
+    for cutoff in cutoffs:
+        hit_num = 0
+        for pred, label in zip(preds, labels):
+            hit_list=np.intersect1d(label, pred[:cutoff])
+            hit_num = hit_num+len(hit_list)
+        hit_rate = hit_num/len(labels)
+        hit_rate_list.append(hit_rate)
+    for i, cutoff in enumerate(cutoffs):
+        hit_rate = hit_rate_list[i]
+        metrics[f"Hit@{cutoff}"] = hit_rate
+
+    return metrics
+
+class Retrieval():
+    def __init__(self,
+                 vector_database="Chroma",
+                 
embedding_model="BAAI/bge-base-en-v1.5",
+                 input_path = None,
+                 retrieval_type = 'default',
+                 append=True,
+                 **kwargs):
+
+        self.vector_database = vector_database
+        self.input_path = None
+        self.retrieval_type = retrieval_type
+        self.retriever = None
+        self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \
+            if 'child_size' in kwargs else 512)
+        allowed_retrieval_type: ClassVar[Collection[str]] = (
+            "default",
+            "child_parent",
+            'bm25',
+        )
+
+        assert self.retrieval_type in allowed_retrieval_type, "retrieval_type of {} not allowed.".format( \
+            self.retrieval_type)
+
+        self.input_path = input_path
+        assert self.input_path is not None, "An input path must be given!"
+
+        try:
+            if "instruct" in embedding_model:
+                self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model)
+            elif "bge" in embedding_model:
+                self.embeddings = HuggingFaceBgeEmbeddings(
+                    model_name=embedding_model,
+                    encode_kwargs={'normalize_embeddings': True},
+                    query_instruction="Represent this sentence for searching relevant passages:")
+            elif "Google" == embedding_model:
+                self.embeddings = GooglePalmEmbeddings()
+            else:
+                self.embeddings = HuggingFaceEmbeddings(
+                    model_name=embedding_model,
+                    encode_kwargs={"normalize_embeddings": True},
+                )
+        except Exception as e:
+            logging.error("Please select a proper embedding model.")
+            logging.error(e)
+
+        data_collection = index_library(self.input_path)
+        logging.info("The parsing for the uploaded files is finished.")
+
+        langchain_documents = document_transfer(data_collection)
+        logging.info("The format of parsed documents is transferred.")
+
+        if kwargs['search_type']=="similarity":
+            kwargs['search_kwargs']={"k":5}
+        elif kwargs['search_type']=="mmr":
+            kwargs['search_kwargs']={"k":5}
+        elif kwargs['search_type']=="similarity_score_threshold":
+            kwargs['search_kwargs']={"k":5, "score_threshold":0.6}
+
+        if self.vector_database == "Chroma":
+            self.database = Chroma
+        elif self.vector_database == "Qdrant":
+            self.database = Qdrant
+        if self.retrieval_type == 'default': # Using vector store retriever
+            if append:
+                knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings,
+                                                              **kwargs)
+            else:
+                knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \
+                **kwargs)
+            if self.vector_database == "Qdrant" and knowledge_base.is_local():
+                # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously.
+                knowledge_base.client.close()
+        elif self.retrieval_type == "child_parent": # Using child-parent store retriever
+            child_documents = self.splitter.split_documents(langchain_documents)
+            langchain_documents = document_append_id(langchain_documents)
+            if append:
+                knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings,
+                                                              **kwargs)
+                child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \
+                    embedding=self.embeddings, **kwargs)
+            else:
+                knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs)
+                child_knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, \
+                    sign='child', **kwargs)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \
+                child_document_store=child_knowledge_base, **kwargs)
+            if self.vector_database == "Qdrant" :
+                # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously.
+                if knowledge_base.is_local():
+                    knowledge_base.client.close()
+                if child_knowledge_base.is_local():
+                    child_knowledge_base.client.close()
+        elif self.retrieval_type == "bm25":
+            self.docs = document_append_id(langchain_documents)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, docs=self.docs, **kwargs)
+        logging.info("The retriever is successfully built.")
+
+    def pre_llm_inference_actions(self, query):
+        # logging.info() returns None, so it cannot serve as an assert message.
+        assert self.retriever is not None, "Please check the status of retriever"
+        context = self.retriever.get_context(query)
+        return context
+
+
+class RetrieverAdapter():
+    def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \
+                 reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs):
+        self.retrieval_type = retrieval_type
+        if enable_rerank:
+            from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker # pylint: disable=E0401, E0611
+            from FlagEmbedding import FlagReranker
+            reranker = FlagReranker(reranker_model)
+            self.reranker = BgeReranker(model = reranker, top_n=top_n)
+        else:
+            self.reranker = None
+
+        if self.retrieval_type == "default":
+            self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs)
+        elif self.retrieval_type == "bm25":
+            self.retriever = BM25Retriever.from_documents(docs, **kwargs)
+        elif self.retrieval_type == "child_parent":
+            self.retriever = ChildParentRetriever(parentstore=document_store, \
+                vectorstore=child_document_store,
+                **kwargs) # pylint: disable=abstract-class-instantiated
+        else:
+            logging.error('The chosen retrieval type is not supported.')
+
+    def get_context(self, query):
+        context = []
+        retrieved_documents = self.retriever.get_relevant_documents(query)
+        if self.reranker is not None:
+            retrieved_documents = self.reranker.compress_documents(documents = retrieved_documents, query = query)
+        for doc in retrieved_documents:
+            context.append(doc.page_content)
+        return context
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--index_file_jsonl_path", type=str)
+    parser.add_argument("--query_file_jsonl_path", type=str)
+    parser.add_argument("--vector_database", type=str, default="Chroma")
+    parser.add_argument("--embedding_model", type=str, default="BAAI/bge-base-en-v1.5")
+    parser.add_argument("--retrieval_type", type=str, default='default')
+    parser.add_argument("--search_type", 
type=str, default="similarity")
+    args = parser.parse_args()
+
+    index_file_jsonl_path = args.index_file_jsonl_path
+    query_file_jsonl_path = args.query_file_jsonl_path
+    vector_database = args.vector_database
+    embedding_model = args.embedding_model
+    retrieval_type = args.retrieval_type
+    search_type = args.search_type
+
+    query_list = query_set(query_file_jsonl_path)
+    retrieval_results=[]
+    # Build the retriever once instead of re-indexing the corpus for every query.
+    retriever = Retrieval(input_path=index_file_jsonl_path,
+                          vector_database=vector_database,
+                          embedding_model=embedding_model,
+                          retrieval_type = retrieval_type,
+                          search_type=search_type)
+    for query in query_list:
+        context = retriever.pre_llm_inference_actions(query=query)
+        retrieval_results.append(context)
+    ground_truths=load_list(query_file_jsonl_path, "pos")
+    metrics = evaluate(retrieval_results, ground_truths)
+    print(metrics)
+    return metrics
+
+if __name__ == '__main__':
+    main()
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py
index ebf2fd2e9d5..080b3434f2a 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/evaluate_retrieval_benchmark.py
@@ -1,375 +1,375 @@
-# !/usr/bin/env python
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2023 Intel Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from typing import ClassVar, Collection
-from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \
-    HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings # pylint: disable=E0401, E0611
-from langchain_community.embeddings import GooglePalmEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant # pylint: disable=E0401, E0611
-import uuid
-from langchain_core.documents import Document
-from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever # pylint: disable=E0401, E0611
-from langchain_core.vectorstores import VectorStoreRetriever
-from langchain_community.retrievers import BM25Retriever
-from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.detector.query_explainer \
-    import QueryPolisher
-from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
-import jsonlines
-import numpy as np
-import logging
-import argparse
-
-logging.basicConfig(
-    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
-    datefmt="%d-%M-%Y %H:%M:%S",
-    level=logging.INFO
-)
-
-def document_transfer(data_collection):
-    "Transfer the raw document into langchain supported format."
- documents = [] - for data, meta in data_collection: - doc_id = str(uuid.uuid4()) - metadata = {"source": meta, "identify_id":doc_id} - doc = Document(page_content=data, metadata=metadata) - documents.append(doc) - return documents - -def document_append_id(documents): - for _doc in documents: - _doc.metadata["doc_id"] = _doc.metadata["identify_id"] - return documents - -def index_library(index_file_jsonl_path): - index_list = [] - with open(index_file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - passages=[stu["context"][0],index_file_jsonl_path] - index_list.append(passages) - return index_list - -def query_set(query_file_jsonl_path): - query_list = [] - with open(query_file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - passages=stu["query"] - query_list.append(passages) - return query_list - -def load_list(file_jsonl_path, item): - with open(file_jsonl_path) as file: - data = [] - for stu in jsonlines.Reader(file): - content = ",".join(stu[item]) - data.append(content) - return data - -def evaluate(preds, labels, cutoffs=[1]): - """Evaluate MRR and Hit at cutoffs.""" - metrics = {} - - # MRR - mrrs = np.zeros(len(cutoffs)) - for pred, label in zip(preds, labels): - jump = False - for i, x in enumerate(pred, 1): - if x in label: - for k, cutoff in enumerate(cutoffs): - if i <= cutoff: - mrrs[k] += 1 / i - jump = True - if jump: - break - mrrs /= len(preds) - for i, cutoff in enumerate(cutoffs): - mrr = mrrs[i] - metrics[f"MRR@{cutoff}"] = mrr - - # Hit - hit_rate_list=[] - for cutoff in cutoffs: - hit_num = 0 - for pred, label in zip(preds, labels): - hit_list=np.intersect1d(label, pred[:cutoff]) - hit_num = hit_num+len(hit_list) - hit_rate = hit_num/len(labels) - hit_rate_list.append(hit_rate) - for i, cutoff in enumerate(cutoffs): - hit_rate = hit_rate_list[i] - metrics[f"Hit@{cutoff}"] = hit_rate - - return metrics["MRR@1"], metrics["Hit@1"] - -class Retrieval(): - def __init__(self, - vector_database="Chroma", - embedding_model="BAAI/bge-large-en-v1.5", - input_path = None, - retrieval_type = 'default', - append=True, - polish=False, - k=1, - fetch_k=1, - score_threshold=0.3, - reranker_model= "BAAI/bge-reranker-large", - top_n = 1, - enable_rerank = False, - **kwargs): - - self.vector_database = vector_database - self.input_path = None - self.retrieval_type = retrieval_type - self.retriever = None - self.k = k - self.fetch_k = fetch_k - self.score_threshold = score_threshold - self.reranker_model= reranker_model, - self.top_n = top_n - self.enable_rerank=enable_rerank - - self.splitter = RecursiveCharacterTextSplitter(chunk_size= kwargs['child_size'] \ - if 'child_size' in kwargs else 512) - allowed_retrieval_type: ClassVar[Collection[str]] = ( - "default", - "child_parent", - 'bm25', - ) - - if polish: - self.polisher = QueryPolisher() - else: - self.polisher = None - - assert self.retrieval_type in allowed_retrieval_type, "search_type of {} not allowed.".format( \ - self.retrieval_type) - - self.input_path = input_path - assert self.input_path != None, "Should gave an input path!" 
- - try: - if "instruct" in embedding_model: - self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model) - elif "bge" in embedding_model: - self.embeddings = HuggingFaceBgeEmbeddings( - model_name=embedding_model, - encode_kwargs={'normalize_embeddings': True}, - query_instruction="Represent this sentence for searching relevant passages:") - elif "Google" == embedding_model: - self.embeddings = GooglePalmEmbeddings() - else: - self.embeddings = HuggingFaceEmbeddings( - model_name=embedding_model, - encode_kwargs={"normalize_embeddings": True}, - ) - except Exception as e: - logging.error("Please select a proper embedding model.") - logging.error(e) - - data_collection = index_library(self.input_path) - logging.info("The parsing for the uploaded files is finished.") - - langchain_documents = document_transfer(data_collection) - logging.info("The format of parsed documents is transferred.") - - if kwargs['search_type']=="similarity": - kwargs['search_kwargs']={"k":self.k} - elif kwargs['search_type']=="mmr": - kwargs['search_kwargs']={"k":self.k, "fetch_k":self.fetch_k} - elif kwargs['search_type']=="similarity_score_threshold": - kwargs['search_kwargs']={"k":self.k, "score_threshold":self.score_threshold} - - if self.vector_database == "Chroma": - self.database = Chroma - elif self.vector_database == "Qdrant": - self.database = Qdrant - if self.retrieval_type == 'default': # Using vector store retriever - if append: - knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, - **kwargs) - else: - knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ - **kwargs) - if self.vector_database == "Qdrant" and knowledge_base.is_local(): - # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. - knowledge_base.client.close() - elif self.retrieval_type == "child_parent": # Using child-parent store retriever - child_documents = self.splitter.split_documents(langchain_documents) - langchain_documents = document_append_id(langchain_documents) - if append: - knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings, - **kwargs) - child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \ - embedding=self.embeddings, **kwargs) - else: - knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs) - child_knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, \ - sign='child', **kwargs) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \ - child_document_store=child_knowledge_base, **kwargs) - if self.vector_database == "Qdrant" : - # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously. 
- if knowledge_base.is_local(): - knowledge_base.client.close() - if child_knowledge_base.is_local(): - child_knowledge_base.client.close() - elif self.retrieval_type == "bm25": - self.docs = document_append_id(langchain_documents) - self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, - docs=self.docs, - reranker_model=self.reranker_model, - top_n = self.top_n, - enable_rerank = self.enable_rerank, - **kwargs) - logging.info("The retriever is successfully built.") - - def pre_llm_inference_actions(self, model_name, query): - if self.polisher: - try: - query = self.polisher.polish_query(model_name, query) - except Exception as e: - logging.info(f"Polish the user query failed, {e}") - raise Exception("[Rereieval ERROR] query polish failed!") - - assert self.retriever is not None, logging.info("Please check the status of retriever") - context = self.retriever.get_context(query) - return context - - -class RetrieverAdapter(): - def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \ - reranker_model="BAAI/bge-reranker-large", top_n = 1, enable_rerank = False, **kwargs): - self.retrieval_type = retrieval_type - if enable_rerank: - from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker # pylint: disable=E0401, E0611 - from FlagEmbedding import FlagReranker - reranker = FlagReranker(reranker_model) - self.reranker = BgeReranker(model = reranker, top_n=top_n) - else: - self.reranker = None - - if self.retrieval_type == "default": - self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs) - elif self.retrieval_type == "bm25": - self.retriever = BM25Retriever.from_documents(docs, **kwargs) - elif self.retrieval_type == "child_parent": - self.retriever = ChildParentRetriever(parentstore=document_store, \ - vectorstore=child_document_store, - **kwargs) # pylint: disable=abstract-class-instantiated - else: - logging.error('The chosen retrieval type remains outside the supported scope.') - - def get_context(self, query): - context = [] - retrieved_documents = self.retriever.get_relevant_documents(query) - if self.reranker is not None: - retrieved_documents = self.reranker.compress_documents(documents = retrieved_documents, query = query) - for doc in retrieved_documents: - context.append(doc.page_content) - return context - -def main(): - import os, shutil - if os.path.exists("output"): - shutil.rmtree("output", ignore_errors=True) - - parser = argparse.ArgumentParser() - parser.add_argument("--index_file_jsonl_path", type=str) - parser.add_argument("--query_file_jsonl_path", type=str) - parser.add_argument("--vector_database", type=str, default="Chroma") - parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") - parser.add_argument("--llm_model", type=str) - parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") - - parser.add_argument("--retrieval_type", type=str, default='default') - parser.add_argument("--polish", default=False, action='store_true') - parser.add_argument("--search_type", type=str, default="similarity") - parser.add_argument("--k", type=int, default=1) - parser.add_argument("--fetch_k", type=int, default=5) - parser.add_argument("--score_threshold", type=float, default=0.3) - parser.add_argument("--top_n", type=int, default=1) - parser.add_argument("--enable_rerank", default=False, action='store_true') - - args = parser.parse_args() - - index_file_jsonl_path = args.index_file_jsonl_path - 
query_file_jsonl_path = args.query_file_jsonl_path - vector_database = args.vector_database - embedding_model = args.embedding_model - retrieval_type = args.retrieval_type - polish = args.polish - search_type = args.search_type - llm_model = args.llm_model - k = args.k - fetch_k = args.fetch_k - score_threshold = args.score_threshold - reranker_model = args.reranker_model - top_n = args.top_n - enable_rerank = args.enable_rerank - - query_list = query_set(query_file_jsonl_path) - - config = PipelineConfig(model_name_or_path=llm_model) - build_chatbot(config) - - retrieval_results=[] - for query in query_list: - context=Retrieval(input_path=index_file_jsonl_path, - vector_database=vector_database, - embedding_model=embedding_model, - retrieval_type = retrieval_type, - polish = polish, - search_type=search_type, - k=k, - fetch_k=fetch_k, - score_threshold=score_threshold, - reranker_model=reranker_model, - top_n = top_n, - enable_rerank = enable_rerank - ).pre_llm_inference_actions(model_name=llm_model, query=query) - retrieval_results.append(context) - ground_truths=load_list(query_file_jsonl_path, "pos") - MRR, Hit = evaluate(retrieval_results, ground_truths) - - file_json_path='result_retrieval.jsonl' - - if MRR and Hit: - data = { - "index_file_jsonl_path": args.index_file_jsonl_path, - "query_file_jsonl_path": args.query_file_jsonl_path, - "vector_database": args.vector_database, - "embedding_model": args.embedding_model, - "retrieval_type": args.retrieval_type, - "polish": args.polish, - "search_type": args.search_type, - "llm_model": args.llm_model, - "k": args.k, - "fetch_k": args.fetch_k, - "score_threshold": args.score_threshold, - "reranker_model": args.reranker_model, - "top_n": args.top_n, - "enable_rerank": args.enable_rerank, - "MRR": MRR, - "Hit": Hit, - } - print(data) - with jsonlines.open(file_json_path,"a") as file_json: - file_json.write(data) - -if __name__ == '__main__': - main() +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
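+
+# Benchmark entry point for retrieval evaluation: build a retriever over a JSONL
+# candidate-context file, answer every query from the query file, and append the
+# MRR@1 / Hit@1 scores together with the hyper-parameters used to result_retrieval.jsonl.
+# Example invocation (paths are illustrative):
+#   python evaluate_retrieval_benchmark.py --index_file_jsonl_path candidate_context.jsonl \
+#       --query_file_jsonl_path example.jsonl --llm_model <model_path> --embedding_model BAAI/bge-large-en-v1.5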
+
+from typing import Collection
+from intel_extension_for_transformers.langchain_community.embeddings import HuggingFaceEmbeddings, \
+    HuggingFaceInstructEmbeddings, HuggingFaceBgeEmbeddings  # pylint: disable=E0401, E0611
+from langchain_community.embeddings import GooglePalmEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from intel_extension_for_transformers.langchain_community.vectorstores import Chroma, Qdrant  # pylint: disable=E0401, E0611
+import uuid
+from langchain_core.documents import Document
+from intel_extension_for_transformers.langchain_community.retrievers import ChildParentRetriever  # pylint: disable=E0401, E0611
+from langchain_core.vectorstores import VectorStoreRetriever
+from langchain_community.retrievers import BM25Retriever
+from intel_extension_for_transformers.neural_chat.pipeline.plugins.retrieval.detector.query_explainer \
+    import QueryPolisher
+from intel_extension_for_transformers.neural_chat import build_chatbot, PipelineConfig
+import jsonlines
+import numpy as np
+import logging
+import argparse
+
+logging.basicConfig(
+    format="%(asctime)s %(name)s:%(levelname)s:%(message)s",
+    datefmt="%d-%m-%Y %H:%M:%S",
+    level=logging.INFO
+)
+
+def document_transfer(data_collection):
+    """Transfer the raw documents into the langchain-supported Document format."""
+    documents = []
+    for data, meta in data_collection:
+        doc_id = str(uuid.uuid4())
+        metadata = {"source": meta, "identify_id": doc_id}
+        doc = Document(page_content=data, metadata=metadata)
+        documents.append(doc)
+    return documents
+
+def document_append_id(documents):
+    """Mirror each document's identify_id into the doc_id metadata field."""
+    for _doc in documents:
+        _doc.metadata["doc_id"] = _doc.metadata["identify_id"]
+    return documents
+
+def index_library(index_file_jsonl_path):
+    """Load (context, source-path) pairs from the candidate-context JSONL file."""
+    index_list = []
+    with open(index_file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            passages = [stu["context"][0], index_file_jsonl_path]
+            index_list.append(passages)
+    return index_list
+
+def query_set(query_file_jsonl_path):
+    """Load the query strings from the query JSONL file."""
+    query_list = []
+    with open(query_file_jsonl_path) as file:
+        for stu in jsonlines.Reader(file):
+            passages = stu["query"]
+            query_list.append(passages)
+    return query_list
+
+def load_list(file_jsonl_path, item):
+    """Load the given field from a JSONL file, comma-joining multi-valued entries."""
+    with open(file_jsonl_path) as file:
+        data = []
+        for stu in jsonlines.Reader(file):
+            content = ",".join(stu[item])
+            data.append(content)
+    return data
+
+def evaluate(preds, labels, cutoffs=[1]):
+    """Evaluate MRR and Hit at cutoffs."""
+    metrics = {}
+
+    # MRR: reciprocal rank of the first relevant passage, averaged over all queries.
+    mrrs = np.zeros(len(cutoffs))
+    for pred, label in zip(preds, labels):
+        jump = False
+        for i, x in enumerate(pred, 1):
+            if x in label:
+                for k, cutoff in enumerate(cutoffs):
+                    if i <= cutoff:
+                        mrrs[k] += 1 / i
+                jump = True
+            if jump:
+                break
+    mrrs /= len(preds)
+    for i, cutoff in enumerate(cutoffs):
+        mrr = mrrs[i]
+        metrics[f"MRR@{cutoff}"] = mrr
+
+    # Hit: fraction of queries whose top-cutoff results contain a relevant passage.
+    hit_rate_list = []
+    for cutoff in cutoffs:
+        hit_num = 0
+        for pred, label in zip(preds, labels):
+            hit_list = np.intersect1d(label, pred[:cutoff])
+            hit_num = hit_num + len(hit_list)
+        hit_rate = hit_num / len(labels)
+        hit_rate_list.append(hit_rate)
+    for i, cutoff in enumerate(cutoffs):
+        hit_rate = hit_rate_list[i]
+        metrics[f"Hit@{cutoff}"] = hit_rate
+
+    return metrics["MRR@1"], metrics["Hit@1"]
+
+class Retrieval():
+    def __init__(self,
+                 vector_database="Chroma",
+                 embedding_model="BAAI/bge-large-en-v1.5",
+                 input_path=None,
+                 retrieval_type='default',
+                 append=True,
+                 polish=False,
+                 k=1,
+                 fetch_k=1,
+                 score_threshold=0.3,
+                 reranker_model="BAAI/bge-reranker-large",
+                 top_n=1,
+                 enable_rerank=False,
+                 **kwargs):
+
+        self.vector_database = vector_database
+        self.input_path = None
+        self.retrieval_type = retrieval_type
+        self.retriever = None
+        self.k = k
+        self.fetch_k = fetch_k
+        self.score_threshold = score_threshold
+        self.reranker_model = reranker_model
+        self.top_n = top_n
+        self.enable_rerank = enable_rerank
+
+        self.splitter = RecursiveCharacterTextSplitter(chunk_size=kwargs['child_size'] \
+            if 'child_size' in kwargs else 512)
+        allowed_retrieval_type: Collection[str] = (
+            "default",
+            "child_parent",
+            "bm25",
+        )
+
+        if polish:
+            self.polisher = QueryPolisher()
+        else:
+            self.polisher = None
+
+        assert self.retrieval_type in allowed_retrieval_type, "retrieval_type of {} not allowed.".format( \
+            self.retrieval_type)
+
+        self.input_path = input_path
+        assert self.input_path is not None, "Please provide an input path!"
+
+        try:
+            if "instruct" in embedding_model:
+                self.embeddings = HuggingFaceInstructEmbeddings(model_name=embedding_model)
+            elif "bge" in embedding_model:
+                self.embeddings = HuggingFaceBgeEmbeddings(
+                    model_name=embedding_model,
+                    encode_kwargs={'normalize_embeddings': True},
+                    query_instruction="Represent this sentence for searching relevant passages:")
+            elif "Google" == embedding_model:
+                self.embeddings = GooglePalmEmbeddings()
+            else:
+                self.embeddings = HuggingFaceEmbeddings(
+                    model_name=embedding_model,
+                    encode_kwargs={"normalize_embeddings": True},
+                )
+        except Exception as e:
+            logging.error("Please select a proper embedding model.")
+            logging.error(e)
+
+        data_collection = index_library(self.input_path)
+        logging.info("The parsing for the uploaded files is finished.")
+
+        langchain_documents = document_transfer(data_collection)
+        logging.info("The format of parsed documents is transferred.")
+
+        # Translate the chosen search_type into the search_kwargs the retriever expects.
+        if kwargs['search_type'] == "similarity":
+            kwargs['search_kwargs'] = {"k": self.k}
+        elif kwargs['search_type'] == "mmr":
+            kwargs['search_kwargs'] = {"k": self.k, "fetch_k": self.fetch_k}
+        elif kwargs['search_type'] == "similarity_score_threshold":
+            kwargs['search_kwargs'] = {"k": self.k, "score_threshold": self.score_threshold}
+
+        if self.vector_database == "Chroma":
+            self.database = Chroma
+        elif self.vector_database == "Qdrant":
+            self.database = Qdrant
+        if self.retrieval_type == 'default':  # Using vector store retriever
+            if append:
+                knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings,
+                                                              **kwargs)
+            else:
+                knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \
+                                              **kwargs)
+            if self.vector_database == "Qdrant" and knowledge_base.is_local():
+                # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously.
+                knowledge_base.client.close()
+        elif self.retrieval_type == "child_parent":  # Using child-parent store retriever
+            child_documents = self.splitter.split_documents(langchain_documents)
+            langchain_documents = document_append_id(langchain_documents)
+            if append:
+                knowledge_base = self.database.from_documents(documents=langchain_documents, embedding=self.embeddings,
+                                                              **kwargs)
+                child_knowledge_base = self.database.from_documents(documents=child_documents, sign='child', \
+                                                                    embedding=self.embeddings, **kwargs)
+            else:
+                knowledge_base = self.database.build(documents=langchain_documents, embedding=self.embeddings, **kwargs)
+                child_knowledge_base = self.database.build(documents=child_documents, embedding=self.embeddings, \
+                                                           sign='child', **kwargs)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type, document_store=knowledge_base, \
+                                              child_document_store=child_knowledge_base, **kwargs)
+            if self.vector_database == "Qdrant":
+                # one local storage folder cannot be accessed by multiple instances of Qdrant client simultaneously.
+                if knowledge_base.is_local():
+                    knowledge_base.client.close()
+                if child_knowledge_base.is_local():
+                    child_knowledge_base.client.close()
+        elif self.retrieval_type == "bm25":
+            self.docs = document_append_id(langchain_documents)
+            self.retriever = RetrieverAdapter(retrieval_type=self.retrieval_type,
+                                              docs=self.docs,
+                                              reranker_model=self.reranker_model,
+                                              top_n=self.top_n,
+                                              enable_rerank=self.enable_rerank,
+                                              **kwargs)
+        logging.info("The retriever is successfully built.")
+
+    def pre_llm_inference_actions(self, model_name, query):
+        if self.polisher:
+            try:
+                query = self.polisher.polish_query(model_name, query)
+            except Exception as e:
+                logging.info(f"Polishing the user query failed: {e}")
+                raise Exception("[Retrieval ERROR] query polish failed!")
+
+        assert self.retriever is not None, "The retriever is not built, please check its status."
+        context = self.retriever.get_context(query)
+        return context
+
+
+class RetrieverAdapter():
+    def __init__(self, retrieval_type='default', document_store=None, child_document_store=None, docs=None, \
+                 reranker_model="BAAI/bge-reranker-large", top_n=1, enable_rerank=False, **kwargs):
+        self.retrieval_type = retrieval_type
+        if enable_rerank:
+            from intel_extension_for_transformers.langchain_community.retrievers.bge_reranker import BgeReranker  # pylint: disable=E0401, E0611
+            from FlagEmbedding import FlagReranker
+            reranker = FlagReranker(reranker_model)
+            self.reranker = BgeReranker(model=reranker, top_n=top_n)
+        else:
+            self.reranker = None
+
+        if self.retrieval_type == "default":
+            self.retriever = VectorStoreRetriever(vectorstore=document_store, **kwargs)
+        elif self.retrieval_type == "bm25":
+            self.retriever = BM25Retriever.from_documents(docs, **kwargs)
+        elif self.retrieval_type == "child_parent":
+            self.retriever = ChildParentRetriever(parentstore=document_store, \
+                                                  vectorstore=child_document_store,
+                                                  **kwargs)  # pylint: disable=abstract-class-instantiated
+        else:
+            logging.error('The chosen retrieval type is not supported.')
+
+    def get_context(self, query):
+        context = []
+        retrieved_documents = self.retriever.get_relevant_documents(query)
+        if self.reranker is not None:
+            retrieved_documents = self.reranker.compress_documents(documents=retrieved_documents, query=query)
+        for doc in retrieved_documents:
+            context.append(doc.page_content)
+        return context
+
+def main():
+    import os, shutil
+    if os.path.exists("output"):
+        shutil.rmtree("output", ignore_errors=True)
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--index_file_jsonl_path", type=str)
+    parser.add_argument("--query_file_jsonl_path", type=str)
+    parser.add_argument("--vector_database", type=str, default="Chroma")
+    parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5")
+    parser.add_argument("--llm_model", type=str)
+    parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large")
+
+    parser.add_argument("--retrieval_type", type=str, default='default')
+    parser.add_argument("--polish", default=False, action='store_true')
+    parser.add_argument("--search_type", type=str, default="similarity")
+    parser.add_argument("--k", type=int, default=1)
+    parser.add_argument("--fetch_k", type=int, default=5)
+    parser.add_argument("--score_threshold", type=float, default=0.3)
+    parser.add_argument("--top_n", type=int, default=1)
+    parser.add_argument("--enable_rerank", default=False, action='store_true')
+
+    args = parser.parse_args()
+
+    index_file_jsonl_path = args.index_file_jsonl_path
+    query_file_jsonl_path = args.query_file_jsonl_path
+    vector_database = args.vector_database
+    embedding_model = args.embedding_model
+    retrieval_type = args.retrieval_type
+    polish = args.polish
+    search_type = args.search_type
+    llm_model = args.llm_model
+    k = args.k
+    fetch_k = args.fetch_k
+    score_threshold = args.score_threshold
+    reranker_model = args.reranker_model
+    top_n = args.top_n
+    enable_rerank = args.enable_rerank
+
+    query_list = query_set(query_file_jsonl_path)
+
+    config = PipelineConfig(model_name_or_path=llm_model)
+    build_chatbot(config)
+
+    retrieval_results = []
+    for query in query_list:
+        context = Retrieval(input_path=index_file_jsonl_path,
+                            vector_database=vector_database,
+                            embedding_model=embedding_model,
+                            retrieval_type=retrieval_type,
+                            polish=polish,
+                            search_type=search_type,
+                            k=k,
+                            fetch_k=fetch_k,
+                            score_threshold=score_threshold,
+                            reranker_model=reranker_model,
+                            top_n=top_n,
+                            enable_rerank=enable_rerank
+                            ).pre_llm_inference_actions(model_name=llm_model, query=query)
+        retrieval_results.append(context)
+    ground_truths = load_list(query_file_jsonl_path, "pos")
+    MRR, Hit = evaluate(retrieval_results, ground_truths)
+
+    file_json_path = 'result_retrieval.jsonl'
+
+    # A score of 0.0 is still a valid benchmark result, so only skip writing when
+    # evaluation returned nothing at all.
+    if MRR is not None and Hit is not None:
+        data = {
+            "index_file_jsonl_path": args.index_file_jsonl_path,
+            "query_file_jsonl_path": args.query_file_jsonl_path,
+            "vector_database": args.vector_database,
+            "embedding_model": args.embedding_model,
+            "retrieval_type": args.retrieval_type,
+            "polish": args.polish,
+            "search_type": args.search_type,
+            "llm_model": args.llm_model,
+            "k": args.k,
+            "fetch_k": args.fetch_k,
+            "score_threshold": args.score_threshold,
+            "reranker_model": args.reranker_model,
+            "top_n": args.top_n,
+            "enable_rerank": args.enable_rerank,
+            "MRR": MRR,
+            "Hit": Hit,
+        }
+        print(data)
+        with jsonlines.open(file_json_path, "a") as file_json:
+            file_json.write(data)
+
+if __name__ == '__main__':
+    main()
diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py
index 5a5d3067a6e..ae5cbc9c8d5 100644
--- a/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py
+++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/retriever/retrieval_superbenchmark.py
@@ -1,116 +1,116 @@
-# Copyright (c) 2024 Intel Corporation
-#
-# Licensed
under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import os -import subprocess -import jsonlines - -def main(): - if os.path.exists("result_retrieval.jsonl"): - os.remove("result_retrieval.jsonl") - script_path = 'retrieval_benchmark.sh' - - parser = argparse.ArgumentParser() - parser.add_argument("--index_file_jsonl_path", type=str) - parser.add_argument("--query_file_jsonl_path", type=str) - parser.add_argument("--vector_database", type=str, default="Chroma") - parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") - parser.add_argument("--llm_model", type=str) - parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") - - args = parser.parse_args() - - arg1 = args.index_file_jsonl_path - arg2 = args.query_file_jsonl_path - arg3 = args.vector_database - arg4 = args.embedding_model - arg5 = args.llm_model - arg6 = args.reranker_model - - arg7_list = ['default','child_parent','bm25'] - arg8_list = ['True','False'] - arg9_list = ['similarity','mmr','similarity_score_threshold'] - arg10_list = ['1', '3', '5'] - arg11_list = ['5', '10', '20'] - arg12_list = ['0.3','0.5','0.7'] - arg13_list = ['1','3', '5','10'] - arg14_list = ['True','False'] - - for arg7 in arg7_list: - print('--'*1 +'retrieval_type',arg7) - for arg8 in arg8_list: - print('--'*2 +'polish',arg8) - for arg9 in arg9_list: - print('--'*3 +'search_type',arg9) - for arg10 in arg10_list: - print('--'*4 +'k',arg10) - for arg11 in arg11_list: - print('--'*5 +'fetch_k',arg11) - for arg12 in arg12_list: - print('--'*6 +'score_threshold',arg12) - for arg13 in arg13_list: - print('--'*7 +'top_n',arg13) - for arg14 in arg14_list: - print('--'*8 +'enable_rerank',arg14) - # try: - subprocess.run(['bash', - script_path, - '--index_file_jsonl_path='+arg1, - '--query_file_jsonl_path='+arg2, - '--vector_database='+arg3, - '--embedding_model='+arg4, - '--llm_model='+arg5, - '--reranker_model='+arg6, - '--retrieval_type='+arg7, - '--polish='+arg8, - '--search_type='+arg9, - '--k='+arg10, - '--fetch_k='+arg11, - '--score_threshold='+arg12, - '--top_n='+arg13, - '--enable_rerank='+arg14], - stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) - - file_jsonl_path='result_retrieval.jsonl' - - MRR_list = [] - Hit_list = [] - - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - MRR=stu["MRR"] - Hit=stu["Hit"] - MRR_list.append(MRR) - Hit_list.append(Hit) - - MRR_line_number_list = [i for i, v in enumerate(MRR_list) if v == max(MRR_list)] - Hit_line_number_list = [i for i, v in enumerate(Hit_list) if v == max(Hit_list)] - - line=0 - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - if line in MRR_line_number_list: - print('max_MRR',stu) - line+=1 - - line=0 - with open(file_jsonl_path) as file: - for stu in jsonlines.Reader(file): - if line in Hit_line_number_list: - print('max_Hit',stu) - line+=1 - -if __name__ == '__main__': - main() +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# 
you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import subprocess +import jsonlines + +def main(): + if os.path.exists("result_retrieval.jsonl"): + os.remove("result_retrieval.jsonl") + script_path = 'retrieval_benchmark.sh' + + parser = argparse.ArgumentParser() + parser.add_argument("--index_file_jsonl_path", type=str) + parser.add_argument("--query_file_jsonl_path", type=str) + parser.add_argument("--vector_database", type=str, default="Chroma") + parser.add_argument("--embedding_model", type=str, default="BAAI/bge-large-en-v1.5") + parser.add_argument("--llm_model", type=str) + parser.add_argument("--reranker_model", type=str, default="BAAI/bge-reranker-large") + + args = parser.parse_args() + + arg1 = args.index_file_jsonl_path + arg2 = args.query_file_jsonl_path + arg3 = args.vector_database + arg4 = args.embedding_model + arg5 = args.llm_model + arg6 = args.reranker_model + + arg7_list = ['default','child_parent','bm25'] + arg8_list = ['True','False'] + arg9_list = ['similarity','mmr','similarity_score_threshold'] + arg10_list = ['1', '3', '5'] + arg11_list = ['5', '10', '20'] + arg12_list = ['0.3','0.5','0.7'] + arg13_list = ['1','3', '5','10'] + arg14_list = ['True','False'] + + for arg7 in arg7_list: + print('--'*1 +'retrieval_type',arg7) + for arg8 in arg8_list: + print('--'*2 +'polish',arg8) + for arg9 in arg9_list: + print('--'*3 +'search_type',arg9) + for arg10 in arg10_list: + print('--'*4 +'k',arg10) + for arg11 in arg11_list: + print('--'*5 +'fetch_k',arg11) + for arg12 in arg12_list: + print('--'*6 +'score_threshold',arg12) + for arg13 in arg13_list: + print('--'*7 +'top_n',arg13) + for arg14 in arg14_list: + print('--'*8 +'enable_rerank',arg14) + # try: + subprocess.run(['bash', + script_path, + '--index_file_jsonl_path='+arg1, + '--query_file_jsonl_path='+arg2, + '--vector_database='+arg3, + '--embedding_model='+arg4, + '--llm_model='+arg5, + '--reranker_model='+arg6, + '--retrieval_type='+arg7, + '--polish='+arg8, + '--search_type='+arg9, + '--k='+arg10, + '--fetch_k='+arg11, + '--score_threshold='+arg12, + '--top_n='+arg13, + '--enable_rerank='+arg14], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + + file_jsonl_path='result_retrieval.jsonl' + + MRR_list = [] + Hit_list = [] + + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + MRR=stu["MRR"] + Hit=stu["Hit"] + MRR_list.append(MRR) + Hit_list.append(Hit) + + MRR_line_number_list = [i for i, v in enumerate(MRR_list) if v == max(MRR_list)] + Hit_line_number_list = [i for i, v in enumerate(Hit_list) if v == max(Hit_list)] + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in MRR_line_number_list: + print('max_MRR',stu) + line+=1 + + line=0 + with open(file_jsonl_path) as file: + for stu in jsonlines.Reader(file): + if line in Hit_line_number_list: + print('max_Hit',stu) + line+=1 + +if __name__ == '__main__': + main() From 6b60154a2b816f700c660d020b34bc17add7f73c Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Fri, 10 May 2024 16:28:09 +0800 Subject: [PATCH 148/151] Create 
test_evaluation.py in CI Signed-off-by: Liangyx2 --- .../tests/ci/tools/test_evaluation.py | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py diff --git a/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py b/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py new file mode 100644 index 00000000000..f223e0f5fc5 --- /dev/null +++ b/intel_extension_for_transformers/neural_chat/tests/ci/tools/test_evaluation.py @@ -0,0 +1,116 @@ +# !/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest, os, shutil +from unittest.mock import patch +from intel_extension_for_transformers.neural_chat.tools.evaluation.data_augmentation import retrieval_dataset_construction, llm_generate_truth +from intel_extension_for_transformers.neural_chat.tools.evaluation.retriever import evaluate_retrieval +from intel_extension_for_transformers.neural_chat.tools.evaluation.framework import ragas_evaluation + +class TestEvaluation(unittest.TestCase): + def setUp(self) -> None: + if os.path.exists("data"): + shutil.rmtree("data", ignore_errors=True) + if os.path.exists("ground_truth.jsonl"): + os.remove("ground_truth.jsonl") + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) + return super().setUp() + + def tearDown(self) -> None: + if os.path.exists("data"): + shutil.rmtree("data", ignore_errors=True) + if os.path.exists("ground_truth.jsonl"): + os.remove("ground_truth.jsonl") + if os.path.exists("output"): + shutil.rmtree("output", ignore_errors=True) + return super().tearDown() + + def test_retrieval_dataset_construction(self): + path = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/assets/docs/retrieve_multi_doc/" + if os.path.exists(path): + input_path=path + else: + input_path='../assets/docs/retrieve_multi_doc/' + argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base', \ + '--input', input_path, \ + '--output', './data', \ + '--range_for_sampling', '2-2', \ + '--negative_number', '1'] + with patch('sys.argv', ['python retrieval_dataset_construction.py'] + argv): + retrieval_dataset_construction.main() + self.assertTrue(os.path.exists("./data/minedHN_split.jsonl")) + + def test_llm_generate_truth(self): + path = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl" + if os.path.exists(path): + input_path=path + else: + input_path='../tools/evaluation/data_augmentation/example.jsonl' + argv = ['--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--input', input_path, \ + '--output', 'ground_truth.jsonl'] + with patch('sys.argv', ['python llm_generate_truth.py'] + argv): + llm_generate_truth.main() + 
self.assertTrue(os.path.exists("ground_truth.jsonl")) + + def test_evaluate_retrieval(self): + path1 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/candidate_context.jsonl" + if os.path.exists(path1): + index_file_jsonl_path=path1 + else: + index_file_jsonl_path='../tools/evaluation/data_augmentation/candidate_context.jsonl' + path2 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/example.jsonl" + if os.path.exists(path2): + query_file_jsonl_path=path2 + else: + query_file_jsonl_path='../tools/evaluation/data_augmentation/example.jsonl' + argv = ['--index_file_jsonl_path', index_file_jsonl_path, \ + '--query_file_jsonl_path', query_file_jsonl_path, \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] + with patch('sys.argv', ['python evaluate_retrieval.py'] + argv): + result = evaluate_retrieval.main() + self.assertIsNotNone(result) + + def test_ragas_evaluation(self): + path1 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/answer.jsonl" + if os.path.exists(path1): + answer_file_path=path1 + else: + answer_file_path='../tools/evaluation/data_augmentation/answer.jsonl' + path2 = \ + "/intel-extension-for-transformers/intel_extension_for_transformers/neural_chat/tools/evaluation/data_augmentation/ground_truth.jsonl" + if os.path.exists(path2): + ground_truth_file_path=path2 + else: + ground_truth_file_path='../tools/evaluation/data_augmentation/ground_truth.jsonl' + argv = ['--answer_file', answer_file_path, \ + '--ground_truth_file', ground_truth_file_path, \ + '--llm_model', '/tf_dataset2/models/nlp_toolkit/neural-chat-7b-v3-1', \ + '--embedding_model', '/tf_dataset2/inc-ut/gte-base'] + with patch('sys.argv', ['python ragas_evaluation.py'] + argv): + result = ragas_evaluation.main() + self.assertIsNotNone(result) + +if __name__ == '__main__': + unittest.main() From c73a68f7a2b60abe065819618793f72201c400c1 Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Sat, 11 May 2024 10:48:43 +0800 Subject: [PATCH 149/151] Update requirements.txt Signed-off-by: Liangyx2 --- .../neural_chat/tests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/intel_extension_for_transformers/neural_chat/tests/requirements.txt b/intel_extension_for_transformers/neural_chat/tests/requirements.txt index 4f5177133b9..7e81b307cc2 100644 --- a/intel_extension_for_transformers/neural_chat/tests/requirements.txt +++ b/intel_extension_for_transformers/neural_chat/tests/requirements.txt @@ -65,7 +65,7 @@ python-docx python-multipart pyyaml qdrant-client==1.8.2 -ragas +ragas==0.1.7 rank_bm25 resampy==0.3.1 rouge_score From b4c0e67023ab278d6ee2cd710b54575105a05abc Mon Sep 17 00:00:00 2001 From: Liangyx2 Date: Mon, 3 Jun 2024 15:16:21 +0800 Subject: [PATCH 150/151] Update ragas_evaluation_benchmark.py Signed-off-by: Liangyx2 --- .../framework/ragas_evaluation_benchmark.py | 101 ++++++------------ 1 file changed, 30 insertions(+), 71 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index 7abf65bc6c9..c21fa7ea6e1 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ 
b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -103,54 +103,8 @@ def ragas(answer_file, ground_truth_file, llm_model, embedding_model, use_openai return answer_relevancy_average, faithfulness_average, context_recall_average, context_precision_average -def rag(text, - input_path, - vector_database="Chroma", - embedding_model="BAAI/bge-large-en-v1.5", - retrieval_type='default', - max_chuck_size=256, - search_type="similarity", - k=1, - fetch_k=5, - score_threshold=0.3, - polish=False, - top_n=1, - enable_rerank=False, - reranker_model="BAAI/bge-reranker-large", - llm_model='intel/neural-chat-7b-v3-1', - temperature=0.01, - top_k=1, - top_p=0.1, - repetition_penalty=1.0, - num_beams=1, - do_sample=True - ): - plugins.retrieval.enable=True - plugins.retrieval.args["input_path"]=input_path - plugins.retrieval.args["vector_database"]=vector_database - plugins.retrieval.args["embedding_model"]=embedding_model - plugins.retrieval.args["retrieval_type"]=retrieval_type - plugins.retrieval.args["max_chuck_size"]=max_chuck_size - plugins.retrieval.args["search_type"]=search_type - if search_type=="similarity": - plugins.retrieval.args["search_kwargs"]={"k":k} - elif search_type=="mmr": - plugins.retrieval.args["search_kwargs"]={"k":k, "fetch_k":fetch_k} - elif search_type=="similarity_score_threshold": - plugins.retrieval.args["search_kwargs"]={"k":k, "score_threshold":score_threshold} - plugins.retrieval.args["polish"]=polish - plugins.retrieval.args["top_n"]=top_n - plugins.retrieval.args["enable_rerank"]=enable_rerank - plugins.retrieval.args["reranker_model"]=reranker_model - config = PipelineConfig(plugins=plugins, model_name_or_path=llm_model) - chatbot = build_chatbot(config) - response = chatbot.predict(text, - config=GenerationConfig(temperature=temperature, - top_k=top_k, - top_p=top_p, - repetition_penalty=repetition_penalty, - num_beams=num_beams, - do_sample=do_sample)) +def rag(text, chatbot, generation_config): + response = chatbot.predict(text, config=generation_config) return response def result_data(ground_truth_file, @@ -183,30 +137,35 @@ def result_data(ground_truth_file, if os.path.exists("output"): shutil.rmtree("output", ignore_errors=True) + + plugins.retrieval.enable=True + plugins.retrieval.args["input_path"]=input_path + plugins.retrieval.args["vector_database"]=vector_database + plugins.retrieval.args["embedding_model"]=embedding_model + plugins.retrieval.args["retrieval_type"]=retrieval_type + plugins.retrieval.args["max_chuck_size"]=max_chuck_size + plugins.retrieval.args["search_type"]=search_type + if search_type=="similarity": + plugins.retrieval.args["search_kwargs"]={"k":k} + elif search_type=="mmr": + plugins.retrieval.args["search_kwargs"]={"k":k, "fetch_k":fetch_k} + elif search_type=="similarity_score_threshold": + plugins.retrieval.args["search_kwargs"]={"k":k, "score_threshold":score_threshold} + plugins.retrieval.args["polish"]=polish + plugins.retrieval.args["top_n"]=top_n + plugins.retrieval.args["enable_rerank"]=enable_rerank + plugins.retrieval.args["reranker_model"]=reranker_model + config = PipelineConfig(plugins=plugins, model_name_or_path=llm_model, device="cuda") + chatbot = build_chatbot(config) + generation_config=GenerationConfig(temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, + do_sample=do_sample) + for question in question_list: - response = rag( - question, - input_path, - vector_database, - embedding_model, - 
retrieval_type, - max_chuck_size, - search_type, - k, - fetch_k, - score_threshold, - polish, - top_n, - enable_rerank, - reranker_model, - llm_model, - temperature, - top_k, - top_p, - repetition_penalty, - num_beams, - do_sample - ) + response = rag(question, chatbot, generation_config) data = { "question": question, "answer": response, From e75bbe471b24c83322fe3c701c97bc206c1e77c8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Jun 2024 07:19:01 +0000 Subject: [PATCH 151/151] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../framework/ragas_evaluation_benchmark.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py index c21fa7ea6e1..7493ec06032 100644 --- a/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py +++ b/intel_extension_for_transformers/neural_chat/tools/evaluation/framework/ragas_evaluation_benchmark.py @@ -137,7 +137,7 @@ def result_data(ground_truth_file, if os.path.exists("output"): shutil.rmtree("output", ignore_errors=True) - + plugins.retrieval.enable=True plugins.retrieval.args["input_path"]=input_path plugins.retrieval.args["vector_database"]=vector_database @@ -157,11 +157,11 @@ def result_data(ground_truth_file, plugins.retrieval.args["reranker_model"]=reranker_model config = PipelineConfig(plugins=plugins, model_name_or_path=llm_model, device="cuda") chatbot = build_chatbot(config) - generation_config=GenerationConfig(temperature=temperature, - top_k=top_k, - top_p=top_p, - repetition_penalty=repetition_penalty, - num_beams=num_beams, + generation_config=GenerationConfig(temperature=temperature, + top_k=top_k, + top_p=top_p, + repetition_penalty=repetition_penalty, + num_beams=num_beams, do_sample=do_sample) for question in question_list: