issue-#11045, fix for llm reranker bug in which postprocessing 'defau… (#11051)
run-llama · Feb 21, 2024 · 36477cb · 36477cb
1 parent 757c66c
commit 36477cb
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 3 deletions.
diff --git a/llama-index-core/llama_index/core/indices/utils.py b/llama-index-core/llama_index/core/indices/utils.py
@@ -1,13 +1,12 @@
 """Utilities for GPT indices."""
 import logging
 import re
-from typing import Dict, List, Optional, Sequence, Set, Tuple
-
 from llama_index.core.base.embeddings.base import BaseEmbedding
 from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
 from llama_index.core.schema import BaseNode, ImageNode, MetadataMode
 from llama_index.core.utils import globals_helper, truncate_text
 from llama_index.core.vector_stores.types import VectorStoreQueryResult
+from typing import Dict, List, Optional, Sequence, Set, Tuple
 
 _logger = logging.getLogger(__name__)
 
@@ -106,7 +105,9 @@ def default_parse_choice_select_answer_fn(
         if answer_num > num_choices:
             continue
         answer_nums.append(answer_num)
-        answer_relevances.append(float(line_tokens[1].split(":")[1].strip()))
+        # Keep only the first run of digits after the colon; any trailing text is ignored.
+        _answer_relevance = re.findall(r"\d+", line_tokens[1].split(":")[1].strip())[0]
+        answer_relevances.append(float(_answer_relevance))
     return answer_nums, answer_relevances
 
 

diff --git a/llama-index-core/tests/indices/test_utils.py b/llama-index-core/tests/indices/test_utils.py
@@ -1,4 +1,5 @@
 """Test indices/utils.py."""
+import pytest
 from llama_index.core.indices.utils import expand_tokens_with_subtokens
 
 
@@ -16,3 +17,19 @@ def test_expand_tokens_with_subtokens() -> None:
         "world",
         "bye",
     }
+
+
+parse_choice_test_lines = [
+    """ Doc: 2, Relevance: 8 (The document mentions taking a "tasty turn around Barcelona\'s Santa Caterina market" and listening to an episode about Barcelona.)\nDoc: 4, Relevance: 6 (The document mentions Ferramenta in Barcelona and recommends cocktails and pasta dishes that can be tried there.)""",
+    "Doc: 2, Relevance: 8\nDoc: 4, Relevance: 6",
+    "answer_num: 2, answer_relevance:8\nanswer_num: 4, answer_relevance:6",
+]
+
+
+@pytest.mark.parametrize("answer", parse_choice_test_lines)
+def test_default_parse_choice_select_answer_fn(answer):
+    from llama_index.core.indices.utils import default_parse_choice_select_answer_fn
+
+    answer_nums, answer_relevances = default_parse_choice_select_answer_fn(answer, 5)
+    assert answer_nums == [2, 4]
+    assert answer_relevances == [8, 6]