diff --git a/llama-index-core/llama_index/core/indices/utils.py b/llama-index-core/llama_index/core/indices/utils.py
index 1428891bdf436..2aed8534944e3 100644
--- a/llama-index-core/llama_index/core/indices/utils.py
+++ b/llama-index-core/llama_index/core/indices/utils.py
@@ -1,13 +1,12 @@
 """Utilities for GPT indices."""
 import logging
 import re
-from typing import Dict, List, Optional, Sequence, Set, Tuple
-
 from llama_index.core.base.embeddings.base import BaseEmbedding
 from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
 from llama_index.core.schema import BaseNode, ImageNode, MetadataMode
 from llama_index.core.utils import globals_helper, truncate_text
 from llama_index.core.vector_stores.types import VectorStoreQueryResult
+from typing import Dict, List, Optional, Sequence, Set, Tuple
 
 _logger = logging.getLogger(__name__)
 
@@ -106,7 +105,12 @@ def default_parse_choice_select_answer_fn(
         if answer_num > num_choices:
             continue
         answer_nums.append(answer_num)
-        answer_relevances.append(float(line_tokens[1].split(":")[1].strip()))
+        # Extract only the leading numeric score after the colon, so trailing
+        # explanation text (e.g. "Relevance: 8 (the document mentions ...)")
+        # no longer makes float() raise. Also accepts decimal scores ("8.5").
+        _answer_relevance = re.findall(
+            r"\d+(?:\.\d+)?", line_tokens[1].split(":")[1].strip()
+        )[0]
+        answer_relevances.append(float(_answer_relevance))
     return answer_nums, answer_relevances
 
 
diff --git a/llama-index-core/tests/indices/test_utils.py b/llama-index-core/tests/indices/test_utils.py
index 62693d7835ad9..ff2e538c96829 100644
--- a/llama-index-core/tests/indices/test_utils.py
+++ b/llama-index-core/tests/indices/test_utils.py
@@ -1,4 +1,5 @@
 """Test indices/utils.py."""
+import pytest
 
 from llama_index.core.indices.utils import expand_tokens_with_subtokens
 
@@ -16,3 +17,19 @@ def test_expand_tokens_with_subtokens() -> None:
         "world",
         "bye",
     }
+
+
+parse_choice_test_lines = [
+    """ Doc: 2, Relevance: 8 (The document mentions taking a "tasty turn around Barcelona\'s Santa Caterina market" and listening to an episode about Barcelona.)\nDoc: 4, Relevance: 6 (The document mentions Ferramenta in Barcelona and recommends cocktails and pasta dishes that can be tried there.)""",
+    "Doc: 2, Relevance: 8\nDoc: 4, Relevance: 6",
+    "answer_num: 2, answer_relevance:8\nanswer_num: 4, answer_relevance:6",
+]
+
+
+@pytest.mark.parametrize("answer", parse_choice_test_lines)
+def test_default_parse_choice_select_answer_fn(answer):
+    from llama_index.core.indices.utils import default_parse_choice_select_answer_fn
+
+    answer_nums, answer_relevances = default_parse_choice_select_answer_fn(answer, 5)
+    assert answer_nums == [2, 4]
+    assert answer_relevances == [8, 6]