diff --git a/docs/howtos/applications/singlehop_testset_gen.md b/docs/howtos/applications/singlehop_testset_gen.md index 2358a950f..2b77a83b7 100644 --- a/docs/howtos/applications/singlehop_testset_gen.md +++ b/docs/howtos/applications/singlehop_testset_gen.md @@ -211,70 +211,70 @@ Output Wut do I do if my baggage is Delayed, Lost, or... [Baggage Policies\n\nThis section provides a d... If your baggage is delayed, lost, or damaged, ... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 1 Wht asistance is provided by the airline durin... [Flight Delays\n\nFlight delays can be caused ... Depending on the length of the delay, Ragas Ai... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 2 What is Step 1: Check Fare Rules in the contex... [Flight Cancellations\n\nFlight cancellations ... Step 1: Check Fare Rules involves logging into... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 3 How can I access my booking online with Ragas ... [Managing Reservations\n\nManaging your reserv... To access your booking online with Ragas Airli... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 4 What assistance does Ragas Airlines provide fo... [Special Assistance\n\nRagas Airlines provides... Ragas Airlines provides special assistance ser... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 5 What steps should I take if my baggage is dela... [Baggage Policies This section provides a deta... If your baggage is delayed, lost, or damaged w... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 6 How can I resubmit the claim for my baggage is... [Potential Issues and Resolutions for Baggage ... To resubmit the claim for your baggage issue, ... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 7 Wut are the main causes of flight delays and h... [Flight Delays Flight delays can be caused by ... Flight delays can be caused by weather conditi... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 8 How can I request reimbursement for additional... [2. Additional Expenses Incurred Due to Delay ... To request reimbursement for additional expens... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 9 What are passenger-initiated cancelations? [Flight Cancellations Flight cancellations can... Passenger-initiated cancellations occur when a... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer diff --git a/docs/howtos/customizations/testgenerator/_persona_generator.md b/docs/howtos/customizations/testgenerator/_persona_generator.md index d0d32824c..984fd6d8d 100644 --- a/docs/howtos/customizations/testgenerator/_persona_generator.md +++ b/docs/howtos/customizations/testgenerator/_persona_generator.md @@ -98,35 +98,35 @@ testset.to_pandas().head() What the Director do in GitLab and how they wo... [09db4f3e-1c10-4863-9024-f869af48d3e0\n\ntitle... The Director at GitLab, such as the Director o... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 1 Wht is the rol of the VP in GitLab? [56c84f1b-3558-4c80-b8a9-348e69a4801b\n\nJob F... The VP, or Vice President, at GitLab is respon... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 2 What GitLab do for career progression? [ead619a5-930f-4e2b-b797-41927a04d2e3\n\nGoals... The Job frameworks at GitLab help team members... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 3 Wht is the S-grop and how do they work with ot... [42babb12-b033-493f-b684-914e2b1b1d0f\n\nPeopl... Members of the S-group are expected to demonst... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 4 How does Google execute its company vision? [c3ed463d-1cdc-4ba4-a6ca-2c4ab12da883\n\nof mo... To effectively execute the company vision, man... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer diff --git a/docs/howtos/customizations/testgenerator/persona_generator.ipynb b/docs/howtos/customizations/testgenerator/persona_generator.ipynb index c29d8a0fc..ba90e1260 100644 --- a/docs/howtos/customizations/testgenerator/persona_generator.ipynb +++ b/docs/howtos/customizations/testgenerator/persona_generator.ipynb @@ -122,35 +122,35 @@ " What the Director do in GitLab and how they wo...\n", " [09db4f3e-1c10-4863-9024-f869af48d3e0\\n\\ntitle...\n", " The Director at GitLab, such as the Director o...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", " 1\n", " Wht is the rol of the VP in GitLab?\n", " [56c84f1b-3558-4c80-b8a9-348e69a4801b\\n\\nJob F...\n", " The VP, or Vice President, at GitLab is respon...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", " 2\n", " What GitLab do for career progression?\n", " [ead619a5-930f-4e2b-b797-41927a04d2e3\\n\\nGoals...\n", " The Job frameworks at GitLab help team members...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", " 3\n", " Wht is the S-grop and how do they work with ot...\n", " [42babb12-b033-493f-b684-914e2b1b1d0f\\n\\nPeopl...\n", " Members of the S-group are expected to demonst...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", " 4\n", " How does Google execute its company vision?\n", " [c3ed463d-1cdc-4ba4-a6ca-2c4ab12da883\\n\\nof mo...\n", " To effectively execute the company vision, man...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", "\n", @@ -158,11 +158,11 @@ ], "text/plain": [ " user_input ... synthesizer_name\n", - "0 What the Director do in GitLab and how they wo... ... single_hop_specifc_query_synthesizer\n", - "1 Wht is the rol of the VP in GitLab? ... single_hop_specifc_query_synthesizer\n", - "2 What GitLab do for career progression? ... single_hop_specifc_query_synthesizer\n", - "3 Wht is the S-grop and how do they work with ot... ... single_hop_specifc_query_synthesizer\n", - "4 How does Google execute its company vision? ... single_hop_specifc_query_synthesizer\n", + "0 What the Director do in GitLab and how they wo... ... single_hop_specific_query_synthesizer\n", + "1 Wht is the rol of the VP in GitLab? ... single_hop_specific_query_synthesizer\n", + "2 What GitLab do for career progression? ... single_hop_specific_query_synthesizer\n", + "3 Wht is the S-grop and how do they work with ot... ... single_hop_specific_query_synthesizer\n", + "4 How does Google execute its company vision? ... single_hop_specific_query_synthesizer\n", "\n", "[5 rows x 4 columns]" ] diff --git a/docs/howtos/integrations/_llamaindex.md b/docs/howtos/integrations/_llamaindex.md index 865880cb8..f1c24adfd 100644 --- a/docs/howtos/integrations/_llamaindex.md +++ b/docs/howtos/integrations/_llamaindex.md @@ -88,21 +88,21 @@ df.head() Cud yu pleese explane the role of New York Cit... [New York, often called New York City or NYC, ... New York City serves as the geographical and d... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 1 So like, what was New York City called before ... [History == === Early history === In the pre-C... Before it was called New York, the area was kn... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 2 what happen in new york with slavery and how i... [and rechristened it "New Orange" after Willia... In the early 18th century, New York became a c... - single_hop_specifc_query_synthesizer + single_hop_specific_query_synthesizer 3 diff --git a/docs/howtos/integrations/llamaindex.ipynb b/docs/howtos/integrations/llamaindex.ipynb index 780c3dfb3..8cf44a942 100644 --- a/docs/howtos/integrations/llamaindex.ipynb +++ b/docs/howtos/integrations/llamaindex.ipynb @@ -135,21 +135,21 @@ " Cud yu pleese explane the role of New York Cit...\n", " [New York, often called New York City or NYC, ...\n", " New York City serves as the geographical and d...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", " 1\n", " So like, what was New York City called before ...\n", " [History == === Early history === In the pre-C...\n", " Before it was called New York, the area was kn...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", " 2\n", " what happen in new york with slavery and how i...\n", " [and rechristened it \"New Orange\" after Willia...\n", " In the early 18th century, New York became a c...\n", - " single_hop_specifc_query_synthesizer\n", + " single_hop_specific_query_synthesizer\n", " \n", " \n", " 3\n", @@ -192,9 +192,9 @@ "4 The Staten Island Ferry plays a significant ro... \n", "\n", " synthesizer_name \n", - "0 single_hop_specifc_query_synthesizer \n", - "1 single_hop_specifc_query_synthesizer \n", - "2 single_hop_specifc_query_synthesizer \n", + "0 single_hop_specific_query_synthesizer \n", + "1 single_hop_specific_query_synthesizer \n", + "2 single_hop_specific_query_synthesizer \n", "3 multi_hop_specific_query_synthesizer \n", "4 multi_hop_specific_query_synthesizer " ] diff --git a/src/ragas/testset/synthesizers/multi_hop/abstract.py b/src/ragas/testset/synthesizers/multi_hop/abstract.py index 09de39a78..1a581fb85 100644 --- a/src/ragas/testset/synthesizers/multi_hop/abstract.py +++ b/src/ragas/testset/synthesizers/multi_hop/abstract.py @@ -31,23 +31,18 @@ @dataclass class MultiHopAbstractQuerySynthesizer(MultiHopQuerySynthesizer): - """ - Synthesizes abstract multi-hop queries from given knowledge graph. - - Attributes - ---------- - """ + """Synthesize abstract multi-hop queries from given knowledge graph.""" name: str = "multi_hop_abstract_query_synthesizer" + relation_property: str = "summary_similarity" + abstract_property_name: str = "themes" concept_combination_prompt: PydanticPrompt = ConceptCombinationPrompt() theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt() def get_node_clusters(self, knowledge_graph: KnowledgeGraph) -> t.List[t.Set[Node]]: - + """Identify clusters of nodes based on the specified relationship condition.""" node_clusters = knowledge_graph.find_indirect_clusters( - relationship_condition=lambda rel: ( - True if rel.get_property("summary_similarity") else False - ), + relationship_condition=lambda rel: bool(rel.get_property(self.relation_property)), depth_limit=3, ) logger.info("found %d clusters", len(node_clusters)) @@ -61,7 +56,8 @@ async def _generate_scenarios( callbacks: Callbacks, ) -> t.List[MultiHopScenario]: """ - Generates a list of scenarios on type MultiHopAbstractQuerySynthesizer + Generate a list of scenarios of type MultiHopScenario. + Steps to generate scenarios: 1. Find indirect clusters of nodes based on relationship condition 2. Calculate the number of samples that should be created per cluster to get n samples in total @@ -93,7 +89,7 @@ async def _generate_scenarios( nodes.append(node) base_scenarios = [] - node_themes = [node.properties.get("themes", []) for node in nodes] + node_themes = [node.properties.get(self.abstract_property_name, []) for node in nodes] prompt_input = ConceptsList( lists_of_concepts=node_themes, max_combinations=num_sample_per_cluster ) @@ -117,7 +113,7 @@ async def _generate_scenarios( concept_combination.combinations, personas=persona_list, persona_item_mapping=persona_concepts.mapping, - property_name="themes", + property_name=self.abstract_property_name, ) base_scenarios = self.sample_diverse_combinations( base_scenarios, num_sample_per_cluster diff --git a/src/ragas/testset/synthesizers/multi_hop/specific.py b/src/ragas/testset/synthesizers/multi_hop/specific.py index 53ce84094..bf9e557e5 100644 --- a/src/ragas/testset/synthesizers/multi_hop/specific.py +++ b/src/ragas/testset/synthesizers/multi_hop/specific.py @@ -27,28 +27,19 @@ @dataclass class MultiHopSpecificQuerySynthesizer(MultiHopQuerySynthesizer): - """ - Synthesizes overlap based queries by choosing specific chunks and generating a - keyphrase from them and then generating queries based on that. - - Attributes - ---------- - generate_query_prompt : PydanticPrompt - The prompt used for generating the query. - """ + """Synthesize multi-hop queries based on a chunk cluster defined by entity overlap.""" name: str = "multi_hop_specific_query_synthesizer" - relation_type: str = "entities_overlap" property_name: str = "entities" + relation_type: str = "entities_overlap" + relation_overlap_property: str = "overlapped_items" theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt() generate_query_reference_prompt: PydanticPrompt = QueryAnswerGenerationPrompt() def get_node_clusters(self, knowledge_graph: KnowledgeGraph) -> t.List[t.Tuple]: - + """Identify clusters of nodes based on the specified relationship condition.""" node_clusters = knowledge_graph.find_two_nodes_single_rel( - relationship_condition=lambda rel: ( - True if rel.type == self.relation_type else False - ) + relationship_condition=lambda rel: rel.type == self.relation_type ) logger.info("found %d clusters", len(node_clusters)) return node_clusters @@ -61,7 +52,8 @@ async def _generate_scenarios( callbacks: Callbacks, ) -> t.List[MultiHopScenario]: """ - Generates a list of scenarios on type MultiHopSpecificQuerySynthesizer + Generate a list of scenarios of type MultiHopScenario. + Steps to generate scenarios: 1. Filter the knowledge graph to find cluster of nodes or defined relation type. Here entities_overlap 2. Calculate the number of samples that should be created per cluster to get n samples in total @@ -87,7 +79,7 @@ async def _generate_scenarios( if len(scenarios) < n: node_a, node_b = triplet[0], triplet[-1] overlapped_items = [] - overlapped_items = triplet[1].properties["overlapped_items"] + overlapped_items = triplet[1].properties[self.relation_overlap_property] if overlapped_items: themes = list(dict(overlapped_items).keys()) prompt_input = ThemesPersonasInput( diff --git a/src/ragas/testset/synthesizers/single_hop/specific.py b/src/ragas/testset/synthesizers/single_hop/specific.py index ac0f1b367..dfa6e5aff 100644 --- a/src/ragas/testset/synthesizers/single_hop/specific.py +++ b/src/ragas/testset/synthesizers/single_hop/specific.py @@ -39,12 +39,14 @@ class SingleHopScenario(BaseScenario): @dataclass class SingleHopSpecificQuerySynthesizer(SingleHopQuerySynthesizer): - name: str = "single_hop_specifc_query_synthesizer" - theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt() + """Synthesize single-hop queries based on an entity of interest.""" + + name: str = "single_hop_specific_query_synthesizer" property_name: str = "entities" + theme_persona_matching_prompt: PydanticPrompt = ThemesPersonasMatchingPrompt() def get_node_clusters(self, knowledge_graph: KnowledgeGraph) -> t.List[Node]: - + """Identify clusters of nodes based on the entity of interest.""" node_type_dict = defaultdict(int) for node in knowledge_graph.nodes: if ( @@ -81,7 +83,8 @@ async def _generate_scenarios( callbacks: Callbacks, ) -> t.List[SingleHopScenario]: """ - Generates a list of scenarios on type SingleHopSpecificQuerySynthesizer + Generate a list of scenarios of type SingleHopScenario. + Steps to generate scenarios: 1. Find nodes with CHUNK type and entities property 2. Calculate the number of samples that should be created per node to get n samples in total