diff --git a/docs/concepts/metrics/available_metrics/agents.md b/docs/concepts/metrics/available_metrics/agents.md index e7ae779f0..156475a5a 100644 --- a/docs/concepts/metrics/available_metrics/agents.md +++ b/docs/concepts/metrics/available_metrics/agents.md @@ -103,7 +103,7 @@ sample = MultiTurnSample( ] ) -scorer = ToolCallAccuracy(llm = evaluator_llm) +scorer = ToolCallAccuracy() await scorer.multi_turn_ascore(sample) ``` Output @@ -119,7 +119,7 @@ By default the tool names and arguments are compared using exact string matching from ragas.metrics._string import NonLLMStringSimilarity from ragas.metrics._tool_call_accuracy import ToolCallAccuracy -metric = ToolCallAccuracy(llm = evaluator_llm) +metric = ToolCallAccuracy() metric.arg_comparison_metric = NonLLMStringSimilarity() ``` diff --git a/docs/howtos/integrations/_langgraph_agent_evaluation.md b/docs/howtos/integrations/_langgraph_agent_evaluation.md index a694db948..c32755ddc 100644 --- a/docs/howtos/integrations/_langgraph_agent_evaluation.md +++ b/docs/howtos/integrations/_langgraph_agent_evaluation.md @@ -335,7 +335,6 @@ sample = MultiTurnSample( ) tool_accuracy_scorer = ToolCallAccuracy() -tool_accuracy_scorer.llm = ChatOpenAI(model="gpt-4o-mini") await tool_accuracy_scorer.multi_turn_ascore(sample) ``` diff --git a/docs/howtos/integrations/langgraph_agent_evaluation.ipynb b/docs/howtos/integrations/langgraph_agent_evaluation.ipynb index a719c8511..473e03732 100644 --- a/docs/howtos/integrations/langgraph_agent_evaluation.ipynb +++ b/docs/howtos/integrations/langgraph_agent_evaluation.ipynb @@ -601,7 +601,6 @@ ")\n", "\n", "tool_accuracy_scorer = ToolCallAccuracy()\n", - "tool_accuracy_scorer.llm = ChatOpenAI(model=\"gpt-4o-mini\")\n", "await tool_accuracy_scorer.multi_turn_ascore(sample)" ] }, diff --git a/docs/howtos/integrations/swarm_agent_evaluation.md b/docs/howtos/integrations/swarm_agent_evaluation.md index d573be115..c654c4d9e 100644 --- a/docs/howtos/integrations/swarm_agent_evaluation.md +++ b/docs/howtos/integrations/swarm_agent_evaluation.md @@ -332,7 +332,6 @@ sample = MultiTurnSample( ) tool_accuracy_scorer = ToolCallAccuracy() -tool_accuracy_scorer.llm = ChatOpenAI(model="gpt-4o-mini") await tool_accuracy_scorer.multi_turn_ascore(sample) ``` Output @@ -381,7 +380,6 @@ sample = MultiTurnSample( ) tool_accuracy_scorer = ToolCallAccuracy() -tool_accuracy_scorer.llm = ChatOpenAI(model="gpt-4o-mini") await tool_accuracy_scorer.multi_turn_ascore(sample) ``` Output