Commit 3d8825f

feat(llma): send number of web searches (#359)
* feat(llma): send number of web searches
* feat(llma): add more tests
* chore(llma): bump version
* fix(llma): feedback
* fix(llma): fix Gemini
* fix(llma): fix OpenAI's Chat Completions streaming
1 parent 3e52e7f · commit 3d8825f

File tree

12 files changed: +1148 −71 lines

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -1,3 +1,7 @@
+# 6.8.0 - 2025-11-03
+
+- feat(llma): send web search calls to be used for LLM cost calculations
+
 # 6.7.14 - 2025-11-03
 
 - fix(django): Handle request.user access in async middleware context to prevent SynchronousOnlyOperation errors in Django 5+ (fixes #355)

posthog/ai/anthropic/anthropic_async.py

Lines changed: 30 additions & 67 deletions
@@ -14,14 +14,9 @@
 from posthog.ai.types import StreamingContentBlock, TokenUsage, ToolInProgress
 from posthog.ai.utils import (
     call_llm_and_track_usage_async,
-    extract_available_tool_calls,
-    get_model_params,
-    merge_system_prompt,
     merge_usage_stats,
-    with_privacy_mode,
 )
 from posthog.ai.anthropic.anthropic_converter import (
-    format_anthropic_streaming_content,
     extract_anthropic_usage_from_event,
     handle_anthropic_content_block_start,
     handle_anthropic_text_delta,
@@ -220,66 +215,34 @@ async def _capture_streaming_event(
     content_blocks: List[StreamingContentBlock],
     accumulated_content: str,
 ):
-    if posthog_trace_id is None:
-        posthog_trace_id = str(uuid.uuid4())
-
-    # Format output using converter
-    formatted_content = format_anthropic_streaming_content(content_blocks)
-    formatted_output = []
-
-    if formatted_content:
-        formatted_output = [{"role": "assistant", "content": formatted_content}]
-    else:
-        # Fallback to accumulated content if no blocks
-        formatted_output = [
-            {
-                "role": "assistant",
-                "content": [{"type": "text", "text": accumulated_content}],
-            }
-        ]
-
-    event_properties = {
-        "$ai_provider": "anthropic",
-        "$ai_model": kwargs.get("model"),
-        "$ai_model_parameters": get_model_params(kwargs),
-        "$ai_input": with_privacy_mode(
-            self._client._ph_client,
-            posthog_privacy_mode,
-            sanitize_anthropic(merge_system_prompt(kwargs, "anthropic")),
-        ),
-        "$ai_output_choices": with_privacy_mode(
-            self._client._ph_client,
-            posthog_privacy_mode,
-            formatted_output,
-        ),
-        "$ai_http_status": 200,
-        "$ai_input_tokens": usage_stats.get("input_tokens", 0),
-        "$ai_output_tokens": usage_stats.get("output_tokens", 0),
-        "$ai_cache_read_input_tokens": usage_stats.get(
-            "cache_read_input_tokens", 0
-        ),
-        "$ai_cache_creation_input_tokens": usage_stats.get(
-            "cache_creation_input_tokens", 0
+    from posthog.ai.types import StreamingEventData
+    from posthog.ai.anthropic.anthropic_converter import (
+        format_anthropic_streaming_input,
+        format_anthropic_streaming_output_complete,
+    )
+    from posthog.ai.utils import capture_streaming_event
+
+    # Prepare standardized event data
+    formatted_input = format_anthropic_streaming_input(kwargs)
+    sanitized_input = sanitize_anthropic(formatted_input)
+
+    event_data = StreamingEventData(
+        provider="anthropic",
+        model=kwargs.get("model", "unknown"),
+        base_url=str(self._client.base_url),
+        kwargs=kwargs,
+        formatted_input=sanitized_input,
+        formatted_output=format_anthropic_streaming_output_complete(
+            content_blocks, accumulated_content
         ),
-        "$ai_latency": latency,
-        "$ai_trace_id": posthog_trace_id,
-        "$ai_base_url": str(self._client.base_url),
-        **(posthog_properties or {}),
-    }
-
-    # Add tools if available
-    available_tools = extract_available_tool_calls("anthropic", kwargs)
-
-    if available_tools:
-        event_properties["$ai_tools"] = available_tools
-
-    if posthog_distinct_id is None:
-        event_properties["$process_person_profile"] = False
-
-    if hasattr(self._client._ph_client, "capture"):
-        self._client._ph_client.capture(
-            distinct_id=posthog_distinct_id or posthog_trace_id,
-            event="$ai_generation",
-            properties=event_properties,
-            groups=posthog_groups,
-        )
+        usage_stats=usage_stats,
+        latency=latency,
+        distinct_id=posthog_distinct_id,
+        trace_id=posthog_trace_id,
+        properties=posthog_properties,
+        privacy_mode=posthog_privacy_mode,
+        groups=posthog_groups,
+    )
+
+    # Use the common capture function
+    capture_streaming_event(self._client._ph_client, event_data)
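
The net effect in this file: the Anthropic async streaming path no longer assembles $ai_generation properties by hand, it packs provider data into a StreamingEventData and defers to the shared capture_streaming_event helper. The sketch below models that flow; StreamingEventData and capture_streaming_event are real names from this diff, but their definitions are not shown in this commit view, so the field types and helper body here are illustrative stand-ins only.

```python
# Minimal sketch of the new flow, assuming StreamingEventData is roughly a
# typed container with the fields passed in the diff above. These stand-in
# definitions are illustrative; the real ones live in posthog.ai.types and
# posthog.ai.utils and are not part of this commit view.
from dataclasses import dataclass
from typing import Any, Dict, List, Optional


@dataclass
class StreamingEventData:  # stand-in, field types are assumptions
    provider: str
    model: str
    base_url: str
    kwargs: Dict[str, Any]
    formatted_input: Any
    formatted_output: List[Dict[str, Any]]
    usage_stats: Dict[str, int]
    latency: float
    distinct_id: Optional[str]
    trace_id: Optional[str]
    properties: Optional[Dict[str, Any]]
    privacy_mode: bool
    groups: Optional[Dict[str, Any]]


def capture_streaming_event(ph_client: Any, data: StreamingEventData) -> None:
    # Stand-in for the shared helper: it now owns trace-id defaulting,
    # privacy mode, tool extraction, and the actual capture() call, so each
    # provider wrapper only has to describe its data.
    print(f"$ai_generation: {data.provider}/{data.model} usage={data.usage_stats}")
```

Centralizing capture this way is what lets a provider-agnostic usage field like web_search_count reach every streaming wrapper without per-provider property code.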

posthog/ai/anthropic/anthropic_converter.py

Lines changed: 40 additions & 0 deletions
@@ -163,6 +163,32 @@ def format_anthropic_streaming_content(
     return formatted
 
 
+def extract_anthropic_web_search_count(response: Any) -> int:
+    """
+    Extract web search count from Anthropic response.
+
+    Anthropic provides exact web search counts via usage.server_tool_use.web_search_requests.
+
+    Args:
+        response: The response from Anthropic API
+
+    Returns:
+        Number of web search requests (0 if none)
+    """
+    if not hasattr(response, "usage"):
+        return 0
+
+    if not hasattr(response.usage, "server_tool_use"):
+        return 0
+
+    server_tool_use = response.usage.server_tool_use
+
+    if hasattr(server_tool_use, "web_search_requests"):
+        return max(0, int(getattr(server_tool_use, "web_search_requests", 0)))
+
+    return 0
+
+
 def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
     """
     Extract usage from a full Anthropic response (non-streaming).
@@ -191,6 +217,10 @@ def extract_anthropic_usage_from_response(response: Any) -> TokenUsage:
     if cache_creation and cache_creation > 0:
         result["cache_creation_input_tokens"] = cache_creation
 
+    web_search_count = extract_anthropic_web_search_count(response)
+    if web_search_count > 0:
+        result["web_search_count"] = web_search_count
+
     return result
 
 
@@ -222,6 +252,16 @@ def extract_anthropic_usage_from_event(event: Any) -> TokenUsage:
     if hasattr(event, "usage") and event.usage:
         usage["output_tokens"] = getattr(event.usage, "output_tokens", 0)
 
+        # Extract web search count from usage
+        if hasattr(event.usage, "server_tool_use"):
+            server_tool_use = event.usage.server_tool_use
+            if hasattr(server_tool_use, "web_search_requests"):
+                web_search_count = int(
+                    getattr(server_tool_use, "web_search_requests", 0)
+                )
+                if web_search_count > 0:
+                    usage["web_search_count"] = web_search_count
+
     return usage
 
 
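Since the new extractor only dereferences attributes it has first checked with hasattr, it degrades to 0 for responses that predate server-side tools or never ran a search. Below is a self-contained check of that behavior: the function body is copied from the diff above, while the response objects are faked with SimpleNamespace (an assumed stand-in shape following the attribute path the code reads, not the real Anthropic SDK types).

```python
# Self-contained check of extract_anthropic_web_search_count; the function
# body is copied from the diff above, and the responses are SimpleNamespace
# fakes (assumptions standing in for Anthropic SDK objects).
from types import SimpleNamespace
from typing import Any


def extract_anthropic_web_search_count(response: Any) -> int:
    if not hasattr(response, "usage"):
        return 0
    if not hasattr(response.usage, "server_tool_use"):
        return 0
    server_tool_use = response.usage.server_tool_use
    if hasattr(server_tool_use, "web_search_requests"):
        return max(0, int(getattr(server_tool_use, "web_search_requests", 0)))
    return 0


searched = SimpleNamespace(
    usage=SimpleNamespace(server_tool_use=SimpleNamespace(web_search_requests=3))
)
plain = SimpleNamespace(usage=SimpleNamespace(input_tokens=10, output_tokens=5))

assert extract_anthropic_web_search_count(searched) == 3
assert extract_anthropic_web_search_count(plain) == 0  # no server_tool_use -> 0
print("ok")
```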

posthog/ai/gemini/gemini_converter.py

Lines changed: 73 additions & 3 deletions
@@ -338,6 +338,61 @@ def format_gemini_input(contents: Any) -> List[FormattedMessage]:
     return [_format_object_message(contents)]
 
 
+def extract_gemini_web_search_count(response: Any) -> int:
+    """
+    Extract web search count from Gemini response.
+
+    Gemini bills per request that uses grounding, not per query.
+    Returns 1 if grounding_metadata is present with actual search data, 0 otherwise.
+
+    Args:
+        response: The response from Gemini API
+
+    Returns:
+        1 if web search/grounding was used, 0 otherwise
+    """
+
+    # Check for grounding_metadata in candidates
+    if hasattr(response, "candidates"):
+        for candidate in response.candidates:
+            if (
+                hasattr(candidate, "grounding_metadata")
+                and candidate.grounding_metadata
+            ):
+                grounding_metadata = candidate.grounding_metadata
+
+                # Check if web_search_queries exists and is non-empty
+                if hasattr(grounding_metadata, "web_search_queries"):
+                    queries = grounding_metadata.web_search_queries
+
+                    if queries is not None and len(queries) > 0:
+                        return 1
+
+                # Check if grounding_chunks exists and is non-empty
+                if hasattr(grounding_metadata, "grounding_chunks"):
+                    chunks = grounding_metadata.grounding_chunks
+
+                    if chunks is not None and len(chunks) > 0:
+                        return 1
+
+            # Also check for google_search or grounding in function call names
+            if hasattr(candidate, "content") and candidate.content:
+                if hasattr(candidate.content, "parts") and candidate.content.parts:
+                    for part in candidate.content.parts:
+                        if hasattr(part, "function_call") and part.function_call:
+                            function_name = getattr(
+                                part.function_call, "name", ""
+                            ).lower()
+
+                            if (
+                                "google_search" in function_name
+                                or "grounding" in function_name
+                            ):
+                                return 1
+
+    return 0
+
+
 def _extract_usage_from_metadata(metadata: Any) -> TokenUsage:
     """
     Common logic to extract usage from Gemini metadata.
@@ -382,7 +437,14 @@ def extract_gemini_usage_from_response(response: Any) -> TokenUsage:
     if not hasattr(response, "usage_metadata") or not response.usage_metadata:
         return TokenUsage(input_tokens=0, output_tokens=0)
 
-    return _extract_usage_from_metadata(response.usage_metadata)
+    usage = _extract_usage_from_metadata(response.usage_metadata)
+
+    # Add web search count if present
+    web_search_count = extract_gemini_web_search_count(response)
+    if web_search_count > 0:
+        usage["web_search_count"] = web_search_count
+
+    return usage
 
 
 def extract_gemini_usage_from_chunk(chunk: Any) -> TokenUsage:
@@ -398,11 +460,19 @@
 
     usage: TokenUsage = TokenUsage()
 
+    # Extract web search count from the chunk before checking for usage_metadata
+    # Web search indicators can appear on any chunk, not just those with usage data
+    web_search_count = extract_gemini_web_search_count(chunk)
+    if web_search_count > 0:
+        usage["web_search_count"] = web_search_count
+
     if not hasattr(chunk, "usage_metadata") or not chunk.usage_metadata:
         return usage
 
-    # Use the shared helper to extract usage
-    usage = _extract_usage_from_metadata(chunk.usage_metadata)
+    usage_from_metadata = _extract_usage_from_metadata(chunk.usage_metadata)
+
+    # Merge the usage from metadata with any web search count we found
+    usage.update(usage_from_metadata)
 
     return usage
 
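Note the ordering in extract_gemini_usage_from_chunk: the web search indicator is recorded before usage_metadata is consulted, and token counts are overlaid afterwards with usage.update(...). Because _extract_usage_from_metadata never emits a web_search_count key, the update cannot clobber a count detected on a content-only chunk. A compact sketch of that merge semantics, with TokenUsage modeled as a plain dict (an assumption; in the SDK it is a TypedDict-style mapping):

```python
# Sketch of the chunk-merge order above, modeling TokenUsage as a plain dict
# (an assumption; in the SDK it is a TypedDict-style mapping).
from typing import Dict

TokenUsage = Dict[str, int]


def merge_chunk_usage(web_search_count: int, metadata_usage: TokenUsage) -> TokenUsage:
    usage: TokenUsage = {}

    # Record the web search indicator first: it can appear on a chunk that
    # carries no usage_metadata at all.
    if web_search_count > 0:
        usage["web_search_count"] = web_search_count

    # dict.update() only overwrites keys present in metadata_usage, and the
    # metadata extractor never emits web_search_count, so the value recorded
    # above survives the merge.
    usage.update(metadata_usage)
    return usage


print(merge_chunk_usage(1, {"input_tokens": 12, "output_tokens": 48}))
# -> {'web_search_count': 1, 'input_tokens': 12, 'output_tokens': 48}
```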

posthog/ai/openai/openai_async.py

Lines changed: 19 additions & 0 deletions
@@ -213,6 +213,15 @@ async def _capture_streaming_event(
         **(posthog_properties or {}),
     }
 
+    # Add web search count if present
+    web_search_count = usage_stats.get("web_search_count")
+    if (
+        web_search_count is not None
+        and isinstance(web_search_count, int)
+        and web_search_count > 0
+    ):
+        event_properties["$ai_web_search_count"] = web_search_count
+
     if available_tool_calls:
         event_properties["$ai_tools"] = available_tool_calls
 
@@ -444,6 +453,16 @@
         **(posthog_properties or {}),
     }
 
+    # Add web search count if present
+    web_search_count = usage_stats.get("web_search_count")
+
+    if (
+        web_search_count is not None
+        and isinstance(web_search_count, int)
+        and web_search_count > 0
+    ):
+        event_properties["$ai_web_search_count"] = web_search_count
+
     if available_tool_calls:
         event_properties["$ai_tools"] = available_tool_calls
 
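Both Chat Completions streaming paths gate the new property on the same test, so $ai_web_search_count is attached only when the count is a positive integer; the explicit None check is redundant once isinstance(..., int) passes, but harmless. A self-contained illustration of the guard follows; add_web_search_property is a hypothetical helper name invented for this example, not a function in the SDK.

```python
# Illustration of the guard above; add_web_search_property is a hypothetical
# helper name for this example only.
from typing import Any, Dict


def add_web_search_property(
    event_properties: Dict[str, Any], usage_stats: Dict[str, Any]
) -> None:
    web_search_count = usage_stats.get("web_search_count")
    # isinstance() already rules out None; only positive ints are forwarded.
    if isinstance(web_search_count, int) and web_search_count > 0:
        event_properties["$ai_web_search_count"] = web_search_count


props: Dict[str, Any] = {"$ai_provider": "openai"}
add_web_search_property(props, {"web_search_count": 2})
assert props["$ai_web_search_count"] == 2

props = {"$ai_provider": "openai"}
add_web_search_property(props, {"web_search_count": 0})  # zero is dropped
assert "$ai_web_search_count" not in props
print("ok")
```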
