@@ -1565,9 +1565,9 @@ def test_anthropic_cache_write_and_read_tokens(mock_client):
             AIMessage(
                 content="Using cached analysis to provide quick response.",
                 usage_metadata={
-                    "input_tokens": 200,
+                    "input_tokens": 1200,
                     "output_tokens": 30,
-                    "total_tokens": 1030,
+                    "total_tokens": 1230,
                     "cache_read_input_tokens": 800,  # Anthropic cache read
                 },
             )
@@ -1584,7 +1584,7 @@ def test_anthropic_cache_write_and_read_tokens(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 200
+    assert generation_props["$ai_input_tokens"] == 400
     assert generation_props["$ai_output_tokens"] == 30
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
     assert generation_props["$ai_cache_read_input_tokens"] == 800
@@ -1626,7 +1626,7 @@ def test_openai_cache_read_tokens(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 150
+    assert generation_props["$ai_input_tokens"] == 50
     assert generation_props["$ai_output_tokens"] == 40
     assert generation_props["$ai_cache_read_input_tokens"] == 100
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
@@ -1708,7 +1708,7 @@ def test_combined_reasoning_and_cache_tokens(mock_client):
     generation_props = generation_args["properties"]

     assert generation_args["event"] == "$ai_generation"
-    assert generation_props["$ai_input_tokens"] == 500
+    assert generation_props["$ai_input_tokens"] == 200
     assert generation_props["$ai_output_tokens"] == 100
     assert generation_props["$ai_cache_read_input_tokens"] == 300
     assert generation_props["$ai_cache_creation_input_tokens"] == 0
@@ -1879,6 +1879,212 @@ def test_tool_definition(mock_client):
     assert props["$ai_tools"] == tools


+def test_cache_read_tokens_subtraction_from_input_tokens(mock_client):
+    """Test that cache_read_tokens are properly subtracted from input_tokens.
+
+    This tests the logic in callbacks.py lines 757-758:
+        if normalized_usage.input_tokens and normalized_usage.cache_read_tokens:
+            normalized_usage.input_tokens = max(normalized_usage.input_tokens - normalized_usage.cache_read_tokens, 0)
+    """
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Use the cached prompt for this request")]
+    )
+
+    # Scenario 1: input_tokens includes cache_read_tokens (typical case)
+    # input_tokens=150 includes 100 cache_read tokens, so actual input is 50
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response using cached prompt context.",
+                usage_metadata={
+                    "input_tokens": 150,  # Total includes cache reads
+                    "output_tokens": 40,
+                    "total_tokens": 190,
+                    "cache_read_input_tokens": 100,  # 100 tokens read from cache
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response using cached prompt context."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should be reduced: 150 - 100 = 50
+    assert generation_props["$ai_input_tokens"] == 50
+    assert generation_props["$ai_output_tokens"] == 40
+    assert generation_props["$ai_cache_read_input_tokens"] == 100
+
+
+def test_cache_read_tokens_subtraction_prevents_negative(mock_client):
+    """Test that cache_read_tokens subtraction doesn't result in negative input_tokens.
+
+    This tests the max(..., 0) part of the logic in callbacks.py lines 757-758.
+    """
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Edge case with large cache read")]
+    )
+
+    # Edge case: cache_read_tokens >= input_tokens
+    # This could happen in some API responses where accounting differs
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response with edge case token counts.",
+                usage_metadata={
+                    "input_tokens": 80,
+                    "output_tokens": 20,
+                    "total_tokens": 100,
+                    "cache_read_input_tokens": 100,  # More than input_tokens
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response with edge case token counts."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should be 0, not negative: max(80 - 100, 0) = 0
+    assert generation_props["$ai_input_tokens"] == 0
+    assert generation_props["$ai_output_tokens"] == 20
+    assert generation_props["$ai_cache_read_input_tokens"] == 100
+
+
+def test_no_cache_read_tokens_no_subtraction(mock_client):
+    """Test that when there are no cache_read_tokens, input_tokens remain unchanged.
+
+    This tests the conditional check before the subtraction in callbacks.py line 757.
+    """
+    prompt = ChatPromptTemplate.from_messages(
+        [("user", "Normal request without cache")]
+    )
+
+    # No cache usage - input_tokens should remain as-is
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response without cache.",
+                usage_metadata={
+                    "input_tokens": 100,
+                    "output_tokens": 30,
+                    "total_tokens": 130,
+                    # No cache_read_input_tokens
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response without cache."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should remain unchanged at 100
+    assert generation_props["$ai_input_tokens"] == 100
+    assert generation_props["$ai_output_tokens"] == 30
+    assert generation_props["$ai_cache_read_input_tokens"] == 0
+
+
+def test_zero_input_tokens_with_cache_read(mock_client):
+    """Test edge case where input_tokens is 0 but cache_read_tokens exist.
+
+    This tests the falsy check in the conditional (line 757).
+    """
+    prompt = ChatPromptTemplate.from_messages([("user", "Edge case query")])
+
+    # Edge case: input_tokens is 0 (falsy), should skip subtraction
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Response.",
+                usage_metadata={
+                    "input_tokens": 0,
+                    "output_tokens": 10,
+                    "total_tokens": 10,
+                    "cache_read_input_tokens": 50,
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Response."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should remain 0 (no subtraction because input_tokens is falsy)
+    assert generation_props["$ai_input_tokens"] == 0
+    assert generation_props["$ai_output_tokens"] == 10
+    assert generation_props["$ai_cache_read_input_tokens"] == 50
+
+
+def test_cache_write_tokens_not_subtracted_from_input(mock_client):
+    """Test that cache_creation_input_tokens (cache write) do NOT affect input_tokens.
+
+    Only cache_read_tokens should be subtracted from input_tokens, not cache_write_tokens.
+    """
+    prompt = ChatPromptTemplate.from_messages([("user", "Create cache")])
+
+    # Cache creation without cache read
+    model = FakeMessagesListChatModel(
+        responses=[
+            AIMessage(
+                content="Creating cache.",
+                usage_metadata={
+                    "input_tokens": 1000,
+                    "output_tokens": 20,
+                    "total_tokens": 1020,
+                    "cache_creation_input_tokens": 800,  # Cache write, not read
+                },
+            )
+        ]
+    )
+
+    callbacks = [CallbackHandler(mock_client)]
+    chain = prompt | model
+    result = chain.invoke({}, config={"callbacks": callbacks})
+
+    assert result.content == "Creating cache."
+    assert mock_client.capture.call_count == 3
+
+    generation_args = mock_client.capture.call_args_list[1][1]
+    generation_props = generation_args["properties"]
+
+    assert generation_args["event"] == "$ai_generation"
+    # Input tokens should NOT be reduced by cache_creation_input_tokens
+    assert generation_props["$ai_input_tokens"] == 1000
+    assert generation_props["$ai_output_tokens"] == 20
+    assert generation_props["$ai_cache_creation_input_tokens"] == 800
+    assert generation_props["$ai_cache_read_input_tokens"] == 0
+
+
 def test_agent_action_and_finish_imports():
     """
     Regression test for LangChain 1.0+ compatibility (Issue #362).
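
For reference, every updated expectation above follows the same rule: cache-read tokens are subtracted from the reported input tokens and the result is clamped at zero (1200 - 800 = 400 for the Anthropic test, 150 - 100 = 50 for the OpenAI test, 500 - 300 = 200 for the combined reasoning/cache test), while cache-write tokens are left untouched. The snippet below is a minimal standalone sketch of that normalization, not the actual posthog-python callbacks.py code; the NormalizedUsage dataclass and normalize_cache_read helper are invented here purely to mirror the logic quoted in the first new test's docstring.

# Hypothetical sketch of the cache-read normalization the tests pin down.
# Field and function names are assumptions, not the library's real API.
from dataclasses import dataclass


@dataclass
class NormalizedUsage:
    input_tokens: int = 0
    output_tokens: int = 0
    cache_read_tokens: int = 0
    cache_write_tokens: int = 0


def normalize_cache_read(usage: NormalizedUsage) -> NormalizedUsage:
    # Subtract cached reads from the reported input so only uncached input is
    # counted; clamp at zero so unusual provider accounting can never produce
    # a negative value. Cache writes are intentionally not subtracted.
    if usage.input_tokens and usage.cache_read_tokens:
        usage.input_tokens = max(usage.input_tokens - usage.cache_read_tokens, 0)
    return usage


# Spot checks matching the assertions in the tests above:
assert normalize_cache_read(NormalizedUsage(150, 40, 100)).input_tokens == 50
assert normalize_cache_read(NormalizedUsage(80, 20, 100)).input_tokens == 0      # clamped at zero
assert normalize_cache_read(NormalizedUsage(100, 30, 0)).input_tokens == 100     # no cache read
assert normalize_cache_read(NormalizedUsage(0, 10, 50)).input_tokens == 0        # falsy input skips subtraction
assert normalize_cache_read(NormalizedUsage(1000, 20, 0, 800)).input_tokens == 1000  # cache write ignored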