|  | 
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
|  | 5 | + | 
|  | 6 | +"""Execute Python code using MCP server with mcp-run-python.""" | 
|  | 7 | + | 
|  | 8 | +import json | 
|  | 9 | +import os | 
|  | 10 | + | 
|  | 11 | +from tensordict import set_list_to_stack, TensorDict | 
|  | 12 | + | 
|  | 13 | +from torchrl.data.llm import History | 
|  | 14 | +from torchrl.envs.llm import ChatEnv | 
|  | 15 | +from torchrl.envs.llm.transforms import MCPToolTransform | 
|  | 16 | + | 
|  | 17 | +set_list_to_stack(True).set() | 
|  | 18 | + | 
|  | 19 | +deno_path = os.path.expanduser("~/.deno/bin") | 
|  | 20 | +if deno_path not in os.environ.get("PATH", ""): | 
|  | 21 | +    os.environ["PATH"] = f"{deno_path}:{os.environ['PATH']}" | 
|  | 22 | + | 
# One MCP server entry: execute Python through mcp-run-python, launched with
# uvx and speaking the stdio transport. The child inherits our environment so
# it can find deno on PATH.
servers = {
    "python": {
        "command": "uvx",
        "args": ["mcp-run-python", "stdio"],
        "env": os.environ.copy(),
    }
}

# Single-sample chat environment with the MCP tool transform attached.
# NOTE(review): MCPToolTransform is expected to detect <tool> tags in
# assistant turns and dispatch them to the configured server — confirm
# against the torchrl docs.
env = ChatEnv(batch_size=(1,)).append_transform(MCPToolTransform(servers=servers))
|  | 33 | + | 
# Reset with a plain-text query; the env converts it into the initial chat
# history carried in the tensordict.
td = env.reset(TensorDict(query="You are a helpful assistant", batch_size=(1,)))

history = td.get("history")

# Snippet to run in the sandboxed interpreter. The trailing bare `result`
# expression is what mcp-run-python reports back as the return value.
code = """
import math
result = math.sqrt(144) + math.pi
print(f"Result: {result}")
result
"""
|  | 45 | + | 
# Craft an assistant turn embedding an MCP tool call: the JSON payload names
# the tool argument, and the <tool> tag addresses server "python", tool
# "run_python_code". Two unsqueezes give the (batch, turn) shape History
# stacking expects here.
payload = json.dumps({"python_code": code})
response = History(
    role="assistant",
    content=f'Let me calculate that.\n<tool>python.run_python_code\n{payload}</tool>',
).unsqueeze(0).unsqueeze(0)

# Extend the prompt in place with the assistant turn and record it as the
# response slot for this step.
history.full = history.prompt.extend(response, inplace=True, dim=-1)
history.response = response

# Stepping the env is what actually executes the tool call on the server.
result = env.step(td.set("history", history))

print("Python code executed via MCP!")
print("\nTool response:")
# The last message of the post-step prompt should be the tool's output turn.
reply = result["next", "history"].prompt[0, -1]
print(f"Role: {reply.role}")
print(f"Content: {reply.content}")
|  | 65 | + | 
# A second execution to show the call can be repeated on the same env.
fibonacci_code = """
def fibonacci(n):
    if n <= 1:
        return n
    return fibonacci(n-1) + fibonacci(n-2)

result = [fibonacci(i) for i in range(10)]
print(f"Fibonacci sequence: {result}")
result
"""

# Continue the conversation from the post-step ("next") state.
history = result["next", "history"]
payload2 = json.dumps({"python_code": fibonacci_code})
response2 = History(
    role="assistant",
    content=f'Now calculating Fibonacci.\n<tool>python.run_python_code\n{payload2}</tool>',
).unsqueeze(0).unsqueeze(0)

# Same pattern as the first call: extend the prompt in place, set the
# response slot, then step to trigger tool execution.
history.full = history.prompt.extend(response2, inplace=True, dim=-1)
history.response = response2

result2 = env.step(result["next"].set("history", history))

print("\n\nSecond execution:")
print("\nTool response:")
reply2 = result2["next", "history"].prompt[0, -1]
print(f"Role: {reply2.role}")
# Truncate potentially long tool output for readability.
print(f"Content: {reply2.content[:500]}...")