diff --git a/pyproject.toml b/pyproject.toml index f41bdaf..48dea49 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,17 +13,28 @@ dependencies = [ "azure-search-documents>=11.5.2", "azure-cli>=2.60.0", "azure-ai-evaluation>=1.3.0", - "azure-ai-projects==1.0.0b10" + "azure-ai-projects==1.0.0b10", ] [dependency-groups] test = [ "pytest>=8.3.5", "pytest-asyncio>=0.26.0", + "tenacity>=9.1.2", + "tqdm>=4.67.1", + "tool-usage-evals>=0.1.4", ] + [tool.pytest.ini_options] asyncio_default_fixture_loop_scope = "function" # or "module", "session" based on my use case pythonpath = ["src"] [project.scripts] run-azure-ai-foundry-mcp = "mcp_foundry.__main__:main" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/mcp_foundry"] diff --git a/src/mcp_foundry/__main__.py b/src/mcp_foundry/__main__.py index 1fd17dd..5b14cde 100644 --- a/src/mcp_foundry/__main__.py +++ b/src/mcp_foundry/__main__.py @@ -5,7 +5,7 @@ from typing import Literal from dotenv import load_dotenv -from .mcp_server import mcp, auto_import_modules +from mcp_foundry.mcp_server import mcp, auto_import_modules # Configure logging @@ -16,15 +16,19 @@ ) logger = logging.getLogger("__main__") + def main() -> None: """Runs the MCP server""" parser = ArgumentParser(description="Start the MCP service with provided or default configuration.") - parser.add_argument('--transport', required=False, default='stdio', - help='Transport protocol (sse | stdio | streamable-http) (default: stdio)') - parser.add_argument('--envFile', required=False, default='.env', - help='Path to .env file (default: .env)') + parser.add_argument( + "--transport", + required=False, + default="stdio", + help="Transport protocol (sse | stdio | streamable-http) (default: stdio)", + ) + parser.add_argument("--envFile", required=False, default=".env", help="Path to .env file (default: .env)") # Parse the application arguments args = parser.parse_args() diff --git a/src/mcp_foundry/mcp_foundry_model/tools.py b/src/mcp_foundry/mcp_foundry_model/tools.py index be67ea5..a88fc79 100644 --- a/src/mcp_foundry/mcp_foundry_model/tools.py +++ b/src/mcp_foundry/mcp_foundry_model/tools.py @@ -35,8 +35,9 @@ ) logger = logging.getLogger("mcp_foundry_model") + @mcp.tool() -async def list_models_from_model_catalog(ctx: Context, search_for_free_playground: bool = False, publisher_name = "", license_name = "") -> str: +async def list_models_from_model_catalog(ctx: Context, search_for_free_playground: bool = False, publisher_name: str = "", license_name: str = "") -> str: """ Retrieves a list of supported models from the Azure AI Foundry catalog. diff --git a/tests/test_mcp.py b/tests/test_mcp.py index 4c27b40..940ecca 100644 --- a/tests/test_mcp.py +++ b/tests/test_mcp.py @@ -1,13 +1,57 @@ +from tool_usage_evals.multi_step import run_agent_turn +from pathlib import Path +from tool_usage_evals.mcp_handling import ( + mcp_session_context_manager, + extract_tool_definitions, + build_mcp_tool_caller, +) +import os import pytest from mcp import ClientSession, StdioServerParameters from mcp.client.stdio import stdio_client +from openai import AzureOpenAI + +from azure.identity import DefaultAzureCredential, get_bearer_token_provider +from dotenv import load_dotenv +import openai +from tenacity import ( + retry, + wait_random_exponential, + stop_after_attempt, + retry_if_exception_type, +) +from tqdm import tqdm + + +retry_decorator = retry( + retry=retry_if_exception_type(openai.RateLimitError), + wait=wait_random_exponential(min=10, max=90), + stop=stop_after_attempt(6), + reraise=True, +) + +load_dotenv() +MCP_SERVER_SCRIPT = Path(__file__).parent / "../src/mcp_foundry/__main__.py" + + +@pytest.fixture(scope="session") +def aoai_client() -> AzureOpenAI: + """Azure OpenAI client""" + token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") + client = AzureOpenAI( + azure_ad_token_provider=token_provider, + azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"], + api_version=os.environ["AZURE_OPENAI_API_VERSION"], + ) + return client + @pytest.mark.integration @pytest.mark.asyncio -async def test_mcp_client_lists_tools(): +async def test_mcp_client_lists_tools_using_pipx(): server_params = StdioServerParameters( command="pipx", - args=["run", "--no-cache", "--spec", "..", "run-azure-foundry-mcp"], + args=["run", "--no-cache", "--spec", "..", "run-azure-ai-foundry-mcp"], ) async with stdio_client(server_params) as (stdio, write): @@ -17,9 +61,77 @@ async def test_mcp_client_lists_tools(): tools = response.tools assert tools, "Expected at least one tool from the MCP server" - -#TODO: Add tools that take prompts and test that the correct tool(s) are selected -#TODO: Find way to only create client once per test module or make it faster -#TODO: Add LLM to client -##TODO: Make LLM easily configurable -##TODO: Make it so we can test against multiple LLMs + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_mcp_client_message_1(aoai_client) -> None: + """Test tool usage for a user message asking about foundry labs projects.""" + user_message = "What are the projects in Azure AI Foundry Labs?" + async with mcp_session_context_manager("python", [str(MCP_SERVER_SCRIPT)]) as session: + tools = await extract_tool_definitions(session) + call_tool_fn = await build_mcp_tool_caller(session) + + result = await retry_decorator(run_agent_turn)( + aoai_client=aoai_client, + model=os.environ["AZURE_OPENAI_DEPLOYMENT"], + tools=tools, + call_tool_fn=call_tool_fn, + user_message=user_message, + ) + + tool_call_names = [t.name for t in result.tool_calls] + assert "list_azure_ai_foundry_labs_projects" in tool_call_names + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_mcp_client_message_2(aoai_client) -> None: + """Test tool usage for a user message asking about prototyping with foundry labs projects.""" + user_message = "I want to prototype an app with Azure AI Foundry Labs. Where do I start?" + async with mcp_session_context_manager("python", [str(MCP_SERVER_SCRIPT)]) as session: + tools = await extract_tool_definitions(session) + call_tool_fn = await build_mcp_tool_caller(session) + + result = await retry_decorator(run_agent_turn)( + aoai_client=aoai_client, + model=os.environ["AZURE_OPENAI_DEPLOYMENT"], + tools=tools, + call_tool_fn=call_tool_fn, + user_message=user_message, + ) + + tool_call_names = [t.name for t in result.tool_calls] + assert "get_prototyping_instructions_for_github_and_labs" in tool_call_names + + +@pytest.mark.integration +@pytest.mark.asyncio +async def test_mcp_client_message_3(aoai_client) -> None: + """ + Test tool usage for a user message asking code/implementation details. + Because of stochasticity of response (sometimes uses the prototyping tool or list-projects + tool instead of intended code-samples tool), we do n repeated trials. + """ + user_message = "Give me code and implementation details for the Aurora model." + n_trials = 3 + async with mcp_session_context_manager("python", [str(MCP_SERVER_SCRIPT)]) as session: + tools = await extract_tool_definitions(session) + call_tool_fn = await build_mcp_tool_caller(session) + + results = [] + for trial in tqdm(range(n_trials)): + result = await retry_decorator(run_agent_turn)( + aoai_client=aoai_client, + model=os.environ["AZURE_OPENAI_DEPLOYMENT"], + tools=tools, + call_tool_fn=call_tool_fn, + user_message=user_message, + ) + results.append(result) + + all_tool_call_names = [[t.name for t in result.tool_calls] for result in results] + + n_found_correct_tool = sum(["get_model_details_and_code_samples" in names for names in all_tool_call_names]) + accuracy = n_found_correct_tool / n_trials + + assert accuracy > 0.5 diff --git a/uv.lock b/uv.lock index 6663508..b73f619 100644 --- a/uv.lock +++ b/uv.lock @@ -1920,7 +1920,7 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } wheels = [ @@ -2599,7 +2599,7 @@ wheels = [ [[package]] name = "mcp" -version = "1.9.0" +version = "1.9.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2612,15 +2612,15 @@ dependencies = [ { name = "starlette" }, { name = "uvicorn", marker = "sys_platform != 'emscripten'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bc/8d/0f4468582e9e97b0a24604b585c651dfd2144300ecffd1c06a680f5c8861/mcp-1.9.0.tar.gz", hash = "sha256:905d8d208baf7e3e71d70c82803b89112e321581bcd2530f9de0fe4103d28749", size = 281432, upload-time = "2025-05-15T18:51:06.615Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/bc/54aec2c334698cc575ca3b3481eed627125fb66544152fa1af927b1a495c/mcp-1.9.1.tar.gz", hash = "sha256:19879cd6dde3d763297617242888c2f695a95dfa854386a6a68676a646ce75e4", size = 316247, upload-time = "2025-05-22T15:52:21.26Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a5/d5/22e36c95c83c80eb47c83f231095419cf57cf5cca5416f1c960032074c78/mcp-1.9.0-py3-none-any.whl", hash = "sha256:9dfb89c8c56f742da10a5910a1f64b0d2ac2c3ed2bd572ddb1cfab7f35957178", size = 125082, upload-time = "2025-05-15T18:51:04.916Z" }, + { url = "https://files.pythonhosted.org/packages/a6/c0/4ac795585a22a0a2d09cd2b1187b0252d2afcdebd01e10a68bbac4d34890/mcp-1.9.1-py3-none-any.whl", hash = "sha256:2900ded8ffafc3c8a7bfcfe8bc5204037e988e753ec398f371663e6a06ecd9a9", size = 130261, upload-time = "2025-05-22T15:52:19.702Z" }, ] [[package]] name = "mcp-foundry" version = "0.1.0" -source = { virtual = "." } +source = { editable = "." } dependencies = [ { name = "azure-ai-evaluation" }, { name = "azure-ai-projects" }, @@ -2637,6 +2637,9 @@ dependencies = [ test = [ { name = "pytest" }, { name = "pytest-asyncio" }, + { name = "tenacity" }, + { name = "tool-usage-evals" }, + { name = "tqdm" }, ] [package.metadata] @@ -2656,6 +2659,9 @@ requires-dist = [ test = [ { name = "pytest", specifier = ">=8.3.5" }, { name = "pytest-asyncio", specifier = ">=0.26.0" }, + { name = "tenacity", specifier = ">=9.1.2" }, + { name = "tool-usage-evals", specifier = ">=0.1.4" }, + { name = "tqdm", specifier = ">=4.67.1" }, ] [[package]] @@ -2923,7 +2929,7 @@ wheels = [ [[package]] name = "openai" -version = "1.79.0" +version = "1.82.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "anyio" }, @@ -2935,9 +2941,9 @@ dependencies = [ { name = "tqdm" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/52/cf/4901077dbbfd0d82a814d721600fa0c3a61a093d7f0bf84d0e4732448dc9/openai-1.79.0.tar.gz", hash = "sha256:e3b627aa82858d3e42d16616edc22aa9f7477ee5eb3e6819e9f44a961d899a4c", size = 444736, upload-time = "2025-05-16T19:49:59.738Z" } +sdist = { url = "https://files.pythonhosted.org/packages/3f/19/6b09bb3132f7e1a7a2291fd46fb33659bbccca041f863abd682e14ba86d7/openai-1.82.0.tar.gz", hash = "sha256:b0a009b9a58662d598d07e91e4219ab4b1e3d8ba2db3f173896a92b9b874d1a7", size = 461092, upload-time = "2025-05-22T20:08:07.282Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/81/d2/e3992bb7c6641b765c1008e3c96e076e0b50381be2cce344e6ff177bad80/openai-1.79.0-py3-none-any.whl", hash = "sha256:d5050b92d5ef83f869cb8dcd0aca0b2291c3413412500eec40c66981b3966992", size = 683334, upload-time = "2025-05-16T19:49:57.445Z" }, + { url = "https://files.pythonhosted.org/packages/51/4b/a59464ee5f77822a81ee069b4021163a0174940a92685efc3cf8b4c443a3/openai-1.82.0-py3-none-any.whl", hash = "sha256:8c40647fea1816516cb3de5189775b30b5f4812777e40b8768f361f232b61b30", size = 720412, upload-time = "2025-05-22T20:08:05.637Z" }, ] [[package]] @@ -3404,7 +3410,7 @@ wheels = [ [[package]] name = "pydantic" -version = "2.11.4" +version = "2.11.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "annotated-types" }, @@ -3412,9 +3418,9 @@ dependencies = [ { name = "typing-extensions" }, { name = "typing-inspection" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/77/ab/5250d56ad03884ab5efd07f734203943c8a8ab40d551e208af81d0257bf2/pydantic-2.11.4.tar.gz", hash = "sha256:32738d19d63a226a52eed76645a98ee07c1f410ee41d93b4afbfa85ed8111c2d", size = 786540, upload-time = "2025-04-29T20:38:55.02Z" } +sdist = { url = "https://files.pythonhosted.org/packages/f0/86/8ce9040065e8f924d642c58e4a344e33163a07f6b57f836d0d734e0ad3fb/pydantic-2.11.5.tar.gz", hash = "sha256:7f853db3d0ce78ce8bbb148c401c2cdd6431b3473c0cdff2755c7690952a7b7a", size = 787102, upload-time = "2025-05-22T21:18:08.761Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e7/12/46b65f3534d099349e38ef6ec98b1a5a81f42536d17e0ba382c28c67ba67/pydantic-2.11.4-py3-none-any.whl", hash = "sha256:d9615eaa9ac5a063471da949c8fc16376a84afb5024688b3ff885693506764eb", size = 443900, upload-time = "2025-04-29T20:38:52.724Z" }, + { url = "https://files.pythonhosted.org/packages/b5/69/831ed22b38ff9b4b64b66569f0e5b7b97cf3638346eb95a2147fdb49ad5f/pydantic-2.11.5-py3-none-any.whl", hash = "sha256:f9c26ba06f9747749ca1e5c94d6a85cb84254577553c8785576fd38fa64dc0f7", size = 444229, upload-time = "2025-05-22T21:18:06.329Z" }, ] [[package]] @@ -4219,6 +4225,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, ] +[[package]] +name = "tenacity" +version = "9.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0a/d4/2b0cd0fe285e14b36db076e78c93766ff1d529d70408bd1d2a5a84f1d929/tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb", size = 48036, upload-time = "2025-04-02T08:25:09.966Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e5/30/643397144bfbfec6f6ef821f36f33e57d35946c44a2352d3c9f0ae847619/tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138", size = 28248, upload-time = "2025-04-02T08:25:07.678Z" }, +] + [[package]] name = "tiktoken" version = "0.9.0" @@ -4294,6 +4309,23 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] +[[package]] +name = "tool-usage-evals" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "azure-identity" }, + { name = "mcp" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tenacity" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/5c/57e6940cbb191a982dee71863c125aad943a84b4f30d4ce08a522642f764/tool_usage_evals-0.1.4.tar.gz", hash = "sha256:7abbeb257183a8fbe818b7f0e793a5afe669ff96e2dfc3ae28f87a4ae41b8731", size = 49210, upload-time = "2025-05-30T00:49:51.715Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/09/73/298e98da166fbd16820affbed4c213bc22af77fd2e68df3381add4f186db/tool_usage_evals-0.1.4-py3-none-any.whl", hash = "sha256:732b167927697bf19bb924c2341f27056e591ad6496023b288884e8081d11ce1", size = 5578, upload-time = "2025-05-30T00:49:50.49Z" }, +] + [[package]] name = "tqdm" version = "4.67.1"