13 changes: 12 additions & 1 deletion pyproject.toml
@@ -13,17 +13,28 @@ dependencies = [
"azure-search-documents>=11.5.2",
"azure-cli>=2.60.0",
"azure-ai-evaluation>=1.3.0",
"azure-ai-projects==1.0.0b10"
"azure-ai-projects==1.0.0b10",
]

[dependency-groups]
test = [
"pytest>=8.3.5",
"pytest-asyncio>=0.26.0",
"tenacity>=9.1.2",
"tqdm>=4.67.1",
"tool-usage-evals>=0.1.4",
]

[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "function"  # or "module" / "session", depending on the use case
pythonpath = ["src"]

[project.scripts]
run-azure-ai-foundry-mcp = "mcp_foundry.__main__:main"

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/mcp_foundry"]
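
Reviewer note: the new `[dependency-groups]` table plus `pythonpath = ["src"]` lets the tests import `mcp_foundry` straight from the source tree, and `asyncio_default_fixture_loop_scope = "function"` gives each async test its own event loop. A minimal sketch of a test that leans on both settings (hypothetical file, not part of this PR):

```python
# tests/test_smoke.py -- hypothetical sketch, not part of this PR
import pytest

# Resolves because pythonpath = ["src"] puts src/ on sys.path for pytest.
from mcp_foundry.mcp_server import mcp


@pytest.mark.asyncio
async def test_server_exposes_tools():
    # Runs on a per-test event loop under the "function" loop scope.
    tools = await mcp.list_tools()
    assert tools, "expected at least one registered tool"
```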
14 changes: 9 additions & 5 deletions src/mcp_foundry/__main__.py
@@ -5,7 +5,7 @@
from typing import Literal
from dotenv import load_dotenv

from .mcp_server import mcp, auto_import_modules
from mcp_foundry.mcp_server import mcp, auto_import_modules


# Configure logging
@@ -16,15 +16,19 @@
)
logger = logging.getLogger("__main__")


def main() -> None:
"""Runs the MCP server"""

parser = ArgumentParser(description="Start the MCP service with provided or default configuration.")

parser.add_argument('--transport', required=False, default='stdio',
help='Transport protocol (sse | stdio | streamable-http) (default: stdio)')
parser.add_argument('--envFile', required=False, default='.env',
help='Path to .env file (default: .env)')
parser.add_argument(
"--transport",
required=False,
default="stdio",
help="Transport protocol (sse | stdio | streamable-http) (default: stdio)",
)
parser.add_argument("--envFile", required=False, default=".env", help="Path to .env file (default: .env)")

# Parse the application arguments
args = parser.parse_args()
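
Since `--transport` only accepts three values, a possible further tightening (not in this PR) is to let argparse validate the value via `choices`, so a typo fails fast instead of reaching the server:

```python
# Hypothetical variant of the --transport argument using choices validation.
parser.add_argument(
    "--transport",
    required=False,
    default="stdio",
    choices=["sse", "stdio", "streamable-http"],
    help="Transport protocol (default: stdio)",
)
```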
3 changes: 2 additions & 1 deletion src/mcp_foundry/mcp_foundry_model/tools.py
@@ -35,8 +35,9 @@
)
logger = logging.getLogger("mcp_foundry_model")


@mcp.tool()
async def list_models_from_model_catalog(ctx: Context, search_for_free_playground: bool = False, publisher_name = "", license_name = "") -> str:
async def list_models_from_model_catalog(ctx: Context, search_for_free_playground: bool = False, publisher_name: str = "", license_name: str = "") -> str:
"""
Retrieves a list of supported models from the Azure AI Foundry catalog.

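
The added `: str` annotations are more than style: FastMCP derives each tool's input schema from the Python signature, so untyped parameters can surface as loosely typed fields to MCP clients. A standalone illustration of the pattern (hypothetical tool, not from this repo):

```python
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("example")


@mcp.tool()
async def search_catalog(query: str, max_results: int = 10) -> str:
    """Typed parameters let FastMCP emit a precise JSON schema for clients."""
    return f"searching for {query!r} (top {max_results} results)"
```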
128 changes: 120 additions & 8 deletions tests/test_mcp.py
@@ -1,13 +1,57 @@
from tool_usage_evals.multi_step import run_agent_turn
from pathlib import Path
from tool_usage_evals.mcp_handling import (
mcp_session_context_manager,
extract_tool_definitions,
build_mcp_tool_caller,
)
import os
import pytest
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from openai import AzureOpenAI

from azure.identity import DefaultAzureCredential, get_bearer_token_provider
from dotenv import load_dotenv
import openai
from tenacity import (
retry,
wait_random_exponential,
stop_after_attempt,
retry_if_exception_type,
)
from tqdm import tqdm


retry_decorator = retry(
retry=retry_if_exception_type(openai.RateLimitError),
wait=wait_random_exponential(min=10, max=90),
stop=stop_after_attempt(6),
reraise=True,
)

load_dotenv()
MCP_SERVER_SCRIPT = Path(__file__).parent / "../src/mcp_foundry/__main__.py"


@pytest.fixture(scope="session")
def aoai_client() -> AzureOpenAI:
"""Azure OpenAI client"""
token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")
client = AzureOpenAI(
azure_ad_token_provider=token_provider,
azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
api_version=os.environ["AZURE_OPENAI_API_VERSION"],
)
return client


@pytest.mark.integration
@pytest.mark.asyncio
async def test_mcp_client_lists_tools():
async def test_mcp_client_lists_tools_using_pipx():
server_params = StdioServerParameters(
command="pipx",
args=["run", "--no-cache", "--spec", "..", "run-azure-foundry-mcp"],
args=["run", "--no-cache", "--spec", "..", "run-azure-ai-foundry-mcp"],
)

async with stdio_client(server_params) as (stdio, write):
@@ -17,9 +17,61 @@ async def test_mcp_client_lists_tools():
tools = response.tools
assert tools, "Expected at least one tool from the MCP server"


# TODO: Add tools that take prompts and test that the correct tool(s) are selected
# TODO: Find a way to create the client only once per test module, or make creation faster
# TODO: Add an LLM to the client
## TODO: Make the LLM easily configurable
## TODO: Make it possible to test against multiple LLMs

@pytest.mark.integration
@pytest.mark.asyncio
async def test_mcp_client_message_1(aoai_client) -> None:
"""Test tool usage for a user message asking about foundry labs projects."""
user_message = "What are the projects in Azure AI Foundry Labs?"
async with mcp_session_context_manager("python", [str(MCP_SERVER_SCRIPT)]) as session:
tools = await extract_tool_definitions(session)
call_tool_fn = await build_mcp_tool_caller(session)

result = await retry_decorator(run_agent_turn)(
aoai_client=aoai_client,
model=os.environ["AZURE_OPENAI_DEPLOYMENT"],
tools=tools,
call_tool_fn=call_tool_fn,
user_message=user_message,
)

tool_call_names = [t.name for t in result.tool_calls]
assert "list_azure_ai_foundry_labs_projects" in tool_call_names


@pytest.mark.integration
@pytest.mark.asyncio
async def test_mcp_client_message_2(aoai_client) -> None:
"""Test tool usage for a user message asking about prototyping with foundry labs projects."""
user_message = "I want to prototype an app with Azure AI Foundry Labs. Where do I start?"
async with mcp_session_context_manager("python", [str(MCP_SERVER_SCRIPT)]) as session:
tools = await extract_tool_definitions(session)
call_tool_fn = await build_mcp_tool_caller(session)

result = await retry_decorator(run_agent_turn)(
aoai_client=aoai_client,
model=os.environ["AZURE_OPENAI_DEPLOYMENT"],
tools=tools,
call_tool_fn=call_tool_fn,
user_message=user_message,
)

tool_call_names = [t.name for t in result.tool_calls]
assert "get_prototyping_instructions_for_github_and_labs" in tool_call_names


@pytest.mark.integration
@pytest.mark.asyncio
async def test_mcp_client_message_3(aoai_client) -> None:
"""
Test tool usage for a user message asking for code/implementation details.
Because responses are stochastic (the model sometimes picks the prototyping or
list-projects tool instead of the intended code-samples tool), we run n repeated trials.
"""
user_message = "Give me code and implementation details for the Aurora model."
n_trials = 3
async with mcp_session_context_manager("python", [str(MCP_SERVER_SCRIPT)]) as session:
tools = await extract_tool_definitions(session)
call_tool_fn = await build_mcp_tool_caller(session)

results = []
for trial in tqdm(range(n_trials)):
result = await retry_decorator(run_agent_turn)(
aoai_client=aoai_client,
model=os.environ["AZURE_OPENAI_DEPLOYMENT"],
tools=tools,
call_tool_fn=call_tool_fn,
user_message=user_message,
)
results.append(result)

all_tool_call_names = [[t.name for t in result.tool_calls] for result in results]

n_found_correct_tool = sum("get_model_details_and_code_samples" in names for names in all_tool_call_names)
accuracy = n_found_correct_tool / n_trials

assert accuracy > 0.5
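
Two patterns here are worth noting. First, with `n_trials = 3`, the `accuracy > 0.5` assertion means at least two of the three runs must select `get_model_details_and_code_samples`. Second, tenacity is applied at the call site (`retry_decorator(run_agent_turn)`) rather than at the function definition, which keeps `run_agent_turn` undecorated for other callers. The call-site retry pattern in isolation (sketch with a stand-in coroutine):

```python
import asyncio

import openai
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential

retry_decorator = retry(
    retry=retry_if_exception_type(openai.RateLimitError),
    wait=wait_random_exponential(min=10, max=90),
    stop=stop_after_attempt(6),
    reraise=True,
)


async def flaky_call(prompt: str) -> str:
    """Stand-in for run_agent_turn; pretend it can raise RateLimitError."""
    return prompt.upper()


async def main() -> None:
    # Wrapping at the call site retries only this invocation; flaky_call
    # itself stays undecorated for callers that want a different policy.
    result = await retry_decorator(flaky_call)("hello")
    print(result)


asyncio.run(main())
```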
54 changes: 43 additions & 11 deletions uv.lock

Some generated files are not rendered by default.