-
Notifications
You must be signed in to change notification settings - Fork 14
MCP-8 Add: Asset Search with Custom Metadata Filters in Atlan MCP Server #116
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
22c024b
18c4b04
ff5f808
56d4e6f
99b8d55
8652d58
bf9ccb9
8f0eaf4
7eb389f
d73074a
8bfe222
fcd70a5
150d08b
042babe
0a52351
7512a6b
93cc2f6
017d738
dafac19
e2f2654
a08271c
069a2cc
abe1430
3fa116d
9960d50
b049283
2860c2f
37142c5
582a125
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
get_assets_by_dsl, | ||
traverse_lineage, | ||
update_assets, | ||
get_custom_metadata_context, | ||
create_glossary_category_assets, | ||
create_glossary_assets, | ||
create_glossary_term_assets, | ||
|
@@ -42,6 +43,7 @@ | |
@mcp.tool() | ||
def search_assets_tool( | ||
conditions=None, | ||
custom_metadata_conditions=None, | ||
negative_conditions=None, | ||
some_conditions=None, | ||
min_somes=1, | ||
|
@@ -65,6 +67,8 @@ def search_assets_tool( | |
Args: | ||
conditions (Dict[str, Any], optional): Dictionary of attribute conditions to match. | ||
Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} | ||
custom_metadata_conditions (List[Dict[str, Any]], optional): List of custom metadata conditions to match. | ||
Format: [{"custom_metadata_filter": {"display_name": "Business Metadata Name", "property_filters": [{"property_name": "property", "property_value": "value", "operator": "eq"}]}}] | ||
negative_conditions (Dict[str, Any], optional): Dictionary of attribute conditions to exclude. | ||
Format: {"attribute_name": value} or {"attribute_name": {"operator": operator, "value": value}} | ||
some_conditions (Dict[str, Any], optional): Conditions for where_some() queries that require min_somes of them to match. | ||
|
@@ -110,6 +114,86 @@ def search_assets_tool( | |
include_attributes=["owner_users", "owner_groups"] | ||
) | ||
|
||
# Search for assets with custom metadata having a specific property filter (eq) | ||
assets = search_assets( | ||
custom_metadata_conditions=[{ | ||
"custom_metadata_filter": { | ||
"display_name": "Business Ownership", # This is the display name of the business metadata | ||
"property_filters": [{ | ||
"property_name": "business_owner", # This is the display name of the property | ||
"property_value": "John", # This is the value of the property | ||
"operator": "eq" | ||
}] | ||
} | ||
}], | ||
include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] | ||
) | ||
|
||
# Search for assets with custom metadata having a specific property filter (gt) | ||
assets = search_assets( | ||
custom_metadata_conditions=[{ | ||
"custom_metadata_filter": { | ||
"display_name": "Data Quality", | ||
"property_filters": [{ | ||
"property_name": "quality_score", | ||
"property_value": 80, | ||
"operator": "gt" | ||
}] | ||
} | ||
}], | ||
include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] | ||
) | ||
|
||
# Search for assets with custom metadata having multiple property filters (eq and gte) | ||
assets = search_assets( | ||
custom_metadata_conditions=[{ | ||
"custom_metadata_filter": { | ||
"display_name": "Data Governance", | ||
"property_filters": [ | ||
{ | ||
"property_name": "data_owner", | ||
"property_value": "John Smith", | ||
"operator": "eq" | ||
}, | ||
{ | ||
"property_name": "retention_period", | ||
"property_value": 365, | ||
"operator": "gte" | ||
} | ||
] | ||
} | ||
}], | ||
include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] | ||
) | ||
|
||
# Search for assets with custom metadata having multiple business metadata filters (eq and gte) | ||
assets = search_assets( | ||
custom_metadata_conditions=[ | ||
{ | ||
"custom_metadata_filter": { | ||
"display_name": "Data Classification", | ||
"property_filters": [{ | ||
"property_name": "sensitivity_level", | ||
"property_value": "sensitive", | ||
"operator": "eq" | ||
}] | ||
} | ||
}, | ||
{ | ||
"custom_metadata_filter": { | ||
"display_name": "Data Quality", | ||
"property_filters": [{ | ||
"property_name": "quality_score", | ||
"property_value": 80, | ||
"operator": "gte" | ||
}] | ||
} | ||
} | ||
], | ||
include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"] | ||
) | ||
|
||
|
||
# Search for columns with specific certificate status | ||
columns = search_assets( | ||
asset_type="Column", | ||
|
@@ -234,6 +318,7 @@ def search_assets_tool( | |
try: | ||
# Parse JSON string parameters if needed | ||
conditions = parse_json_parameter(conditions) | ||
custom_metadata_conditions = parse_json_parameter(custom_metadata_conditions) | ||
negative_conditions = parse_json_parameter(negative_conditions) | ||
some_conditions = parse_json_parameter(some_conditions) | ||
date_range = parse_json_parameter(date_range) | ||
|
@@ -244,6 +329,7 @@ def search_assets_tool( | |
|
||
return search_assets( | ||
conditions, | ||
custom_metadata_conditions, | ||
negative_conditions, | ||
some_conditions, | ||
min_somes, | ||
|
@@ -694,6 +780,107 @@ def create_glossary_categories(categories) -> List[Dict[str, Any]]: | |
return create_glossary_category_assets(categories) | ||
|
||
|
||
@mcp.tool()
def get_custom_metadata_context_tool() -> Dict[str, Any]:
    """
    Fetch the custom metadata context for all business metadata definitions in the Atlan instance.

    This tool is used to get the custom metadata context for all business metadata definitions
    present in the Atlan instance.

    Eventually, this tool helps to prepare the payload for search_assets tool, when users
    want to search for assets with filters on custom metadata.

    This tool can only be called once in a chat conversation.

    Returns:
        The value returned by get_custom_metadata_context(). As described by the author, this is a
        list of business metadata definitions, each containing:
            - prompt: Formatted string prompt for the business metadata definition
            - metadata: Dictionary with business metadata details including:
                - name: Internal name of the business metadata
                - display_name: Display name of the business metadata
                - description: Description of the business metadata
                - attributes: List of attribute definitions with name, display_name, data_type, description, and optional enumEnrichment
                - id: GUID of the business metadata definition

        If retrieving the context fails, no exception is raised; a dictionary of the form
        {"error": "Error getting custom metadata context: <details>"} is returned instead.

    Examples:

        # Step 1: Get custom metadata context to understand available business metadata
        context = get_custom_metadata_context_tool()

        # Step 2: Use the context to prepare custom_metadata_conditions for search_assets_tool
        # Example context result might show business metadata like "Data Classification" with attributes

        # Example 1: Equality operator (eq) - exact match
        assets = search_assets_tool(
            asset_type="Table",
            custom_metadata_conditions=[{
                "custom_metadata_filter": {
                    "display_name": "Data Classification",  # This is the display name of the business metadata
                    "property_filters": [{
                        "property_name": "sensitivity_level",  # This is the display name of the property
                        "property_value": "sensitive",  # This is the value of the property
                        "operator": "eq"
                    }]
                }
            }],
            include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"]
        )

        # Example 2: Equality with case insensitive matching
        assets = search_assets_tool(
            custom_metadata_conditions=[{
                "custom_metadata_filter": {
                    "display_name": "Data Classification",
                    "property_filters": [{
                        "property_name": "sensitivity_level",
                        "property_value": "SENSITIVE",
                        "operator": "eq",
                        "case_insensitive": True
                    }]
                }
            }],
            include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"]
        )

        # Example 3: Starts with operator with case insensitive matching
        assets = search_assets_tool(
            custom_metadata_conditions=[{
                "custom_metadata_filter": {
                    "display_name": "Business Ownership",
                    "property_filters": [{
                        "property_name": "business_owner",
                        "property_value": "john",
                        "operator": "startswith",
                        "case_insensitive": True
                    }]
                }
            }],
            include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"]
        )

        # Example 4: Has any value operator (has_any_value) - check if field is populated
        assets = search_assets_tool(
            custom_metadata_conditions=[{
                "custom_metadata_filter": {
                    "display_name": "Business Ownership",
                    "property_filters": [{
                        "property_name": "business_owner",
                        "operator": "has_any_value"
                    }]
                }
            }],
            include_attributes=["name", "qualified_name", "type_name", "description", "certificate_status"]
        )
    """
    # NOTE(review): a reviewer asked why the "called once in a chat conversation" rule is
    # required; the author's reply suggests it can be removed if MCP clients already keep
    # the context in the chat window - confirm before relying on it.
    # NOTE(review): a reviewer questioned whether these examples belong here rather than on
    # the search tool, and suggested adding the searched custom metadata to
    # include_attributes - neither point is addressed in this block.
    # NOTE(review): the return annotation says Dict while the docstring describes a list on
    # success - verify against get_custom_metadata_context().
    try:
        return get_custom_metadata_context()
    except Exception as e:
        # Errors are reported to the caller as a payload rather than propagated.
        return {"error": f"Error getting custom metadata context: {str(e)}"}
|
||
|
||
def main(): | ||
mcp.run() | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,11 @@ | ||
"""Configuration settings for the application.""" | ||
|
||
import requests | ||
from typing import Any, Dict, Optional | ||
from urllib.parse import urlencode | ||
|
||
from pydantic_settings import BaseSettings | ||
|
||
from version import __version__ as MCP_VERSION | ||
|
||
|
||
|
@@ -12,6 +17,7 @@ class Settings(BaseSettings): | |
ATLAN_AGENT_ID: str = "NA" | ||
ATLAN_AGENT: str = "atlan-mcp" | ||
ATLAN_MCP_USER_AGENT: str = f"Atlan MCP Server {MCP_VERSION}" | ||
ATLAN_TYPEDEF_API_ENDPOINT: Optional[str] = "/api/meta/types/typedefs/" | ||
|
||
@property | ||
def headers(self) -> dict: | ||
|
@@ -23,6 +29,70 @@ def headers(self) -> dict: | |
"X-Atlan-Client-Origin": self.ATLAN_AGENT, | ||
} | ||
|
||
@staticmethod | ||
def build_api_url(path: str, query_params: Optional[Dict[str, Any]] = None) -> str: | ||
current_settings = Settings() | ||
if not current_settings: | ||
raise ValueError( | ||
"Atlan API URL (ATLAN_API_URL) is not configured in settings." | ||
) | ||
|
||
base_url = current_settings.ATLAN_BASE_URL.rstrip("/") | ||
|
||
if ( | ||
path | ||
and not path.startswith("/") | ||
and not base_url.endswith("/") | ||
and not path.startswith(("http://", "https://")) | ||
): | ||
full_path = f"{base_url}/{path.lstrip('/')}" | ||
elif path.startswith(("http://", "https://")): | ||
full_path = path | ||
else: | ||
full_path = f"{base_url}{path}" | ||
|
||
if query_params: | ||
active_query_params = { | ||
k: v for k, v in query_params.items() if v is not None | ||
} | ||
if active_query_params: | ||
query_string = urlencode(active_query_params) | ||
return f"{full_path}?{query_string}" | ||
return full_path | ||
SatabrataPaul-GitAc marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
@staticmethod
def get_atlan_typedef_api_endpoint(param: str) -> str:
    """Return the typedef API URL filtered by the given type.

    Args:
        param: Value sent as the ``type`` query parameter.

    Returns:
        The URL produced by ``Settings.build_api_url`` for the configured
        ``ATLAN_TYPEDEF_API_ENDPOINT`` with ``type=<param>`` appended.

    Raises:
        ValueError: If ``ATLAN_TYPEDEF_API_ENDPOINT`` is empty or unset.
    """
    typedef_endpoint = Settings().ATLAN_TYPEDEF_API_ENDPOINT
    if typedef_endpoint:
        return Settings.build_api_url(
            path=typedef_endpoint,
            query_params={"type": param},
        )

    raise ValueError(
        "Default API endpoint for typedefs (api_endpoint) is not configured in settings."
    )
|
||
@staticmethod
def make_request(url: str, timeout: float = 30.0) -> Optional[Dict[str, Any]]:
    """Send an authenticated GET request and return the decoded JSON body.

    Args:
        url: Fully-qualified URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` may block indefinitely.

    Returns:
        The JSON-decoded response body.

    Raises:
        Exception: If the request fails, the response status is not 200, or the
            body is not valid JSON. The underlying error, when there is one, is
            chained as ``__cause__``.
    """
    # A Settings instance is created per call because this is a static method
    # and the API key is read from the instance.
    # NOTE(review): a reviewer asked why this initialization is required.
    current_settings = Settings()

    # NOTE(review): a reviewer asked why a raw API call is used here; per the
    # author's reply, it is needed to get enum (options) context for custom
    # metadata attributes, which CustomMetadataCache does not provide.
    headers = {
        "Authorization": f"Bearer {current_settings.ATLAN_API_KEY}",
        "x-atlan-client-origin": "atlan-search-app",
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except Exception as e:
        raise Exception(f"Failed to make request to {url}: {e}") from e

    # Checked outside the try block so this error is not caught and re-wrapped
    # with a duplicated "Failed to make request" prefix.
    if response.status_code != 200:
        raise Exception(
            f"Failed to make request to {url}: {response.status_code} {response.text}"
        )

    try:
        return response.json()
    except ValueError as e:
        # Raised when the body is not valid JSON.
        raise Exception(f"Failed to make request to {url}: {e}") from e
|
||
class Config:
    # Settings-model configuration: names the dotenv file to read and the
    # encoding used to read it.
    # NOTE(review): presumably consumed by pydantic-settings' BaseSettings - confirm.
    env_file = ".env"
    env_file_encoding = "utf-8"
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why create a specific filter for this here? Why not make it part of the normal conditions?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you wouldn't have to define all the context and convertors as well. If the LLM understands how to use the CM context, they can use the unique ids as well
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Current Search Implementation

The search implementation in
/Users/satabrata.paul/Desktop/atlan-github-repos/agent-toolkit-internal/modelcontextprotocol/tools/search.py
processes conditions through:

Standard Asset Attributes Processing
- `SearchUtils._get_asset_attribute(attr_name)`, which calls `getattr(Asset, attr_name.upper(), None)` to get built-in Asset class attributes
- `SearchUtils._process_condition()`, which applies operators like `eq`, `contains`, `startswith`, etc. directly on Asset attributes

Why Custom Metadata Conditions Need Separate Handling

The separation between normal conditions and `custom_metadata_conditions` is necessary because they use fundamentally different PyAtlan APIs and attribute resolution mechanisms:

Comparison Table

| Normal conditions | Custom metadata conditions |
|---|---|
| `Asset.NAME`, `Asset.DESCRIPTION` (direct class attributes) | `CustomMetadataField(set_name="...", attribute_name="...")` (requires set name + field name) |
| `Asset` class attributes directly | `CustomMetadataField` class instantiation |

This architectural difference in PyAtlan necessitates separate processing logic.
@firecast