From be46051e20d8c0d892e673291cd45d7b40355ff1 Mon Sep 17 00:00:00 2001 From: Ben Wilson <39283302+BenWilson2@users.noreply.github.com> Date: Tue, 3 Dec 2024 21:57:16 -0500 Subject: [PATCH] =?UTF-8?q?Add=20utility=20methods=20for=20catalog=20and?= =?UTF-8?q?=20schema=20creation=20to=20the=20AI=20Functio=E2=80=A6=20(#765?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …n client for OSS **PR Checklist** - [X] A description of the changes is added to the description of this PR. - [ ] If there is a related issue, make sure it is linked to this PR. - [ ] If you've fixed a bug or added code that should be tested, add tests! - [ ] If you've added or modified a feature, documentation in `docs` is updated **Description of changes** Adds the `create_catalog` and `create_schema` utility methods to the client to simplify the experience for users of the AI function client (along with the base async API handlers for these sync methods). Updates the quickstart to greatly simplify the process for running the tutorials for getting started (the previous examples were a tad intimidating). --------- Signed-off-by: Ben Wilson --- .../UnityCatalog_Functions_Quickstart.ipynb | 132 ++++------ ai/core/src/unitycatalog/ai/core/oss.py | 247 +++++++++++++++++- ai/core/tests/core/oss/test_oss_client.py | 182 +++++++++++-- docs/ai/client.md | 33 ++- 4 files changed, 480 insertions(+), 114 deletions(-) diff --git a/ai/core/UnityCatalog_Functions_Quickstart.ipynb b/ai/core/UnityCatalog_Functions_Quickstart.ipynb index 723ce0727..cca28edb8 100644 --- a/ai/core/UnityCatalog_Functions_Quickstart.ipynb +++ b/ai/core/UnityCatalog_Functions_Quickstart.ipynb @@ -12,7 +12,7 @@ "**Prerequisites for this tutorial**:\n", "\n", "1. A clone of the [Unity Catalog repository](https://github.com/unitycatalog/unitycatalog).\n", - " \n", + "\n", " ```sh\n", " git clone https://github.com/unitycatalog/unitycatalog\n", " ```\n", @@ -85,7 +85,7 @@ "\n", "This next cell sets up access to your running UnityCatalog server, creates a Catalog and a Schema that we will be using throughout the remainder of this tutorial. \n", "\n", - "> Note: The unitycatalog-client is an aiohttp-based package. When directly interfacing with the APIs in that package, make sure to use (as shown below with the `create_catalog` function) async interfaces when making calls. " + "> Note: The unitycatalog-client is an aiohttp-based package. When directly interfacing with the APIs in that package, make sure to use async interfaces when making calls. The `UnitycatalogFunctionClient` API offers synchronous (shown below) convenience methods for creating catalogs and schemas, though." ] }, { @@ -100,70 +100,30 @@ "SchemaInfo(name='AISchema', catalog_name='AICatalog', comment='This is a schema used for storing GenAI functions.', properties={}, full_name='AICatalog.AISchema', owner=None, created_at=1732297125510, created_by=None, updated_at=1732297125510, updated_by=None, schema_id='4925f53c-216a-44e7-9185-34c39fb9f51f')" ] }, - "execution_count": 13, + "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from unitycatalog.ai.core.oss import UnitycatalogFunctionClient\n", - "from unitycatalog.client import ApiClient, CatalogsApi, Configuration, SchemasApi\n", - "from unitycatalog.client.exceptions import NotFoundException\n", - "from unitycatalog.client.models import CreateCatalog, CreateSchema\n", + "from unitycatalog.client import ApiClient, Configuration\n", "\n", - "CATALOG = \"AICatalog\"\n", - "SCHEMA = \"AISchema\"\n", + "CATALOG = \"AICatalogDemonstration\"\n", + "SCHEMA = \"AISchemaDemonstration\"\n", "\n", - "config = Configuration()\n", - "config.host = \"http://localhost:8080/api/2.1/unity-catalog\"\n", + "config = Configuration(host=\"http://localhost:8080/api/2.1/unity-catalog\")\n", "client = ApiClient(configuration=config)\n", "\n", - "catalogs_api = CatalogsApi(api_client=client)\n", - "schemas_api = SchemasApi(api_client=client)\n", "uc_client = UnitycatalogFunctionClient(api_client=client)\n", "\n", - "\n", - "async def create_catalog(catalog_name, catalog_api, comment=None):\n", - " catalog = None\n", - " try:\n", - " catalog = await catalog_api.get_catalog(name=catalog_name)\n", - " except NotFoundException:\n", - " pass\n", - " if not catalog:\n", - " new_catalog = CreateCatalog(name=catalog_name, comment=comment or \"\")\n", - " return await catalog_api.create_catalog(create_catalog=new_catalog)\n", - " else:\n", - " return catalog\n", - "\n", - "\n", - "async def create_schema(schema_name, catalog_name, schema_api, comment=None):\n", - " schema = None\n", - " try:\n", - " schema = await schema_api.get_schema(f\"{catalog_name}.{schema_name}\")\n", - " except NotFoundException:\n", - " pass\n", - " if not schema:\n", - " new_schema = CreateSchema(\n", - " name=schema_name, catalog_name=catalog_name, comment=comment or \"\"\n", - " )\n", - " return await schema_api.create_schema(create_schema=new_schema)\n", - " else:\n", - " return schema\n", - "\n", - "\n", - "# Create the catalog if it doesn't exist\n", - "await create_catalog(\n", - " catalog_name=CATALOG,\n", - " catalog_api=catalogs_api,\n", - " comment=\"This is a catalog that is used for GenAI functions storage.\",\n", + "uc_client.uc.create_catalog(\n", + " name=CATALOG, comment=\"A demonstration catalog for the AI functionality in Unity Catalog.\"\n", ")\n", - "\n", - "# Create a schema if it doesn't exist\n", - "await create_schema(\n", - " schema_name=SCHEMA,\n", + "uc_client.uc.create_schema(\n", + " name=SCHEMA,\n", " catalog_name=CATALOG,\n", - " schema_api=schemas_api,\n", - " comment=\"This is a schema used for storing GenAI functions.\",\n", + " comment=\"A demonstration schema for holding tutorial Python functions for GenAI usage.\",\n", ")" ] }, @@ -187,17 +147,17 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "id": "e30b66a8-3157-431a-8b5a-5795ec2d79d5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "FunctionInfo(name='my_test_func', catalog_name='AICatalog', schema_name='AISchema', input_params=FunctionParameterInfos(parameters=[FunctionParameterInfo(name='a', type_text='STRING', type_json='{\"name\": \"a\", \"type\": \"string\", \"nullable\": false, \"metadata\": {\"comment\": \"the first string\"}}', type_name=, type_precision=None, type_scale=None, type_interval_type=None, position=0, parameter_mode=None, parameter_type=None, parameter_default=None, comment='the first string'), FunctionParameterInfo(name='b', type_text='STRING', type_json='{\"name\": \"b\", \"type\": \"string\", \"nullable\": false, \"metadata\": {\"comment\": \"the second string\"}}', type_name=, type_precision=None, type_scale=None, type_interval_type=None, position=1, parameter_mode=None, parameter_type=None, parameter_default=None, comment='the second string')]), data_type=, full_data_type='STRING', return_params=None, routine_body='EXTERNAL', routine_definition='concatenated = f\"{a} {b}\"\\n\\n# Convert the concatenated string to uppercase\\nuppercased = concatenated.upper()\\n\\nreturn uppercased', routine_dependencies=None, parameter_style='S', is_deterministic=True, sql_data_access='NO_SQL', is_null_call=False, security_type='DEFINER', specific_name='my_test_func', comment='Returns an upper case concatenation of two strings separated by a space.', properties='null', full_name='AICatalog.AISchema.my_test_func', owner=None, created_at=1732303075179, created_by=None, updated_at=1732303075179, updated_by=None, function_id='c1e93917-6536-47cb-a2b7-549decadcdf5', external_language='PYTHON')" + "FunctionInfo(name='my_test_func', catalog_name='AICatalog', schema_name='AISchema', input_params=FunctionParameterInfos(parameters=[FunctionParameterInfo(name='a', type_text='STRING', type_json='{\"name\": \"a\", \"type\": \"string\", \"nullable\": false, \"metadata\": {\"comment\": \"the first string\"}}', type_name=, type_precision=None, type_scale=None, type_interval_type=None, position=0, parameter_mode=None, parameter_type=None, parameter_default=None, comment='the first string'), FunctionParameterInfo(name='b', type_text='STRING', type_json='{\"name\": \"b\", \"type\": \"string\", \"nullable\": false, \"metadata\": {\"comment\": \"the second string\"}}', type_name=, type_precision=None, type_scale=None, type_interval_type=None, position=1, parameter_mode=None, parameter_type=None, parameter_default=None, comment='the second string')]), data_type=, full_data_type='STRING', return_params=None, routine_body='EXTERNAL', routine_definition='concatenated = f\"{a} {b}\"\\n\\n# Convert the concatenated string to uppercase\\nuppercased = concatenated.upper()\\n\\nreturn uppercased', routine_dependencies=None, parameter_style='S', is_deterministic=True, sql_data_access='NO_SQL', is_null_call=False, security_type='DEFINER', specific_name='my_test_func', comment='Returns an upper case concatenation of two strings separated by a space.', properties='null', full_name='AICatalog.AISchema.my_test_func', owner=None, created_at=1732302867741, created_by=None, updated_at=1732302867741, updated_by=None, function_id='dffc72ad-72ad-4000-8a5a-89389c93c45f', external_language='PYTHON')" ] }, - "execution_count": 14, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -247,7 +207,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "id": "2fb2e36e-4c6f-4302-a488-958f1885094b", "metadata": {}, "outputs": [ @@ -257,7 +217,7 @@ "FunctionExecutionResult(error=None, format='SCALAR', value='HI THERE', truncated=None)" ] }, - "execution_count": 15, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -282,7 +242,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 4, "id": "60c2ab5c-3a7a-4a01-bd20-5be20adc6450", "metadata": {}, "outputs": [], @@ -327,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 5, "id": "2d1b0a6e-7dd1-46dd-81b9-89192897a1f4", "metadata": {}, "outputs": [], @@ -361,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, "id": "f560078e-ab4b-4d36-bc3f-0e553a61c69d", "metadata": {}, "outputs": [ @@ -371,7 +331,7 @@ "['calculate_humidex_temperature', 'fahrenheit_to_celsius', 'my_test_func']" ] }, - "execution_count": 18, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -398,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 7, "id": "e0034d89-0f88-4ac1-81a7-45bdebfd20a3", "metadata": {}, "outputs": [ @@ -408,7 +368,7 @@ "FunctionExecutionResult(error=None, format='SCALAR', value='31.27777777777778', truncated=None)" ] }, - "execution_count": 19, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -424,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 8, "id": "a1a1d079-55ef-4491-b293-01f140df2ad0", "metadata": {}, "outputs": [ @@ -434,7 +394,7 @@ "FunctionExecutionResult(error=None, format='SCALAR', value='44.61870689130167', truncated=None)" ] }, - "execution_count": 20, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -465,7 +425,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 9, "id": "49862c35-959e-468b-a560-242504d9ab5c", "metadata": {}, "outputs": [], @@ -475,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "id": "65ba5a54-56b1-4fc7-aea0-671739300f80", "metadata": {}, "outputs": [ @@ -509,7 +469,7 @@ " 'description': 'Calculates the Humidex temperature based on the actual temperature in Celsius and relative humidity. High temperatures with high humidity feel hotter, while low temperatures with low humidity feel colder. This function uses the Humidex formula to compute the perceived temperature.'}}]" ] }, - "execution_count": 22, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -534,7 +494,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 11, "id": "ba3970c4-8f97-4f58-ad89-ddeade75f919", "metadata": {}, "outputs": [], @@ -561,17 +521,17 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 12, "id": "bfdd0480-70a2-4477-97b7-f34ff0e3f60c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "ChatCompletion(id='chatcmpl-AWTS7edxHUxaGkAsCzpBZ0Ls9S6Yj', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_GMgF6LV2G9Rwr6EUazvkPsBX', function=Function(arguments='{\"fahrenheit\": 97.3}', name='AICatalog__AISchema__fahrenheit_to_celsius'), type='function'), ChatCompletionMessageToolCall(id='call_LYIVDRPXO0XuOuiynWwYldPO', function=Function(arguments='{\"temperature_c\": 36.33, \"humidity\": 80.6}', name='AICatalog__AISchema__calculate_humidex_temperature'), type='function')]))], created=1732303075, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_0705bf87c0', usage=CompletionUsage(completion_tokens=77, prompt_tokens=270, total_tokens=347, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))" + "ChatCompletion(id='chatcmpl-AWTOmZx4lJvMMnegSC9J4DKh7SCyQ', choices=[Choice(finish_reason='tool_calls', index=0, logprobs=None, message=ChatCompletionMessage(content=None, refusal=None, role='assistant', audio=None, function_call=None, tool_calls=[ChatCompletionMessageToolCall(id='call_8Fi4ufZL2sA2zC5fr5Af7EMO', function=Function(arguments='{\"fahrenheit\": 97.3}', name='AICatalog__AISchema__fahrenheit_to_celsius'), type='function'), ChatCompletionMessageToolCall(id='call_GxRGWyzCtTIXnzEp8MMRGx6w', function=Function(arguments='{\"temperature_c\": 36.333333333333336, \"humidity\": 80.6}', name='AICatalog__AISchema__calculate_humidex_temperature'), type='function')]))], created=1732302868, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_0705bf87c0', usage=CompletionUsage(completion_tokens=81, prompt_tokens=270, total_tokens=351, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))" ] }, - "execution_count": 24, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -615,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 13, "id": "eaa00e4c-f487-464e-b802-a886f8b21d77", "metadata": {}, "outputs": [ @@ -625,23 +585,23 @@ "[{'content': None,\n", " 'refusal': None,\n", " 'role': 'assistant',\n", - " 'tool_calls': [{'id': 'call_GMgF6LV2G9Rwr6EUazvkPsBX',\n", + " 'tool_calls': [{'id': 'call_8Fi4ufZL2sA2zC5fr5Af7EMO',\n", " 'function': {'arguments': '{\"fahrenheit\": 97.3}',\n", " 'name': 'AICatalog__AISchema__fahrenheit_to_celsius'},\n", " 'type': 'function'},\n", - " {'id': 'call_LYIVDRPXO0XuOuiynWwYldPO',\n", - " 'function': {'arguments': '{\"temperature_c\": 36.33, \"humidity\": 80.6}',\n", + " {'id': 'call_GxRGWyzCtTIXnzEp8MMRGx6w',\n", + " 'function': {'arguments': '{\"temperature_c\": 36.333333333333336, \"humidity\": 80.6}',\n", " 'name': 'AICatalog__AISchema__calculate_humidex_temperature'},\n", " 'type': 'function'}]},\n", " {'role': 'tool',\n", " 'content': '{\"content\": \"36.27777777777778\"}',\n", - " 'tool_call_id': 'call_GMgF6LV2G9Rwr6EUazvkPsBX'},\n", + " 'tool_call_id': 'call_8Fi4ufZL2sA2zC5fr5Af7EMO'},\n", " {'role': 'tool',\n", - " 'content': '{\"content\": \"58.82548958032328\"}',\n", - " 'tool_call_id': 'call_LYIVDRPXO0XuOuiynWwYldPO'}]" + " 'content': '{\"content\": \"58.83411235106172\"}',\n", + " 'tool_call_id': 'call_GxRGWyzCtTIXnzEp8MMRGx6w'}]" ] }, - "execution_count": 25, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -668,17 +628,17 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 14, "id": "10a50405-6304-46ea-8d9b-97fd643b7e06", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "ChatCompletion(id='chatcmpl-AWTS9wFw30XUGLi14mY1KWgIJvJwW', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The forecast for tomorrow indicates a temperature of approximately **36.3°C** (97.3°F) with a high humidity level of **80.6%**. \\n\\n### Humidex Calculation\\nThe perceived temperature (Humidex) due to the high humidity is around **58.8°C** (or **137.8°F**). This means that it will feel significantly hotter than the actual temperature, which can lead to discomfort and potential heat-related issues.\\n\\n### Recommendations for Hiking:\\n- **Clothing**: Wear lightweight, light-colored, breathable, and moisture-wicking clothing. A wide-brimmed hat can also help protect you from direct sunlight.\\n- **Hydration**: Bring plenty of water and consider electrolyte-replacement drinks. Staying hydrated is crucial in such high humidity and temperature.\\n- **Timing**: If you decide to go hiking, consider starting early in the morning or later in the evening to avoid the peak heat.\\n- **Trail Choice**: Select a trail with shade and possibly some elevation gain where you might experience cooler temperatures.\\n- **Pace**: Take it slow and listen to your body. Be aware of signs of heat exhaustion or heat stroke.\\n\\nGiven the extreme conditions, if you can postpone your hike to a cooler day, it might be a safer option.', refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1732303077, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_0705bf87c0', usage=CompletionUsage(completion_tokens=265, prompt_tokens=383, total_tokens=648, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))" + "ChatCompletion(id='chatcmpl-AWTOoKvQA9AIRfrKjBuoM5ypbheU7', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"The temperature is approximately 36.3°C (97.3°F), and with a high humidity of 80.6%, the humidex (perceived temperature) feels like about 58.8°C (137.8°F). This is extremely high and can be dangerous for outdoor activities like hiking.\\n\\n**Recommendations:**\\n- **Clothing:** Wear lightweight, breathable, and moisture-wicking clothing to help with sweat evaporation. A wide-brimmed hat and sunglasses are also important for sun protection.\\n- **Hydration:** Carry plenty of water to stay hydrated. The heat and humidity can lead to dehydration quickly.\\n- **Timing:** If you decide to hike, consider going very early in the morning or later in the evening when temperatures are cooler.\\n- **Pace:** Take breaks often, and listen to your body. If you feel dizzy, weak, or excessively tired, it's best to stop and seek shade or a cool environment.\\n- **Safety:** Be aware of the signs of heat exhaustion and heat stroke.\\n\\nGiven the extreme weather conditions, it may be safer to postpone your hike for a cooler day if possible.\", refusal=None, role='assistant', audio=None, function_call=None, tool_calls=None))], created=1732302870, model='gpt-4o-mini-2024-07-18', object='chat.completion', service_tier=None, system_fingerprint='fp_0705bf87c0', usage=CompletionUsage(completion_tokens=228, prompt_tokens=387, total_tokens=615, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0), prompt_tokens_details=PromptTokensDetails(audio_tokens=0, cached_tokens=0)))" ] }, - "execution_count": 26, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -698,17 +658,17 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 15, "id": "0f21b701-919a-4727-b77b-d0f6260e15eb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'The forecast for tomorrow indicates a temperature of approximately **36.3°C** (97.3°F) with a high humidity level of **80.6%**. \\n\\n### Humidex Calculation\\nThe perceived temperature (Humidex) due to the high humidity is around **58.8°C** (or **137.8°F**). This means that it will feel significantly hotter than the actual temperature, which can lead to discomfort and potential heat-related issues.\\n\\n### Recommendations for Hiking:\\n- **Clothing**: Wear lightweight, light-colored, breathable, and moisture-wicking clothing. A wide-brimmed hat can also help protect you from direct sunlight.\\n- **Hydration**: Bring plenty of water and consider electrolyte-replacement drinks. Staying hydrated is crucial in such high humidity and temperature.\\n- **Timing**: If you decide to go hiking, consider starting early in the morning or later in the evening to avoid the peak heat.\\n- **Trail Choice**: Select a trail with shade and possibly some elevation gain where you might experience cooler temperatures.\\n- **Pace**: Take it slow and listen to your body. Be aware of signs of heat exhaustion or heat stroke.\\n\\nGiven the extreme conditions, if you can postpone your hike to a cooler day, it might be a safer option.'" + "\"The temperature is approximately 36.3°C (97.3°F), and with a high humidity of 80.6%, the humidex (perceived temperature) feels like about 58.8°C (137.8°F). This is extremely high and can be dangerous for outdoor activities like hiking.\\n\\n**Recommendations:**\\n- **Clothing:** Wear lightweight, breathable, and moisture-wicking clothing to help with sweat evaporation. A wide-brimmed hat and sunglasses are also important for sun protection.\\n- **Hydration:** Carry plenty of water to stay hydrated. The heat and humidity can lead to dehydration quickly.\\n- **Timing:** If you decide to hike, consider going very early in the morning or later in the evening when temperatures are cooler.\\n- **Pace:** Take breaks often, and listen to your body. If you feel dizzy, weak, or excessively tired, it's best to stop and seek shade or a cool environment.\\n- **Safety:** Be aware of the signs of heat exhaustion and heat stroke.\\n\\nGiven the extreme weather conditions, it may be safer to postpone your hike for a cooler day if possible.\"" ] }, - "execution_count": 27, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } diff --git a/ai/core/src/unitycatalog/ai/core/oss.py b/ai/core/src/unitycatalog/ai/core/oss.py index 981cb3de1..9a4e205ee 100644 --- a/ai/core/src/unitycatalog/ai/core/oss.py +++ b/ai/core/src/unitycatalog/ai/core/oss.py @@ -20,12 +20,18 @@ ) from unitycatalog.client import ( ApiClient, + CatalogInfo, + CatalogsApi, + CreateCatalog, CreateFunction, CreateFunctionRequest, + CreateSchema, FunctionInfo, FunctionParameterInfo, FunctionParameterInfos, FunctionsApi, + SchemaInfo, + SchemasApi, ) from unitycatalog.client.exceptions import NotFoundException, ServiceException @@ -103,6 +109,228 @@ def wrapper(self, *args, **kwargs): return wrapper +class UnitycatalogClient: + def __init__(self, api_client: ApiClient): + self.api_client = api_client + self.functions_client = FunctionsApi(api_client=api_client) + self.catalogs_client = CatalogsApi(api_client=api_client) + self.schemas_client = SchemasApi(api_client=api_client) + + # Clean up the ApiClient instance for aiohttp to ensure that we're not leaking resources + # and preventing Python's GC operation as well as to ensure that multiple instances of + # this client are not present within a thread (eliminate a potential memory leak). + atexit.register(self.close) + + async def close_async(self): + """Asynchronously close the underlying ApiClient.""" + if getattr(self, "_closed", None): + return + self._closed = True + try: + await self.api_client.close() + _logger.info("ApiClient successfully closed.") + except Exception as e: + _logger.error(f"Error while closing ApiClient: {e}") + + def close(self): + """Synchronously close the underlying ApiClient.""" + if getattr(self, "_closed", None): + return + self._closed = True + try: + loop = asyncio.get_event_loop() + if loop.is_running(): + asyncio.ensure_future(self.close_async()) + else: + loop.run_until_complete(self.close_async()) + except Exception as e: + _logger.error(f"Error while closing ApiClient: {e}") + + def __enter__(self): + """Enter the context manager.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Exit the context manager and close the ApiClient.""" + self.close() + + async def create_catalog_async( + self, + name: str, + comment: Optional[str] = None, + properties: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> CatalogInfo: + """ + Create a catalog in Unity Catalog asynchronously. + + Args: + name: The name of the catalog to create. + comment: An optional comment (description) for the catalog. + properties: Optional dictionary of properties for the catalog. Property options are validated + through the REST interface and can be discovered via the Unity Catalog Server API documentation. + **kwargs: Additional keyword arguments for the creation call (defined as overridable private methods) + the optional values (i.e., _request_timeout, _headers, _host_index) can be found in the + Unity Catalog Server API documentation. These parameters are in an experimental state and are + subject to change. + + Returns: + The created CatalogInfo object. + + Raises: + ValueError: If the catalog already exists. + """ + + catalog = None + try: + catalog = await self._get_catalog(name) + except NotFoundException: + pass + + if catalog: + _logger.info(f"The catalog '{name}' already exists.") + return catalog + + catalog_create_request = CreateCatalog( + name=name, + comment=comment, + properties=properties, + ) + + return await self.catalogs_client.create_catalog( + create_catalog=catalog_create_request, **kwargs + ) + + @syncify_method + def create_catalog( + self, + name: str, + comment: Optional[str] = None, + properties: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> CatalogInfo: + """ + Create a catalog in Unity Catalog. + + This is a synchronous version of `create_catalog_async`. + + Args: + name: The name of the catalog to create. + comment: An optional comment (description) for the catalog. + properties: Optional dictionary of properties for the catalog. Property options are validated + through the REST interface and can be discovered via the Unity Catalog Server API documentation. + **kwargs: Additional keyword arguments for the creation call (defined as overridable private methods) + the optional values (i.e., _request_timeout, _headers, _host_index) can be found in the + Unity Catalog Server API documentation. These parameters are in an experimental state and are + subject to change. + + Returns: + The created CatalogInfo object. + """ + + pass + + async def create_schema_async( + self, + name: str, + catalog_name: str, + comment: Optional[str] = None, + properties: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> SchemaInfo: + """ + Create a schema within a catalog in Unity Catalog asynchronously. + + Args: + name: The name of the schema to create. + catalog_name: The name of the catalog where the schema will be created. + comment: An optional comment (description) for the schema. + properties: Optional dictionary of properties for the schema. Property options are validated + through the REST interface and can be discovered through the Unity Catalog Server API + documentation. + **kwargs: Additional keyword arguments for the creation call (defined as overridable private methods) + the optional values (i.e., _request_timeout, _headers, _host_index) can be found in the + Unity Catalog Server API documentation. These parameters are in an experimental state and are + subject to change. + + Returns: + The created SchemaInfo object. + + Raises: + ValueError: If the specified catalog does not exist. + """ + + try: + await self._get_catalog(catalog_name) + except NotFoundException as e: + raise ValueError( + f"The Catalog that you specified: '{catalog_name}' does not exist on this server." + ) from e + + schema = None + try: + schema = await self._get_schema(name=name, catalog_name=catalog_name) + except NotFoundException: + pass + + if schema: + _logger.info( + f"The schema '{name}' already exists in the catalog '{catalog_name}'", + ) + return schema + + schema_create_request = CreateSchema( + name=name, + catalog_name=catalog_name, + comment=comment, + properties=properties, + ) + + return await self.schemas_client.create_schema( + create_schema=schema_create_request, **kwargs + ) + + @syncify_method + def create_schema( + self, + name: str, + catalog_name: str, + comment: Optional[str] = None, + properties: Optional[Dict[str, Any]] = None, + **kwargs, + ) -> SchemaInfo: + """ + Create a schema within a catalog in Unity Catalog. + + This is a synchronous version of `create_schema_async`. + + Args: + name: The name of the schema to create. + catalog_name: The name of the catalog where the schema will be created. + comment: An optional comment (description) for the schema. + properties: Optional dictionary of properties for the schema. Property options are validated + through the REST interface and can be discovered through the Unity Catalog Server API + documentation. + **kwargs: Additional keyword arguments for the creation call (defined as overridable private methods) + the optional values (i.e., _request_timeout, _headers, _host_index) can be found in the + Unity Catalog Server API documentation. These parameters are in an experimental state and are + subject to change. + + Returns: + The created SchemaInfo object. + """ + + pass + + async def _get_schema(self, name: str, catalog_name: str) -> SchemaInfo: + """ """ + return await self.schemas_client.get_schema(full_name=f"{catalog_name}.{name}") + + async def _get_catalog(self, name: str) -> CatalogInfo: + """ """ + return await self.catalogs_client.get_catalog(name=name) + + class UnitycatalogFunctionClient(BaseFunctionClient): """ Unity Catalog function client for managing and executing functions in Unity Catalog OSS. @@ -122,12 +350,12 @@ def __init__(self, api_client: ApiClient, **kwargs: Any) -> None: "The 'api_client' must be an instance of unitycatalog.client.ApiClient" ) - self.uc = FunctionsApi(api_client=api_client) + self.uc = UnitycatalogClient(api_client) self.func_cache = {} super().__init__() # Clean up the ApiClient instance for aiohttp to ensure that we're not leaking resources - # and preventing Python's GC operator as well as to ensure that multiple instances of + # and preventing Python's GC operation as well as to ensure that multiple instances of # this client are not present within a thread (eliminate a potential memory leak). atexit.register(self.close) @@ -242,7 +470,9 @@ async def create_function_async( comment=comment, ) function_request = CreateFunctionRequest(function_info=function_info) - return await self.uc.create_function(function_request, _request_timeout=timeout) + return await self.uc.functions_client.create_function( + function_request, _request_timeout=timeout + ) @override @syncify_method @@ -379,7 +609,9 @@ async def get_function_async( """ try: - return await self.uc.get_function(function_name, _request_timeout=timeout) + return await self.uc.functions_client.get_function( + function_name, _request_timeout=timeout + ) except NotFoundException as e: _logger.warning( f"Failed to retrieve function {function_name} from Unity Catalog, the function may not exist. " @@ -426,7 +658,7 @@ async def list_functions_async( Returns: A PagedList of FunctionInfo objects. """ - resp = await self.uc.list_functions( + resp = await self.uc.functions_client.list_functions( catalog_name=catalog, schema_name=schema, max_results=max_results, @@ -513,7 +745,7 @@ async def delete_function_async( None """ - await self.uc.delete_function(function_name, _request_timeout=timeout) + await self.uc.functions_client.delete_function(function_name, _request_timeout=timeout) @override @syncify_method @@ -536,7 +768,8 @@ def delete_function(self, function_name: str, timeout: Optional[float] = None) - @override def to_dict(self) -> Dict[str, Any]: - return {k: getattr(self, k) for k in ["uc"] if getattr(self, k) is not None} + elements = ["uc"] + return {k: getattr(self, k) for k in elements if getattr(self, k) is not None} def dynamically_construct_python_function(function_info: FunctionInfo) -> str: diff --git a/ai/core/tests/core/oss/test_oss_client.py b/ai/core/tests/core/oss/test_oss_client.py index 3bb1bfb58..d230f5ffa 100644 --- a/ai/core/tests/core/oss/test_oss_client.py +++ b/ai/core/tests/core/oss/test_oss_client.py @@ -12,6 +12,7 @@ from pydantic import ValidationError from unitycatalog.ai.core.oss import ( + UnitycatalogClient, UnitycatalogFunctionClient, validate_input_parameter, validate_param, @@ -21,8 +22,6 @@ ApiClient, CatalogsApi, Configuration, - CreateCatalog, - CreateSchema, FunctionParameterInfo, FunctionsApi, SchemasApi, @@ -86,22 +85,9 @@ async def uc_client(): config.host = "http://localhost:8080/api/2.1/unity-catalog" uc_api_client = ApiClient(configuration=config) - catalog_api = CatalogsApi(api_client=uc_api_client) - schema_api = SchemasApi(api_client=uc_api_client) - - try: - await catalog_api.get_catalog(name=CATALOG) - except Exception: - create_catalog = CreateCatalog(name=CATALOG, comment="") - await catalog_api.create_catalog(create_catalog=create_catalog) - - try: - await schema_api.get_schema(full_name=f"{CATALOG}.{SCHEMA}") - except Exception: - create_schema = CreateSchema(name=SCHEMA, catalog_name=CATALOG, comment="") - await schema_api.create_schema(create_schema=create_schema) - uc_client = UnitycatalogFunctionClient(api_client=uc_api_client) + uc_client.uc.create_catalog(name=CATALOG) + uc_client.uc.create_schema(name=SCHEMA, catalog_name=CATALOG) yield uc_client @@ -109,12 +95,171 @@ async def uc_client(): await uc_api_client.close() +@pytest_asyncio.fixture +async def core_client(): + core_catalog_name = "CoreCatalog" + core_schema_name = "CoreSchema" + config = Configuration() + config.host = "http://localhost:8080/api/2.1/unity-catalog" + uc_api_client = ApiClient(configuration=config) + + core_client = UnitycatalogClient(api_client=uc_api_client) + core_client.create_catalog( + name=core_catalog_name, comment="Testing the catalog creation", properties={"test": "test"} + ) + core_client.create_schema( + name=core_schema_name, + catalog_name=core_catalog_name, + comment="Testing the schema creation", + properties={"test": "test"}, + ) + + yield core_client + + await core_client.catalogs_client.delete_catalog(name=core_catalog_name, force=True) + await core_client.close_async() + await uc_api_client.close() + + @pytest.mark.asyncio async def test_handle_invalid_client(): with pytest.raises(ValueError, match="The 'api_client' must be an instance of"): UnitycatalogFunctionClient(api_client="client") +@pytest.mark.asyncio +async def test_create_unitycatalog_client(core_client): + assert isinstance(core_client.catalogs_client, CatalogsApi) + assert isinstance(core_client.schemas_client, SchemasApi) + assert isinstance(core_client.functions_client, FunctionsApi) + + core_client.create_catalog(name="test_catalog", comment="test") + core_client.create_schema(name="test_schema", catalog_name="test_catalog", comment="test") + + catalog_info = await core_client.catalogs_client.get_catalog(name="test_catalog") + + assert catalog_info.name == "test_catalog" + assert catalog_info.comment == "test" + + schema_info = await core_client.schemas_client.get_schema(full_name="test_catalog.test_schema") + + assert schema_info.name == "test_schema" + assert schema_info.catalog_name == "test_catalog" + assert schema_info.comment == "test" + + +@pytest.mark.asyncio +async def test_catalog_idempotent_creation(core_client, caplog): + test_catalog = "CoreCatalog" + catalog_comment = "Testing the catalog creation" + properties = {"test": "test"} + + with caplog.at_level(logging.INFO): + catalog_info = core_client.create_catalog( + name=test_catalog, comment=catalog_comment, properties=properties + ) + + assert catalog_info.name == test_catalog + assert catalog_info.comment == catalog_comment + assert catalog_info.properties == properties + + expected_message = f"The catalog '{test_catalog}' already exists." + assert expected_message in caplog.text + + +@pytest.mark.asyncio +async def test_schema_idempotent_creation(core_client, caplog): + test_catalog = "CoreCatalog" + test_schema = "CoreSchema" + schema_comment = "Testing the schema creation" + properties = {"test": "test"} + + core_client.create_catalog(name=test_catalog, comment="Catalog for schema", properties={}) + + core_client.create_schema( + name=test_schema, catalog_name=test_catalog, comment=schema_comment, properties=properties + ) + + with caplog.at_level(logging.INFO): + schema_info = core_client.create_schema( + name=test_schema, + catalog_name=test_catalog, + comment=schema_comment, + properties=properties, + ) + + assert schema_info.name == test_schema + assert schema_info.catalog_name == test_catalog + assert schema_info.comment == schema_comment + assert schema_info.properties == properties + + expected_message = f"The schema '{test_schema}' already exists in the catalog '{test_catalog}'" + assert expected_message in caplog.text + + +@pytest.mark.asyncio +async def test_create_catalog(uc_client, caplog): + test_catalog = "CatalogCreate" + catalog_comment = "Testing the catalog creation" + properties = {"test": "test"} + uc_client.uc.create_catalog(name=test_catalog, comment=catalog_comment, properties=properties) + + with caplog.at_level(logging.INFO): + catalog_info = uc_client.uc.create_catalog( + name=test_catalog, comment=catalog_comment, properties=properties + ) + + assert catalog_info.name == test_catalog + assert catalog_info.comment == catalog_comment + assert catalog_info.properties == properties + + expected_message = f"The catalog '{test_catalog}' already exists." + assert expected_message in caplog.text + + await uc_client.uc.catalogs_client.delete_catalog(name=test_catalog, force=True) + + +@pytest.mark.asyncio +async def test_create_schema(uc_client, caplog): + test_catalog = "SchemaCreate" + test_schema = "TestSchema" + schema_comment = "Testing the schema creation" + properties = {"test": "test"} + + uc_client.uc.create_catalog(name=test_catalog) + + with caplog.at_level(logging.INFO): + schema_info = uc_client.uc.create_schema( + name=test_schema, + catalog_name=test_catalog, + comment=schema_comment, + properties=properties, + ) + + assert schema_info.name == test_schema + assert schema_info.catalog_name == test_catalog + assert schema_info.comment == schema_comment + assert schema_info.properties == properties + + with caplog.at_level(logging.INFO): + existing = uc_client.uc.create_schema(name=test_schema, catalog_name=test_catalog) + + expected_message = f"The schema '{test_schema}' already exists in the catalog '{test_catalog}'" + assert expected_message in caplog.text + + assert existing.name == test_schema + assert existing.comment == schema_comment + + non_existent_catalog = "NotARealCatalog" + with pytest.raises( + ValueError, + match=f"The Catalog that you specified: '{non_existent_catalog}' does not exist on this server.", + ): + uc_client.uc.create_schema(name="MySchema", catalog_name=non_existent_catalog) + + await uc_client.uc.catalogs_client.delete_catalog(name=test_catalog, force=True) + + @pytest.mark.asyncio async def test_create_function(uc_client): function_name = f"{CATALOG}.{SCHEMA}.test_function" @@ -709,8 +854,7 @@ async def test_function_caching(uc_client): async def test_to_dict(uc_client): client_dict = uc_client.to_dict() assert isinstance(client_dict, dict) - if "uc" in client_dict: - assert isinstance(client_dict["uc"], FunctionsApi) + assert isinstance(client_dict["uc"], UnitycatalogClient) @pytest.mark.asyncio diff --git a/docs/ai/client.md b/docs/ai/client.md index ad1c69bef..1ba295acc 100644 --- a/docs/ai/client.md +++ b/docs/ai/client.md @@ -69,6 +69,35 @@ CATALOG = "my_catalog" SCHEMA = "my_schema" ``` +##### Catalog and Schema Handlers + +As a measure of convenience, a catalog and schema handler class is available. This can either be instantiated independently: + +```python +from unitycatalog.ai.core.oss import UnitycatalogClient +from unitycatalog.client import ApiClient, Configuration + +config = Configuration(host="http://localhost:8080/api/2.1/unity-catalog") +api_client = ApiClient(configuration=config) + +# Instantiate the catalog and schema handler directly to create catalogs and schemas as needed +core_client = UnitycatalogClient(api_client=api_client) + +catalog_info = core_client.create_catalog( + name="MyTestCatalog", + comment="A catalog used for testing purposes", + properties={"key": "value"}, +) +schema_info = core_client.create_schema( + name="MyTestSchema", + catalog_name="MyTestCatalog", + comment="A schema for testing", + properties={"key": "value"}, +) +``` + +Alternatively, these same APIs are available by accessing the `uc` property on the instance of `UnitycatalogFunctionClient`, as shown below. + > Tip: Ensure that you have created a catalog and a schema before attempting to create functions. The `UnitycatalogFunctionClient` provides helper methods for creating both Catalogs and Schemas in Unity Catalog. For full @@ -77,7 +106,7 @@ API-based CRUD operations, you will need to use the `unitycatalog-client` packag To create a Catalog, you can call the `create_catalog` method: ```python -uc_client.create_catalog( +uc_client.uc.create_catalog( name=CATALOG, comment="A catalog for demonstrating the use of Unity Catalog function usage in GenAI applications", ) @@ -88,7 +117,7 @@ uc_client.create_catalog( To create a Schema, you can call the `create_schema` method: ```python -uc_client.create_schema( +uc_client.uc.create_schema( name=SCHEMA, catalog_name=CATALOG, comment="A schema for holding UC functions for GenAI use cases",