Merge branch 'main' into ODSC-64498/langchain_embedding_plugin
lu-ohai authored Feb 4, 2025
2 parents 5d70267 + fbb8d8f commit a26aee3
Showing 11 changed files with 214 additions and 46 deletions.
40 changes: 26 additions & 14 deletions ads/aqua/app.py
@@ -4,6 +4,7 @@

import json
import os
import traceback
from dataclasses import fields
from typing import Dict, Union

@@ -23,7 +24,7 @@
from ads.aqua.constants import UNKNOWN
from ads.common import oci_client as oc
from ads.common.auth import default_signer
from ads.common.utils import extract_region
from ads.common.utils import extract_region, is_path_exists
from ads.config import (
AQUA_TELEMETRY_BUCKET,
AQUA_TELEMETRY_BUCKET_NS,
@@ -296,33 +297,44 @@ def get_config(self, model_id: str, config_file_name: str) -> Dict:
raise AquaRuntimeError(f"Target model {oci_model.id} is not an Aqua model.")

config = {}
artifact_path = get_artifact_path(oci_model.custom_metadata_list)
# if the current model has a service model tag, then use the base model's artifact path
if Tags.AQUA_SERVICE_MODEL_TAG in oci_model.freeform_tags:
base_model_ocid = oci_model.freeform_tags[Tags.AQUA_SERVICE_MODEL_TAG]
logger.info(
f"Base model found for the model: {oci_model.id}. "
f"Loading {config_file_name} for base model {base_model_ocid}."
)
base_model = self.ds_client.get_model(base_model_ocid).data
artifact_path = get_artifact_path(base_model.custom_metadata_list)
config_path = f"{os.path.dirname(artifact_path)}/config/"
else:
logger.info(f"Loading {config_file_name} for model {oci_model.id}...")
artifact_path = get_artifact_path(oci_model.custom_metadata_list)
config_path = f"{artifact_path.rstrip('/')}/config/"

if not artifact_path:
logger.debug(
f"Failed to get artifact path from custom metadata for the model: {model_id}"
)
return config

try:
config_path = f"{os.path.dirname(artifact_path)}/config/"
config = load_config(
config_path,
config_file_name=config_file_name,
)
except Exception:
# todo: temp fix for issue related to config load for byom models, update logic to choose the right path
config_file_path = f"{config_path}{config_file_name}"
if is_path_exists(config_file_path):
try:
config_path = f"{artifact_path.rstrip('/')}/config/"
config = load_config(
config_path,
config_file_name=config_file_name,
)
except Exception:
pass
logger.debug(
f"Error loading the {config_file_name} at path {config_path}.\n"
f"{traceback.format_exc()}"
)

if not config:
logger.error(
f"{config_file_name} is not available for the model: {model_id}. Check if the custom metadata has the artifact path set."
logger.debug(
f"{config_file_name} is not available for the model: {model_id}. "
f"Check if the custom metadata has the artifact path set."
)
return config

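In effect, the reworked get_config resolves the config directory from the base model's artifact path when the model carries a service-model tag, and from the model's own artifact path otherwise, then verifies the file exists before loading. A minimal sketch of that resolution order (a standalone illustration, not the ADS implementation; the example paths are hypothetical):

    import os

    def resolve_config_path(artifact_path: str, is_service_model: bool) -> str:
        # Service models keep config under the artifact's parent directory;
        # user-registered (BYOM) models keep it under the artifact path itself.
        if is_service_model:
            return f"{os.path.dirname(artifact_path)}/config/"
        return f"{artifact_path.rstrip('/')}/config/"

    print(resolve_config_path("oci://bucket@ns/service/model/artifact", True))
    # oci://bucket@ns/service/model/config/
    print(resolve_config_path("oci://bucket@ns/user/model/", False))
    # oci://bucket@ns/user/model/config/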
3 changes: 3 additions & 0 deletions ads/aqua/finetuning/entities.py
@@ -122,6 +122,8 @@ class CreateFineTuningDetails(Serializable):
The log group id for fine tuning job infrastructure.
log_id: (str, optional). Defaults to `None`.
The log id for fine tuning job infrastructure.
watch_logs: (bool, optional). Defaults to `False`.
The flag to watch the job run logs when a fine-tuning job is created.
force_overwrite: (bool, optional). Defaults to `False`.
Whether to force overwrite the existing file in object storage.
freeform_tags: (dict, optional)
@@ -148,6 +150,7 @@
subnet_id: Optional[str] = None
log_id: Optional[str] = None
log_group_id: Optional[str] = None
watch_logs: Optional[bool] = False
force_overwrite: Optional[bool] = False
freeform_tags: Optional[dict] = None
defined_tags: Optional[dict] = None
25 changes: 25 additions & 0 deletions ads/aqua/finetuning/finetuning.py
@@ -4,6 +4,8 @@

import json
import os
import time
import traceback
from typing import Dict

from oci.data_science.models import (
@@ -149,6 +151,15 @@ def create(
f"Logging is required for fine tuning if replica is larger than {DEFAULT_FT_REPLICA}."
)

if create_fine_tuning_details.watch_logs and not (
create_fine_tuning_details.log_id
and create_fine_tuning_details.log_group_id
):
raise AquaValueError(
"Logging is required for fine tuning if watch_logs is set to True. "
"Please provide log_id and log_group_id with the request parameters."
)

ft_parameters = self._get_finetuning_params(
create_fine_tuning_details.ft_parameters
)
@@ -422,6 +433,20 @@ def create(
value=source.display_name,
)

if create_fine_tuning_details.watch_logs:
logger.info(
f"Watching fine-tuning job run logs for {ft_job_run.id}. Press Ctrl+C to stop watching logs.\n"
)
try:
ft_job_run.watch()
except KeyboardInterrupt:
logger.info(f"\nStopped watching logs for {ft_job_run.id}.\n")
time.sleep(1)
except Exception:
logger.debug(
f"Something unexpected occurred while watching logs.\n{traceback.format_exc()}"
)

return AquaFineTuningSummary(
id=ft_model.id,
name=ft_model.display_name,
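The watcher added above is deliberately non-fatal: Ctrl+C stops the stream without failing the request, and any other error is only logged at debug level. A standalone sketch of the same pattern, with a hypothetical job_run object standing in for the OCI job run:

    import logging
    import time
    import traceback

    logger = logging.getLogger(__name__)

    def watch_job_run_logs(job_run):
        # Block on the log stream until the run finishes or the user interrupts.
        logger.info(f"Watching job run logs for {job_run.id}. Press Ctrl+C to stop watching logs.")
        try:
            job_run.watch()
        except KeyboardInterrupt:
            logger.info(f"Stopped watching logs for {job_run.id}.")
            time.sleep(1)  # give the stream a moment to flush before returning
        except Exception:
            # A log-watching failure must not fail the fine-tuning request itself.
            logger.debug(f"Something unexpected occurred while watching logs.\n{traceback.format_exc()}")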
19 changes: 0 additions & 19 deletions ads/aqua/model/model.py
@@ -29,7 +29,6 @@
LifecycleStatus,
_build_resource_identifier,
cleanup_local_hf_model_artifact,
copy_model_config,
create_word_icon,
generate_tei_cmd_var,
get_artifact_path,
@@ -969,24 +968,6 @@ def _create_model_catalog_entry(
)
tags[Tags.LICENSE] = validation_result.tags.get(Tags.LICENSE, UNKNOWN)

try:
# If the verified model already has an artifact json, use that.
artifact_path = metadata.get(MODEL_BY_REFERENCE_OSS_PATH_KEY).value
logger.info(
f"Found model artifact in the service bucket. "
f"Using artifact from service bucket instead of {os_path}."
)

# todo: implement generic copy_folder method
# copy model config from artifact path to user bucket
copy_model_config(
artifact_path=artifact_path, os_path=os_path, auth=default_signer()
)
except Exception:
logger.debug(
f"Proceeding with model registration without copying model config files at {os_path}. "
f"Default configuration will be used for deployment and fine-tuning."
)
# Set artifact location to user bucket, and replace existing key if present.
metadata.add(
key=MODEL_BY_REFERENCE_OSS_PATH_KEY,
11 changes: 9 additions & 2 deletions ads/cli.py
@@ -1,14 +1,15 @@
#!/usr/bin/env python

# Copyright (c) 2021, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
import logging
import sys
import traceback
import uuid

import fire
from pydantic import BaseModel

from ads.common import logger

@@ -84,7 +85,13 @@ def serialize(data):
The string representation of each dataclass object.
"""
if isinstance(data, list):
[print(str(item)) for item in data]
for item in data:
if isinstance(item, BaseModel):
print(json.dumps(item.dict(), indent=4))
else:
print(str(item))
elif isinstance(data, BaseModel):
print(json.dumps(data.dict(), indent=4))
else:
print(str(data))

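The updated serialize pretty-prints pydantic models as indented JSON instead of falling back to their repr. A self-contained demo of the same logic (the Example model is hypothetical):

    import json

    from pydantic import BaseModel

    class Example(BaseModel):
        name: str
        replicas: int

    def serialize(data):
        # Pydantic models are rendered as indented JSON; everything else via str().
        if isinstance(data, list):
            for item in data:
                if isinstance(item, BaseModel):
                    print(json.dumps(item.dict(), indent=4))
                else:
                    print(str(item))
        elif isinstance(data, BaseModel):
            print(json.dumps(data.dict(), indent=4))
        else:
            print(str(data))

    serialize([Example(name="demo", replicas=2), "plain string"])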
41 changes: 38 additions & 3 deletions ads/opctl/operator/lowcode/common/transformations.py
@@ -15,6 +15,7 @@
InvalidParameterError,
)
from ads.opctl.operator.lowcode.common.utils import merge_category_columns
from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorSpec


class Transformations(ABC):
@@ -34,6 +35,7 @@ def __init__(self, dataset_info, name="historical_data"):
self.dataset_info = dataset_info
self.target_category_columns = dataset_info.target_category_columns
self.target_column_name = dataset_info.target_column
self.raw_column_names = None
self.dt_column_name = (
dataset_info.datetime_column.name if dataset_info.datetime_column else None
)
@@ -60,7 +62,8 @@ def run(self, data):
"""
clean_df = self._remove_trailing_whitespace(data)
# clean_df = self._normalize_column_names(clean_df)
if isinstance(self.dataset_info, ForecastOperatorSpec):
clean_df = self._clean_column_names(clean_df)
if self.name == "historical_data":
self._check_historical_dataset(clean_df)
clean_df = self._set_series_id_column(clean_df)
@@ -98,8 +101,36 @@ def run(self, data):
def _remove_trailing_whitespace(self, df):
return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

# def _normalize_column_names(self, df):
# return df.rename(columns=lambda x: re.sub("[^A-Za-z0-9_]+", "", x))
def _clean_column_names(self, df):
"""
Remove spaces from column names in a DataFrame and store the original names.
Parameters:
df (pd.DataFrame): The DataFrame whose column names need to be cleaned.
Returns:
pd.DataFrame: The DataFrame with cleaned column names.
"""

self.raw_column_names = {
col: col.replace(" ", "") for col in df.columns if " " in col
}
df.columns = [self.raw_column_names.get(col, col) for col in df.columns]

if self.target_column_name:
self.target_column_name = self.raw_column_names.get(
self.target_column_name, self.target_column_name
)
self.dt_column_name = self.raw_column_names.get(
self.dt_column_name, self.dt_column_name
)

if self.target_category_columns:
self.target_category_columns = [
self.raw_column_names.get(col, col)
for col in self.target_category_columns
]
return df

def _set_series_id_column(self, df):
self._target_category_columns_map = {}
@@ -233,6 +264,10 @@ def _check_historical_dataset(self, df):
expected_names = [self.target_column_name, self.dt_column_name] + (
self.target_category_columns if self.target_category_columns else []
)

if self.raw_column_names:
expected_names.extend(list(self.raw_column_names.values()))

if set(df.columns) != set(expected_names):
raise DataMismatchError(
f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
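The rename keeps the original-to-cleaned mapping in raw_column_names so target, datetime, and category column references stay consistent and the historical-dataset check can accept either form. A quick pandas illustration of the mapping (the sample data is hypothetical):

    import pandas as pd

    df = pd.DataFrame({"Sale Date": ["2024-01-01", "2024-01-02"], "Total Sales": [100, 120]})

    # Build the original -> cleaned mapping only for names that contain spaces.
    raw_column_names = {col: col.replace(" ", "") for col in df.columns if " " in col}
    df.columns = [raw_column_names.get(col, col) for col in df.columns]

    print(raw_column_names)  # {'Sale Date': 'SaleDate', 'Total Sales': 'TotalSales'}
    print(list(df.columns))  # ['SaleDate', 'TotalSales']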
22 changes: 22 additions & 0 deletions docs/source/release_notes.rst
@@ -2,6 +2,28 @@
Release Notes
=============

2.12.10
-------
Release date: Feb 5th, 2025

* Support inference container URI to be edited for unverified models in AI Quick Actions.
* Logging improvements for all CRUD operations in AI Quick Actions.
* Expand allowable parameters for fine-tuning operation in AI Quick Actions.
* Allow local model cache delete while registering models in AI Quick Actions.
* Add validation flag ``ignore-model-artifact-check`` to ignore ``config.json`` check while registering model in AI Quick Actions.
* Allow user to watch fine-tuning job run logs in AI Quick Actions when using CLI mode.
* Load base model configuration files by default for verified models in AI Quick Actions.
* Added a framework to support embedding onnx models.
* Added headers parameters for ADS Langchain.
* Addition of ``SessionLogger`` and ``MetricLogger`` AutoGen loggers for logging and reporting.
* Support for a centralized and reusable client for interacting with OCI Model Deployment.
* Bug fix for ADS telemetry logging.
* Set defaults for optional parameters in ChatOCIModelDeploymentVLLM.
* Real-Time What-If Analysis with Model Catalog and Model Deployment Integration.
* Support for AutoMLx internal explainability mode.
* Standardize outputs & report for single-series forecasts without target_category_columns.


2.12.9
-------
Release date: December 18, 2024
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi"

# Required
name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below
version = "2.12.9"
version = "2.12.10"

# Optional
description = "Oracle Accelerated Data Science SDK"