Merge branch 'main' into ODSC-64498/langchain_embedding_plugin
lu-ohai authored Feb 4, 2025
2 parents 5d70267 + fbb8d8f commit a26aee3
Showing 11 changed files with 214 additions and 46 deletions.
40 changes: 26 additions & 14 deletions ads/aqua/app.py
@@ -4,6 +4,7 @@

import json
import os
import traceback
from dataclasses import fields
from typing import Dict, Union

@@ -23,7 +24,7 @@
from ads.aqua.constants import UNKNOWN
from ads.common import oci_client as oc
from ads.common.auth import default_signer
from ads.common.utils import extract_region
from ads.common.utils import extract_region, is_path_exists
from ads.config import (
AQUA_TELEMETRY_BUCKET,
AQUA_TELEMETRY_BUCKET_NS,
@@ -296,33 +297,44 @@ def get_config(self, model_id: str, config_file_name: str) -> Dict:
raise AquaRuntimeError(f"Target model {oci_model.id} is not an Aqua model.")

config = {}
artifact_path = get_artifact_path(oci_model.custom_metadata_list)
# if the current model has a service model tag, then use the base model's artifact path
if Tags.AQUA_SERVICE_MODEL_TAG in oci_model.freeform_tags:
base_model_ocid = oci_model.freeform_tags[Tags.AQUA_SERVICE_MODEL_TAG]
logger.info(
f"Base model found for the model: {oci_model.id}. "
f"Loading {config_file_name} for base model {base_model_ocid}."
)
base_model = self.ds_client.get_model(base_model_ocid).data
artifact_path = get_artifact_path(base_model.custom_metadata_list)
config_path = f"{os.path.dirname(artifact_path)}/config/"
else:
logger.info(f"Loading {config_file_name} for model {oci_model.id}...")
artifact_path = get_artifact_path(oci_model.custom_metadata_list)
config_path = f"{artifact_path.rstrip('/')}/config/"

if not artifact_path:
logger.debug(
f"Failed to get artifact path from custom metadata for the model: {model_id}"
)
return config

try:
config_path = f"{os.path.dirname(artifact_path)}/config/"
config = load_config(
config_path,
config_file_name=config_file_name,
)
except Exception:
# todo: temp fix for issue related to config load for byom models, update logic to choose the right path
config_file_path = f"{config_path}{config_file_name}"
if is_path_exists(config_file_path):
try:
config_path = f"{artifact_path.rstrip('/')}/config/"
config = load_config(
config_path,
config_file_name=config_file_name,
)
except Exception:
pass
logger.debug(
f"Error loading the {config_file_name} at path {config_path}.\n"
f"{traceback.format_exc()}"
)

if not config:
logger.error(
f"{config_file_name} is not available for the model: {model_id}. Check if the custom metadata has the artifact path set."
logger.debug(
f"{config_file_name} is not available for the model: {model_id}. "
f"Check if the custom metadata has the artifact path set."
)
return config

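In effect, the reworked get_config resolves the config directory from the base model's artifact path when the model carries a service-model tag, and from the model's own artifact path otherwise, then verifies the file exists before loading. A minimal sketch of that resolution order (a standalone illustration, not the ADS implementation; the example paths are hypothetical):

    import os

    def resolve_config_path(artifact_path: str, is_service_model: bool) -> str:
        # Service models keep config under the artifact's parent directory;
        # user-registered (BYOM) models keep it under the artifact path itself.
        if is_service_model:
            return f"{os.path.dirname(artifact_path)}/config/"
        return f"{artifact_path.rstrip('/')}/config/"

    print(resolve_config_path("oci://bucket@ns/service/model/artifact", True))
    # oci://bucket@ns/service/model/config/
    print(resolve_config_path("oci://bucket@ns/user/model/", False))
    # oci://bucket@ns/user/model/config/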
3 changes: 3 additions & 0 deletions ads/aqua/finetuning/entities.py
@@ -122,6 +122,8 @@ class CreateFineTuningDetails(Serializable):
The log group id for fine tuning job infrastructure.
log_id: (str, optional). Defaults to `None`.
The log id for fine tuning job infrastructure.
watch_logs: (bool, optional). Defaults to `False`.
The flag to watch the job run logs when a fine-tuning job is created.
force_overwrite: (bool, optional). Defaults to `False`.
Whether to force overwrite the existing file in object storage.
freeform_tags: (dict, optional)
@@ -148,6 +150,7 @@
subnet_id: Optional[str] = None
log_id: Optional[str] = None
log_group_id: Optional[str] = None
watch_logs: Optional[bool] = False
force_overwrite: Optional[bool] = False
freeform_tags: Optional[dict] = None
defined_tags: Optional[dict] = None
25 changes: 25 additions & 0 deletions ads/aqua/finetuning/finetuning.py
@@ -4,6 +4,8 @@

import json
import os
import time
import traceback
from typing import Dict

from oci.data_science.models import (
@@ -149,6 +151,15 @@ def create(
f"Logging is required for fine tuning if replica is larger than {DEFAULT_FT_REPLICA}."
)

if create_fine_tuning_details.watch_logs and not (
create_fine_tuning_details.log_id
and create_fine_tuning_details.log_group_id
):
raise AquaValueError(
"Logging is required for fine tuning if watch_logs is set to True. "
"Please provide log_id and log_group_id with the request parameters."
)

ft_parameters = self._get_finetuning_params(
create_fine_tuning_details.ft_parameters
)
@@ -422,6 +433,20 @@ def create(
value=source.display_name,
)

if create_fine_tuning_details.watch_logs:
logger.info(
f"Watching fine-tuning job run logs for {ft_job_run.id}. Press Ctrl+C to stop watching logs.\n"
)
try:
ft_job_run.watch()
except KeyboardInterrupt:
logger.info(f"\nStopped watching logs for {ft_job_run.id}.\n")
time.sleep(1)
except Exception:
logger.debug(
f"Something unexpected occurred while watching logs.\n{traceback.format_exc()}"
)

return AquaFineTuningSummary(
id=ft_model.id,
name=ft_model.display_name,
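The watcher added above is deliberately non-fatal: Ctrl+C stops the stream without failing the request, and any other error is only logged at debug level. A standalone sketch of the same pattern, with a hypothetical job_run object standing in for the OCI job run:

    import logging
    import time
    import traceback

    logger = logging.getLogger(__name__)

    def watch_job_run_logs(job_run):
        # Block on the log stream until the run finishes or the user interrupts.
        logger.info(f"Watching job run logs for {job_run.id}. Press Ctrl+C to stop watching logs.")
        try:
            job_run.watch()
        except KeyboardInterrupt:
            logger.info(f"Stopped watching logs for {job_run.id}.")
            time.sleep(1)  # give the stream a moment to flush before returning
        except Exception:
            # A log-watching failure must not fail the fine-tuning request itself.
            logger.debug(f"Something unexpected occurred while watching logs.\n{traceback.format_exc()}")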
19 changes: 0 additions & 19 deletions ads/aqua/model/model.py
@@ -29,7 +29,6 @@
LifecycleStatus,
_build_resource_identifier,
cleanup_local_hf_model_artifact,
copy_model_config,
create_word_icon,
generate_tei_cmd_var,
get_artifact_path,
@@ -969,24 +968,6 @@ def _create_model_catalog_entry(
)
tags[Tags.LICENSE] = validation_result.tags.get(Tags.LICENSE, UNKNOWN)

try:
# If the verified model already has an artifact json, use that.
artifact_path = metadata.get(MODEL_BY_REFERENCE_OSS_PATH_KEY).value
logger.info(
f"Found model artifact in the service bucket. "
f"Using artifact from service bucket instead of {os_path}."
)

# todo: implement generic copy_folder method
# copy model config from artifact path to user bucket
copy_model_config(
artifact_path=artifact_path, os_path=os_path, auth=default_signer()
)
except Exception:
logger.debug(
f"Proceeding with model registration without copying model config files at {os_path}. "
f"Default configuration will be used for deployment and fine-tuning."
)
# Set artifact location to user bucket, and replace existing key if present.
metadata.add(
key=MODEL_BY_REFERENCE_OSS_PATH_KEY,
11 changes: 9 additions & 2 deletions ads/cli.py
@@ -1,14 +1,15 @@
#!/usr/bin/env python

# Copyright (c) 2021, 2025 Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/

import json
import logging
import sys
import traceback
import uuid

import fire
from pydantic import BaseModel

from ads.common import logger

@@ -84,7 +85,13 @@ def serialize(data):
The string representation of each dataclass object.
"""
if isinstance(data, list):
[print(str(item)) for item in data]
for item in data:
if isinstance(item, BaseModel):
print(json.dumps(item.dict(), indent=4))
else:
print(str(item))
elif isinstance(data, BaseModel):
print(json.dumps(data.dict(), indent=4))
else:
print(str(data))

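The updated serialize pretty-prints pydantic models as indented JSON instead of falling back to their repr. A self-contained demo of the same logic (the Example model is hypothetical):

    import json

    from pydantic import BaseModel

    class Example(BaseModel):
        name: str
        replicas: int

    def serialize(data):
        # Pydantic models are rendered as indented JSON; everything else via str().
        if isinstance(data, list):
            for item in data:
                if isinstance(item, BaseModel):
                    print(json.dumps(item.dict(), indent=4))
                else:
                    print(str(item))
        elif isinstance(data, BaseModel):
            print(json.dumps(data.dict(), indent=4))
        else:
            print(str(data))

    serialize([Example(name="demo", replicas=2), "plain string"])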
41 changes: 38 additions & 3 deletions ads/opctl/operator/lowcode/common/transformations.py
@@ -15,6 +15,7 @@
InvalidParameterError,
)
from ads.opctl.operator.lowcode.common.utils import merge_category_columns
from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorSpec


class Transformations(ABC):
@@ -34,6 +35,7 @@ def __init__(self, dataset_info, name="historical_data"):
self.dataset_info = dataset_info
self.target_category_columns = dataset_info.target_category_columns
self.target_column_name = dataset_info.target_column
self.raw_column_names = None
self.dt_column_name = (
dataset_info.datetime_column.name if dataset_info.datetime_column else None
)
@@ -60,7 +62,8 @@ def run(self, data):
"""
clean_df = self._remove_trailing_whitespace(data)
# clean_df = self._normalize_column_names(clean_df)
if isinstance(self.dataset_info, ForecastOperatorSpec):
clean_df = self._clean_column_names(clean_df)
if self.name == "historical_data":
self._check_historical_dataset(clean_df)
clean_df = self._set_series_id_column(clean_df)
@@ -98,8 +101,36 @@ def run(self, data):
def _remove_trailing_whitespace(self, df):
return df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

# def _normalize_column_names(self, df):
# return df.rename(columns=lambda x: re.sub("[^A-Za-z0-9_]+", "", x))
def _clean_column_names(self, df):
"""
Remove spaces from column names in a DataFrame and store the original names.
Parameters:
df (pd.DataFrame): The DataFrame whose column names need to be cleaned.
Returns:
pd.DataFrame: The DataFrame with cleaned column names.
"""

self.raw_column_names = {
col: col.replace(" ", "") for col in df.columns if " " in col
}
df.columns = [self.raw_column_names.get(col, col) for col in df.columns]

if self.target_column_name:
self.target_column_name = self.raw_column_names.get(
self.target_column_name, self.target_column_name
)
self.dt_column_name = self.raw_column_names.get(
self.dt_column_name, self.dt_column_name
)

if self.target_category_columns:
self.target_category_columns = [
self.raw_column_names.get(col, col)
for col in self.target_category_columns
]
return df

def _set_series_id_column(self, df):
self._target_category_columns_map = {}
@@ -233,6 +264,10 @@ def _check_historical_dataset(self, df):
expected_names = [self.target_column_name, self.dt_column_name] + (
self.target_category_columns if self.target_category_columns else []
)

if self.raw_column_names:
expected_names.extend(list(self.raw_column_names.values()))

if set(df.columns) != set(expected_names):
raise DataMismatchError(
f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
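The rename keeps the original-to-cleaned mapping in raw_column_names so target, datetime, and category column references stay consistent and the historical-dataset check can accept either form. A quick pandas illustration of the mapping (the sample data is hypothetical):

    import pandas as pd

    df = pd.DataFrame({"Sale Date": ["2024-01-01", "2024-01-02"], "Total Sales": [100, 120]})

    # Build the original -> cleaned mapping only for names that contain spaces.
    raw_column_names = {col: col.replace(" ", "") for col in df.columns if " " in col}
    df.columns = [raw_column_names.get(col, col) for col in df.columns]

    print(raw_column_names)  # {'Sale Date': 'SaleDate', 'Total Sales': 'TotalSales'}
    print(list(df.columns))  # ['SaleDate', 'TotalSales']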
22 changes: 22 additions & 0 deletions docs/source/release_notes.rst
@@ -2,6 +2,28 @@
Release Notes
=============

2.12.10
-------
Release date: Feb 5th, 2025

* Support inference container URI to be edited for unverified models in AI Quick Actions.
* Logging improvements for all CRUD operations in AI Quick Actions.
* Expand allowable parameters for fine-tuning operation in AI Quick Actions.
* Allow local model cache delete while registering models in AI Quick Actions.
* Add validation flag ``ignore-model-artifact-check`` to ignore ``config.json`` check while registering model in AI Quick Actions.
* Allow user to watch fine-tuning job run logs in AI Quick Actions when using CLI mode.
* Load base model configuration files by default for verified models in AI Quick Actions.
* Added a framework to support embedding onnx models.
* Added headers parameters for ADS Langchain.
* Addition of ``SessionLogger`` and ``MetricLogger`` AutoGen loggers for logging and reporting.
* Support for a centralized and reusable client for interacting with OCI Model Deployment.
* Bug fix for ADS telemetry logging.
* Set defaults for optional parameters in ChatOCIModelDeploymentVLLM.
* Real-Time What-If Analysis with Model Catalog and Model Deployment Integration.
* Support for AutoMLx internal explainability mode.
* Standardize outputs & report for single-series forecasts without target_category_columns.


2.12.9
-------
Release date: December 18, 2024
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -21,7 +21,7 @@ build-backend = "flit_core.buildapi"

# Required
name = "oracle_ads" # the install (PyPI) name; name for local build in [tool.flit.module] section below
version = "2.12.9"
version = "2.12.10"

# Optional
description = "Oracle Accelerated Data Science SDK"