From d3997902ca328e948f7724c66ee862505e0ef981 Mon Sep 17 00:00:00 2001
From: Baris Can Durak
Date: Wed, 13 Nov 2024 20:44:44 +0100
Subject: [PATCH 1/2] removing deprecated calls

---
 .github/workflows/ci.yml           |  2 +-
 template/steps/log_metadata.py     |  9 +++++++--
 template/steps/prepare_datasets.py | 18 +++++++++++-------
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c0cee8a..14ed448 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,5 +55,5 @@ jobs:
         uses: ./.github/actions/llm_finetuning_template_test
         with:
           python-version: ${{ matrix.python-version }}
-          ref-zenml: ${{ inputs.ref-zenml || 'develop' }}
+          ref-zenml: ${{ inputs.ref-zenml || 'feature/followup-run-metadata' }}
           ref-template: ${{ inputs.ref-template || github.ref }}
diff --git a/template/steps/log_metadata.py b/template/steps/log_metadata.py
index 53a0e16..d947241 100644
--- a/template/steps/log_metadata.py
+++ b/template/steps/log_metadata.py
@@ -2,7 +2,7 @@
 
 from typing import Any, Dict
 
-from zenml import log_model_metadata, step, get_step_context
+from zenml import log_metadata, step, get_step_context
 
 
 @step(enable_cache=False)
@@ -24,4 +24,9 @@ def log_metadata_from_step_artifact(
 
     metadata = {artifact_name: metadata_dict}
 
-    log_model_metadata(metadata)
\ No newline at end of file
+    if context.model:
+        log_metadata(
+            metadata=metadata,
+            model_name=context.model.name,
+            model_version=context.model.version,
+        )
diff --git a/template/steps/prepare_datasets.py b/template/steps/prepare_datasets.py
index 69fb44c..b9e78a8 100644
--- a/template/steps/prepare_datasets.py
+++ b/template/steps/prepare_datasets.py
@@ -6,7 +6,7 @@
 from materializers.directory_materializer import DirectoryMaterializer
 from typing_extensions import Annotated
 from utils.tokenizer import generate_and_tokenize_prompt, load_tokenizer
-from zenml import log_model_metadata, step
+from zenml import get_step_context, log_metadata, step
 from zenml.materializers import BuiltInMaterializer
 from zenml.utils.cuda_utils import cleanup_gpu_memory
 
@@ -33,12 +33,16 @@ def prepare_data(
 
     cleanup_gpu_memory(force=True)
 
-    log_model_metadata(
-        {
-            "system_prompt": system_prompt,
-            "base_model_id": base_model_id,
-        }
-    )
+    context = get_step_context()
+    if context.model:
+        log_metadata(
+            metadata={
+                "system_prompt": system_prompt,
+                "base_model_id": base_model_id,
+            },
+            model_name=context.model.name,
+            model_version=context.model.version,
+        )
 
     tokenizer = load_tokenizer(base_model_id, False, use_fast)
     gen_and_tokenize = partial(

From ece1d4651fb56e05591bc9c01834500e1eaaac9b Mon Sep 17 00:00:00 2001
From: Baris Can Durak
Date: Thu, 28 Nov 2024 15:04:07 +0100
Subject: [PATCH 2/2] fixing the call

---
 template/steps/prepare_datasets.py | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/template/steps/prepare_datasets.py b/template/steps/prepare_datasets.py
index b9e78a8..9313b83 100644
--- a/template/steps/prepare_datasets.py
+++ b/template/steps/prepare_datasets.py
@@ -6,7 +6,7 @@
 from materializers.directory_materializer import DirectoryMaterializer
 from typing_extensions import Annotated
 from utils.tokenizer import generate_and_tokenize_prompt, load_tokenizer
-from zenml import get_step_context, log_metadata, step
+from zenml import log_metadata, step
 from zenml.materializers import BuiltInMaterializer
 from zenml.utils.cuda_utils import cleanup_gpu_memory
 
@@ -33,16 +33,13 @@ def prepare_data(
 
     cleanup_gpu_memory(force=True)
 
-    context = get_step_context()
-    if context.model:
-        log_metadata(
-            metadata={
-                "system_prompt": system_prompt,
-                "base_model_id": base_model_id,
-            },
-            model_name=context.model.name,
-            model_version=context.model.version,
-        )
+    log_metadata(
+        metadata={
+            "system_prompt": system_prompt,
+            "base_model_id": base_model_id,
+        },
+        infer_model=True,
+    )
 
     tokenizer = load_tokenizer(base_model_id, False, use_fast)
     gen_and_tokenize = partial(