From 4ceae364264dfaa2ed992d7938682588b2cf4435 Mon Sep 17 00:00:00 2001 From: Gorkem Ercan Date: Thu, 22 Feb 2024 14:18:41 -0500 Subject: [PATCH] Update model to be a single object on kitfile Updates the build and export to adjust to single object. --- pkg/artifact/kit-file.go | 12 ++-- pkg/artifact/kit-file.md | 8 +-- pkg/cmd/build/build.go | 21 +++--- pkg/cmd/build/kit-file.md | 141 ++++++++++++++++++++++++++++++++++++++ pkg/cmd/export/export.go | 5 +- pkg/cmd/list/list.go | 8 ++- pkg/cmd/list/list_test.go | 2 +- 7 files changed, 171 insertions(+), 26 deletions(-) create mode 100644 pkg/cmd/build/kit-file.md diff --git a/pkg/artifact/kit-file.go b/pkg/artifact/kit-file.go index da2a17e7..c3508cbb 100644 --- a/pkg/artifact/kit-file.go +++ b/pkg/artifact/kit-file.go @@ -10,14 +10,14 @@ import ( type ( KitFile struct { - ManifestVersion string `json:"manifestVersion"` - Package Package `json:"package,omitempty"` - Code []Code `json:"code,omitempty"` - DataSets []DataSet `json:"datasets,omitempty"` - Models []TrainedModel `json:"models,omitempty"` + ManifestVersion string `json:"manifestVersion"` + Kit ModelKit `json:"package,omitempty"` + Code []Code `json:"code,omitempty"` + DataSets []DataSet `json:"datasets,omitempty"` + Model TrainedModel `json:"model,omitempty"` } - Package struct { + ModelKit struct { Name string `json:"name,omitempty"` Version string `json:"version,omitempty"` Description string `json:"description,omitempty"` diff --git a/pkg/artifact/kit-file.md b/pkg/artifact/kit-file.md index 4436eeb1..0c4d57f5 100644 --- a/pkg/artifact/kit-file.md +++ b/pkg/artifact/kit-file.md @@ -1,10 +1,10 @@ # KitOps AI/ML Packaging Manifest Format Reference -The Kit manifest for AI/ML is a YAML file designed to encapsulate all the necessary information about the package, including code, datasets, models, and their metadata. This reference documentation outlines the structure and specifications of the manifest format. 
+The Kitfile manifest for AI/ML is a YAML file designed to encapsulate all the necessary information about the package, including code, datasets, model, and their metadata. This reference documentation outlines the structure and specifications of the manifest format. ## Overview -The manifest is structured into several key sections: `version`, `package`,`code`, `datasets` and `models`. Each section serves a specific purpose in describing the AI/ML package components and requirements. +The manifest is structured into several key sections: `version`, `package`,`code`, `datasets` and `model`. Each section serves a specific purpose in describing the AI/ML package components and requirements. ### `ManifestVersion` @@ -12,7 +12,7 @@ The manifest is structured into several key sections: `version`, `package`,`code - **Type**: String - **Example**: `1.0` -### `package` +### `modelkit` This section provides general information about the AI/ML project. @@ -56,7 +56,7 @@ This section provides general information about the AI/ML project. - `license`: SPDX license identifier for the dataset. - `preprocessing`: Reference to preprocessing steps. -#### `models` +#### `model` - **Description**: Details of the trained models included in the package. - **Type**: Object Array diff --git a/pkg/cmd/build/build.go b/pkg/cmd/build/build.go index 1a0e9085..9dd36835 100644 --- a/pkg/cmd/build/build.go +++ b/pkg/cmd/build/build.go @@ -128,18 +128,17 @@ func (options *BuildOptions) RunBuild() error { model.Layers = append(model.Layers, *layer) } - // 4. package the TrainedModels - for _, trainedModel := range kitfile.Models { - modelPath, err := filesystem.VerifySubpath(options.ContextDir, trainedModel.Path) - if err != nil { - return err - } - layer := &artifact.ModelLayer{ - BaseDir: modelPath, - MediaType: constants.ModelLayerMediaType, - } - model.Layers = append(model.Layers, *layer) + // 4. 
package the TrainedModel + + modelPath, err := filesystem.VerifySubpath(options.ContextDir, kitfile.Model.Path) + if err != nil { + return err + } + layer := &artifact.ModelLayer{ + BaseDir: modelPath, + MediaType: constants.ModelLayerMediaType, } + model.Layers = append(model.Layers, *layer) modelStorePath := options.storageHome repo := "" diff --git a/pkg/cmd/build/kit-file.md b/pkg/cmd/build/kit-file.md new file mode 100644 index 00000000..ae84fe37 --- /dev/null +++ b/pkg/cmd/build/kit-file.md @@ -0,0 +1,141 @@ +# Kitfile AI/ML Packaging Manifest Format Reference + +The Kitfile manifest for AI/ML is a YAML file designed to encapsulate all the necessary information about the package, including code, datasets, model, and their metadata. This reference documentation outlines the structure and specifications of the manifest format. + +## Overview + +The manifest is structured into several key sections: `version`, `package`,`code`, `datasets` and `model`. Each section serves a specific purpose in describing the AI/ML package components and requirements. + +### `ManifestVersion` + +- **Description**: Specifies the manifest format version. +- **Type**: String +- **Example**: `1.0` + +### `modelkit` + +This section provides general information about the AI/ML project. + +#### `name` + +- **Description**: The name of the AI/ML project. +- **Type**: String + +#### `version` + +- **Description**: The current version of the project. +- **Type**: String +- **Example**: `1.2.3` + +#### `description` + +- **Description**: A brief overview of the project's purpose and capabilities. +- **Type**: String + +#### `authors` + +- **Description**: A list of individuals or entities that have contributed to the project. +- **Type**: Array of Strings + + +#### `code` + +- **Description**: Information about the source code. +- **Type**: Object Array + - `path`: Location of the source code files or directory relative to the context + - `description`: Description of what the code does. 
+ - `license`: SPDX license identifier for the code. + +#### `datasets` + +- **Description**: Information about the datasets used. +- **Type**: Object Array + - `name`: Name of the dataset. + - `path`: Location of the dataset file or directory relative to the context. + - `description`: Overview of the dataset. + - `license`: SPDX license identifier for the dataset. + - `preprocessing`: Reference to preprocessing steps. + +#### `model` + +- **Description**: Details of the trained model included in the package. +- **Type**: Object + - `name`: Name of the model + - `path`: Location of the model file or directory relative to the context + - `framework`: AI/ML framework + - `version`: Version of the model + - `description`: Overview of the model + - `license`: SPDX license identifier for the model. + - `training`: + - `dataset`: Name of the dataset + - `parameters`: name value pairs + - `validation`: + - `dataset`: Name of the dataset + - `metrics`: name value pairs + + +## Example + +```yaml +manifestVersion: 1.0 +package: + name: AIProjectName + version: 1.2.3 + description: >- + A brief description of the AI/ML project. + authors: [Author Name, Contributor Name] +code: + - path: src/ + description: Source code for the AI models. + license: Apache-2.0 +datasets: + - name: DatasetName + path: data/dataset.csv + description: Description of the dataset. + license: CC-BY-4.0 + preprocessing: Preprocessing steps. +model: + name: ModelName + path: models/model.h5 + framework: TensorFlow + version: 1.0 + description: Model description. + license: Apache-2.0 + training: + dataset: DatasetName + parameters: + learning_rate: 0.001 + epochs: 100 + batch_size: 32 + validation: + - dataset: DatasetName + metrics: + accuracy: 0.95 + f1_score: 0.94 +``` + + +## Future Considerations + +This section is for collecting future ideas. 
+ +### `dependencies` + +**This is a possible future section that may be used for creating BOM.** + +- **Description**: Lists the project's external dependencies. +- **Type**: Object Array + - `name`: Name of the dependency. + - `version`: Version of the dependency. + - `license`: SPDX license identifier for the dependency. + +##### Example for dependencies +```yaml + dependencies: + - name: numpy + version: 1.19.2 + license: BSD-3-Clause + - name: pandas + version: 1.1.3 + license: BSD-3-Clause +``` \ No newline at end of file diff --git a/pkg/cmd/export/export.go b/pkg/cmd/export/export.go index ee716c08..5b19dbce 100644 --- a/pkg/cmd/export/export.go +++ b/pkg/cmd/export/export.go @@ -39,7 +39,7 @@ func ExportModel(ctx context.Context, store oras.Target, ref *registry.Reference // Since there might be multiple models, etc. we need to synchronously iterate // through the config's relevant field to get the correct path for exporting - var modelIdx, codeIdx, datasetIdx int + var codeIdx, datasetIdx int for _, layerDesc := range manifest.Layers { layerDir := "" switch layerDesc.MediaType { @@ -47,10 +47,9 @@ func ExportModel(ctx context.Context, store oras.Target, ref *registry.Reference if !options.exportConf.ExportModels { continue } - modelEntry := config.Models[modelIdx] + modelEntry := config.Model layerDir = filepath.Join(options.exportDir, modelEntry.Path) fmt.Printf("Exporting model %s to %s\n", modelEntry.Name, layerDir) - modelIdx += 1 case constants.CodeLayerMediaType: if !options.exportConf.ExportCode { diff --git a/pkg/cmd/list/list.go b/pkg/cmd/list/list.go index b714a3f9..7db9d76a 100644 --- a/pkg/cmd/list/list.go +++ b/pkg/cmd/list/list.go @@ -110,8 +110,14 @@ func getManifestInfoLine(repo string, desc ocispec.Descriptor, manifest *ocispec size += layer.Size } sizeStr := formatBytes(size) + var author string + if len(config.Kit.Authors) > 0 { + author = config.Kit.Authors[0] + } else { + author = "" + } - info := fmt.Sprintf(listTableFmt, repo, ref, 
config.Package.Authors[0], config.Package.Name, sizeStr, desc.Digest) + info := fmt.Sprintf(listTableFmt, repo, ref, author, config.Kit.Name, sizeStr, desc.Digest) return info } diff --git a/pkg/cmd/list/list_test.go b/pkg/cmd/list/list_test.go index 250141b6..7acc331b 100644 --- a/pkg/cmd/list/list_test.go +++ b/pkg/cmd/list/list_test.go @@ -224,7 +224,7 @@ func Manifest(configDigest string, layerDigests ...string) ocispec.Manifest { func Config(maintainer, name string) artifact.KitFile { config := artifact.KitFile{ - Package: artifact.Package{Authors: []string{maintainer}, Name: name}, + Kit: artifact.ModelKit{Authors: []string{maintainer}, Name: name}, } return config