diff --git a/rubicon_ml/client/artifact.py b/rubicon_ml/client/artifact.py index 02082df5..21709ca7 100644 --- a/rubicon_ml/client/artifact.py +++ b/rubicon_ml/client/artifact.py @@ -9,7 +9,7 @@ import fsspec from rubicon_ml.client.base import Base -from rubicon_ml.client.mixin import TagMixin +from rubicon_ml.client.mixin import CommentMixin, TagMixin from rubicon_ml.client.utils.exception_handling import failsafe if TYPE_CHECKING: @@ -17,7 +17,7 @@ from rubicon_ml.domain import Artifact as ArtifactDomain -class Artifact(Base, TagMixin): +class Artifact(Base, TagMixin, CommentMixin): """A client artifact. An `artifact` is a catch-all for any other type of diff --git a/rubicon_ml/client/dataframe.py b/rubicon_ml/client/dataframe.py index facef63f..7b6caa08 100644 --- a/rubicon_ml/client/dataframe.py +++ b/rubicon_ml/client/dataframe.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Callable, Literal, Optional, Union -from rubicon_ml.client import Base, TagMixin +from rubicon_ml.client import Base, CommentMixin, TagMixin from rubicon_ml.client.utils.exception_handling import failsafe from rubicon_ml.exceptions import RubiconException @@ -11,7 +11,7 @@ from rubicon_ml.domain import Dataframe as DataframeDomain -class Dataframe(Base, TagMixin): +class Dataframe(Base, TagMixin, CommentMixin): """A client dataframe. A `dataframe` is a two-dimensional, tabular dataset with diff --git a/rubicon_ml/client/experiment.py b/rubicon_ml/client/experiment.py index 2422837d..8f1185e4 100644 --- a/rubicon_ml/client/experiment.py +++ b/rubicon_ml/client/experiment.py @@ -56,7 +56,15 @@ def _get_identifiers(self): return self.project.name, self.id @failsafe - def log_metric(self, name, value, directionality="score", description=None, tags=[]): + def log_metric( + self, + name: str, + value: float, + directionality: str = "score", + description: str = None, + tags: list[str] = [], + comments: list[str] = [], + ) -> Metric: """Create a metric under the experiment. Parameters @@ -76,6 +84,9 @@ def log_metric(self, name, value, directionality="score", description=None, tags tags : list of str, optional Values to tag the experiment with. Use tags to organize and filter your metrics. + comments : list of str, optional + Values to comment the experiment with. Use comments to organize and + filter your metrics. Returns ------- @@ -85,8 +96,18 @@ def log_metric(self, name, value, directionality="score", description=None, tags if not isinstance(tags, list) or not all([isinstance(tag, str) for tag in tags]): raise ValueError("`tags` must be `list` of type `str`") + if not isinstance(comments, list) or not all( + [isinstance(comment, str) for comment in comments] + ): + raise ValueError("`comments` must be `list` of type `str`") + metric = domain.Metric( - name, value, directionality=directionality, description=description, tags=tags + name, + value, + directionality=directionality, + description=description, + tags=tags, + comments=comments, ) for repo in self.repositories: repo.create_metric(metric, self.project.name, self.id) @@ -161,7 +182,14 @@ def metric(self, name=None, id=None): return [m for m in self.metrics() if m.id == id][0] @failsafe - def log_feature(self, name, description=None, importance=None, tags=[]): + def log_feature( + self, + name: str, + description: str = None, + importance: float = None, + tags: list[str] = [], + comments: list[str] = [], + ) -> Feature: """Create a feature under the experiment. Parameters @@ -176,6 +204,9 @@ def log_feature(self, name, description=None, importance=None, tags=[]): tags : list of str, optional Values to tag the experiment with. Use tags to organize and filter your features. + comments : list of str, optional + Values to comment the experiment with. Use comments to organize and + filter your features. Returns ------- @@ -185,7 +216,14 @@ def log_feature(self, name, description=None, importance=None, tags=[]): if not isinstance(tags, list) or not all([isinstance(tag, str) for tag in tags]): raise ValueError("`tags` must be `list` of type `str`") - feature = domain.Feature(name, description=description, importance=importance, tags=tags) + if not isinstance(comments, list) or not all( + [isinstance(comment, str) for comment in comments] + ): + raise ValueError("`comments` must be `list` of type `str`") + + feature = domain.Feature( + name, description=description, importance=importance, tags=tags, comments=comments + ) for repo in self.repositories: repo.create_feature(feature, self.project.name, self.id) @@ -260,7 +298,14 @@ def feature(self, name=None, id=None): return [f for f in self.features() if f.id == id][0] @failsafe - def log_parameter(self, name, value=None, description=None, tags=[]): + def log_parameter( + self, + name: str, + value: object = None, + description: str = None, + tags: list[str] = [], + comments: list[str] = [], + ) -> Parameter: """Create a parameter under the experiment. Parameters @@ -277,6 +322,9 @@ def log_parameter(self, name, value=None, description=None, tags=[]): tags : list of str, optional Values to tag the parameter with. Use tags to organize and filter your parameters. + comments : list of str, optional + Values to comment the experiment with. Use comments to organize and + filter your features. Returns ------- @@ -286,7 +334,14 @@ def log_parameter(self, name, value=None, description=None, tags=[]): if not isinstance(tags, list) or not all([isinstance(tag, str) for tag in tags]): raise ValueError("`tags` must be `list` of type `str`") - parameter = domain.Parameter(name, value=value, description=description, tags=tags) + if not isinstance(comments, list) or not all( + [isinstance(comment, str) for comment in comments] + ): + raise ValueError("`comments` must be `list` of type `str`") + + parameter = domain.Parameter( + name, value=value, description=description, tags=tags, comments=comments + ) for repo in self.repositories: repo.create_parameter(parameter, self.project.name, self.id) diff --git a/rubicon_ml/client/feature.py b/rubicon_ml/client/feature.py index 505d2803..31525cc1 100644 --- a/rubicon_ml/client/feature.py +++ b/rubicon_ml/client/feature.py @@ -3,14 +3,14 @@ from datetime import datetime from typing import TYPE_CHECKING, Optional -from rubicon_ml.client import Base, TagMixin +from rubicon_ml.client import Base, CommentMixin, TagMixin if TYPE_CHECKING: from rubicon_ml.client import Experiment from rubicon_ml.domain import Feature as FeatureDomain -class Feature(Base, TagMixin): +class Feature(Base, TagMixin, CommentMixin): """A client feature. A `feature` is an input to an `experiment` (model run) diff --git a/rubicon_ml/client/metric.py b/rubicon_ml/client/metric.py index d5390c26..99befab2 100644 --- a/rubicon_ml/client/metric.py +++ b/rubicon_ml/client/metric.py @@ -3,14 +3,14 @@ from datetime import datetime from typing import TYPE_CHECKING, Optional -from rubicon_ml.client import Base, TagMixin +from rubicon_ml.client import Base, CommentMixin, TagMixin if TYPE_CHECKING: from rubicon_ml.client import Experiment from rubicon_ml.domain import Metric as MetricDomain -class Metric(Base, TagMixin): +class Metric(Base, TagMixin, CommentMixin): """A client metric. A `metric` is a single-value output of an `experiment` that diff --git a/rubicon_ml/client/mixin.py b/rubicon_ml/client/mixin.py index 01d9802e..fc66f128 100644 --- a/rubicon_ml/client/mixin.py +++ b/rubicon_ml/client/mixin.py @@ -69,6 +69,7 @@ def log_artifact( name: Optional[str] = None, description: Optional[str] = None, tags: Optional[List[str]] = None, + comments: Optional[List[str]] = None, ) -> Artifact: """Log an artifact to this client object. @@ -93,6 +94,9 @@ def log_artifact( tags : list of str, optional Values to tag the experiment with. Use tags to organize and filter your artifacts. + comments : list of str, optional + Values to comment the experiment with. Use comments to organize and + filter your artifacts. Notes ----- @@ -128,6 +132,13 @@ def log_artifact( if not isinstance(tags, list) or not all([isinstance(tag, str) for tag in tags]): raise ValueError("`tags` must be `list` of type `str`") + if comments is None: + comments = [] + if not isinstance(comments, list) or not all( + [isinstance(comment, str) for comment in comments] + ): + raise ValueError("`comments` must be `list` of type `str`") + data_bytes, name = self._validate_data(data_bytes, data_file, data_object, data_path, name) artifact = domain.Artifact( @@ -135,6 +146,7 @@ def log_artifact( description=description, parent_id=self._domain.id, tags=tags, + comments=comments, ) project_name, experiment_id = self._get_identifiers() @@ -413,6 +425,7 @@ def log_dataframe( description: Optional[str] = None, name: Optional[str] = None, tags: Optional[List[str]] = None, + comments: Optional[List[str]] = None, ) -> Dataframe: """Log a dataframe to this client object. @@ -425,6 +438,8 @@ def log_dataframe( additional context. tags : list of str The values to tag the dataframe with. + comments: list of str + The values to comment the dataframe with. Returns ------- @@ -436,11 +451,19 @@ def log_dataframe( if not isinstance(tags, list) or not all([isinstance(tag, str) for tag in tags]): raise ValueError("`tags` must be `list` of type `str`") + if comments is None: + comments = [] + if not isinstance(comments, list) or not all( + [isinstance(comment, str) for comment in comments] + ): + raise ValueError("`comments` must be `list` of type `str`") + dataframe = domain.Dataframe( parent_id=self._domain.id, description=description, name=name, tags=tags, + comments=comments, ) project_name, experiment_id = self._get_identifiers() diff --git a/rubicon_ml/client/parameter.py b/rubicon_ml/client/parameter.py index e17a20ad..43d8c4d9 100644 --- a/rubicon_ml/client/parameter.py +++ b/rubicon_ml/client/parameter.py @@ -3,14 +3,14 @@ from datetime import datetime from typing import TYPE_CHECKING, Optional, Union -from rubicon_ml.client import Base, TagMixin +from rubicon_ml.client import Base, CommentMixin, TagMixin if TYPE_CHECKING: from rubicon_ml.client import Experiment from rubicon_ml.domain import Parameter as ParameterDomain -class Parameter(Base, TagMixin): +class Parameter(Base, TagMixin, CommentMixin): """A client parameter. A `parameter` is an input to an `experiment` (model run) diff --git a/rubicon_ml/domain/artifact.py b/rubicon_ml/domain/artifact.py index 14dcf01b..0b078aff 100644 --- a/rubicon_ml/domain/artifact.py +++ b/rubicon_ml/domain/artifact.py @@ -4,17 +4,18 @@ from datetime import datetime from typing import List, Optional -from rubicon_ml.domain.mixin import TagMixin +from rubicon_ml.domain.mixin import CommentMixin, TagMixin from rubicon_ml.domain.utils import uuid @dataclass -class Artifact(TagMixin): +class Artifact(TagMixin, CommentMixin): name: str id: str = field(default_factory=uuid.uuid4) description: Optional[str] = None created_at: datetime = field(default_factory=datetime.utcnow) tags: List[str] = field(default_factory=list) + comments: List[str] = field(default_factory=list) parent_id: Optional[str] = None diff --git a/rubicon_ml/domain/dataframe.py b/rubicon_ml/domain/dataframe.py index e2f085b8..196298e6 100644 --- a/rubicon_ml/domain/dataframe.py +++ b/rubicon_ml/domain/dataframe.py @@ -4,16 +4,17 @@ from datetime import datetime from typing import List, Optional -from rubicon_ml.domain.mixin import TagMixin +from rubicon_ml.domain.mixin import CommentMixin, TagMixin from rubicon_ml.domain.utils import uuid @dataclass -class Dataframe(TagMixin): +class Dataframe(TagMixin, CommentMixin): id: str = field(default_factory=uuid.uuid4) name: Optional[str] = None description: Optional[str] = None tags: List[str] = field(default_factory=list) + comments: List[str] = field(default_factory=list) created_at: datetime = field(default_factory=datetime.utcnow) parent_id: Optional[str] = None diff --git a/rubicon_ml/domain/feature.py b/rubicon_ml/domain/feature.py index fd4215d4..eaaa6f9d 100644 --- a/rubicon_ml/domain/feature.py +++ b/rubicon_ml/domain/feature.py @@ -2,16 +2,17 @@ from datetime import datetime from typing import List, Optional -from rubicon_ml.domain.mixin import TagMixin +from rubicon_ml.domain.mixin import CommentMixin, TagMixin from rubicon_ml.domain.utils import uuid @dataclass -class Feature(TagMixin): +class Feature(TagMixin, CommentMixin): name: str id: str = field(default_factory=uuid.uuid4) description: Optional[str] = None importance: Optional[float] = None tags: List[str] = field(default_factory=list) + comments: List[str] = field(default_factory=list) created_at: datetime = field(default_factory=datetime.utcnow) diff --git a/rubicon_ml/domain/metric.py b/rubicon_ml/domain/metric.py index 9efd338d..c1d98aa5 100644 --- a/rubicon_ml/domain/metric.py +++ b/rubicon_ml/domain/metric.py @@ -2,14 +2,14 @@ from datetime import datetime from typing import List, Optional -from rubicon_ml.domain.mixin import TagMixin +from rubicon_ml.domain.mixin import CommentMixin, TagMixin from rubicon_ml.domain.utils import uuid DIRECTIONALITY_VALUES = ["score", "loss"] @dataclass -class Metric(TagMixin): +class Metric(TagMixin, CommentMixin): name: str value: float @@ -18,6 +18,7 @@ class Metric(TagMixin): directionality: str = "score" created_at: datetime = field(default_factory=datetime.utcnow) tags: List[str] = field(default_factory=list) + comments: List[str] = field(default_factory=list) def __post_init__(self): if self.directionality not in DIRECTIONALITY_VALUES: diff --git a/rubicon_ml/domain/parameter.py b/rubicon_ml/domain/parameter.py index 1238c5cc..d248ee8d 100644 --- a/rubicon_ml/domain/parameter.py +++ b/rubicon_ml/domain/parameter.py @@ -2,16 +2,17 @@ from datetime import datetime from typing import List, Optional -from rubicon_ml.domain.mixin import TagMixin +from rubicon_ml.domain.mixin import CommentMixin, TagMixin from rubicon_ml.domain.utils import uuid @dataclass -class Parameter(TagMixin): +class Parameter(TagMixin, CommentMixin): name: str id: str = field(default_factory=uuid.uuid4) value: object = None description: Optional[str] = None tags: List[str] = field(default_factory=list) + comments: List[str] = field(default_factory=list) created_at: datetime = field(default_factory=datetime.utcnow) diff --git a/tests/unit/client/test_artifact_client.py b/tests/unit/client/test_artifact_client.py index 5b037b97..713785bc 100644 --- a/tests/unit/client/test_artifact_client.py +++ b/tests/unit/client/test_artifact_client.py @@ -14,12 +14,18 @@ def test_properties(project_client): parent = project_client - domain_artifact = domain.Artifact(name="test.txt") + domain_artifact = domain.Artifact( + name="test.txt", + tags=["x"], + comments=["this is a comment"], + ) artifact = Artifact(domain_artifact, parent) assert artifact.id == domain_artifact.id assert artifact.name == domain_artifact.name assert artifact.description == domain_artifact.description + assert artifact.tags == domain_artifact.tags + assert artifact.comments == domain_artifact.comments assert artifact.created_at == domain_artifact.created_at assert artifact.parent == parent diff --git a/tests/unit/client/test_dataframe_client.py b/tests/unit/client/test_dataframe_client.py index 80ff090e..cd4037a6 100644 --- a/tests/unit/client/test_dataframe_client.py +++ b/tests/unit/client/test_dataframe_client.py @@ -9,7 +9,10 @@ def test_properties(project_client): parent = project_client domain_dataframe = domain.Dataframe( - description="some description", tags=["x"], name="test title" + description="some description", + tags=["x"], + comments=["this is a comment"], + name="test title", ) dataframe = Dataframe(domain_dataframe, parent) @@ -17,6 +20,7 @@ def test_properties(project_client): assert dataframe.name == domain_dataframe.name assert dataframe.description == domain_dataframe.description assert dataframe.tags == domain_dataframe.tags + assert dataframe.comments == domain_dataframe.comments assert dataframe.created_at == domain_dataframe.created_at assert dataframe.parent == parent diff --git a/tests/unit/client/test_experiment_client.py b/tests/unit/client/test_experiment_client.py index 561eb63d..90a70fc6 100644 --- a/tests/unit/client/test_experiment_client.py +++ b/tests/unit/client/test_experiment_client.py @@ -23,6 +23,7 @@ def test_properties(project_client): commit_hash="a-commit-hash", training_metadata=domain.utils.TrainingMetadata([("test/path", "SELECT * FROM test")]), tags=["x"], + comments=["this is a comment"], ) experiment = Experiment(domain_experiment, project) @@ -35,6 +36,7 @@ def test_properties(project_client): assert experiment.commit_hash == domain_experiment.commit_hash assert experiment.training_metadata == domain_experiment.training_metadata.training_metadata[0] assert experiment.tags == domain_experiment.tags + assert experiment.comments == domain_experiment.comments assert experiment.created_at == domain_experiment.created_at assert experiment.id == domain_experiment.id assert experiment.project == project diff --git a/tests/unit/client/test_feature_client.py b/tests/unit/client/test_feature_client.py index cfaf622b..af21408a 100644 --- a/tests/unit/client/test_feature_client.py +++ b/tests/unit/client/test_feature_client.py @@ -1,16 +1,24 @@ -from unittest.mock import MagicMock - from rubicon_ml import domain from rubicon_ml.client import Feature -def test_properties(): - domain_feature = domain.Feature("year", description="year feature", importance=0.5) - feature = Feature(domain_feature, MagicMock()) +def test_properties(project_client): + parent = project_client + domain_feature = domain.Feature( + name="year", + description="year feature", + importance=0.5, + tags=["x"], + comments=["this is a comment"], + ) + feature = Feature(domain_feature, parent) + print(feature.tags) assert feature.name == "year" assert feature.description == "year feature" assert feature.importance == 0.5 assert feature.id == domain_feature.id + assert feature.tags == domain_feature.tags + assert feature.comments == domain_feature.comments assert feature.created_at == domain_feature.created_at assert hasattr(feature, "parent") diff --git a/tests/unit/client/test_metric_client.py b/tests/unit/client/test_metric_client.py index 1e3bd5b3..b776b853 100644 --- a/tests/unit/client/test_metric_client.py +++ b/tests/unit/client/test_metric_client.py @@ -1,17 +1,24 @@ -from unittest.mock import MagicMock - from rubicon_ml import domain from rubicon_ml.client import Metric -def test_properties(): - domain_metric = domain.Metric("Accuracy", 99, description="some description") - metric = Metric(domain_metric, MagicMock()) +def test_properties(project_client): + parent = project_client + domain_metric = domain.Metric( + "Accuracy", + 99, + description="some description", + tags=["x"], + comments=["this is a comment"], + ) + metric = Metric(domain_metric, parent) assert metric.name == "Accuracy" assert metric.value == 99 assert metric.directionality == "score" assert metric.description == "some description" assert metric.id == domain_metric.id + assert metric.tags == domain_metric.tags + assert metric.comments == domain_metric.comments assert metric.created_at == domain_metric.created_at assert hasattr(metric, "parent") diff --git a/tests/unit/client/test_mixin_client.py b/tests/unit/client/test_mixin_client.py index 2933741f..1f40bd54 100644 --- a/tests/unit/client/test_mixin_client.py +++ b/tests/unit/client/test_mixin_client.py @@ -25,22 +25,30 @@ def _raise_error(): # ArtifactMixin def test_log_artifact_from_bytes(project_client): project = project_client - artifact = ArtifactMixin.log_artifact(project, data_bytes=b"content", name="test.txt") + artifact = ArtifactMixin.log_artifact( + project, data_bytes=b"content", name="test.txt", tags=["x"], comments=["this is a comment"] + ) assert artifact.id in [a.id for a in project.artifacts()] assert artifact.name == "test.txt" assert artifact.data == b"content" + assert artifact.tags == ["x"] + assert artifact.comments == ["this is a comment"] def test_log_artifact_from_file(project_client): project = project_client mock_file = MagicMock() mock_file.__enter__().read.side_effect = [b"content"] - artifact = ArtifactMixin.log_artifact(project, data_file=mock_file, name="test.txt") + artifact = ArtifactMixin.log_artifact( + project, data_file=mock_file, name="test.txt", tags=["x"], comments=["this is a comment"] + ) assert artifact.id in [a.id for a in project.artifacts()] assert artifact.name == "test.txt" assert artifact.data == b"content" + assert artifact.tags == ["x"] + assert artifact.comments == ["this is a comment"] @patch("fsspec.implementations.local.LocalFileSystem.open") @@ -49,11 +57,15 @@ def test_log_artifact_from_path(mock_open, project_client): mock_file = MagicMock() mock_file().read.side_effect = [b"content"] mock_open.side_effect = mock_file - artifact = ArtifactMixin.log_artifact(project, data_path="/path/to/test.txt") + artifact = ArtifactMixin.log_artifact( + project, data_path="/path/to/test.txt", tags=["x"], comments=["this is a comment"] + ) assert artifact.id in [a.id for a in project.artifacts()] assert artifact.name == "test.txt" assert artifact.data == b"content" + assert artifact.tags == ["x"] + assert artifact.comments == ["this is a comment"] def test_log_artifact_throws_error_if_data_missing(project_client): @@ -339,9 +351,15 @@ def test_log_dataframe(project_client, test_dataframe): project = project_client df = test_dataframe test_df_name = "test_df" - dataframe = DataframeMixin.log_dataframe(project, df, name=test_df_name, tags=["x"]) - DataframeMixin.log_dataframe(project, df, name="secondary test df", tags=["x"]) + dataframe = DataframeMixin.log_dataframe( + project, df, name=test_df_name, tags=["x"], comments=["this is a comment"] + ) + DataframeMixin.log_dataframe( + project, df, name="secondary test df", tags=["x"], comments=["this is a comment"] + ) assert dataframe.name == test_df_name + assert dataframe.tags == ["x"] + assert dataframe.comments == ["this is a comment"] assert dataframe.id in [df.id for df in project.dataframes()] diff --git a/tests/unit/client/test_parameter_client.py b/tests/unit/client/test_parameter_client.py index f87f9189..1ea9367e 100644 --- a/tests/unit/client/test_parameter_client.py +++ b/tests/unit/client/test_parameter_client.py @@ -1,16 +1,23 @@ -from unittest.mock import MagicMock - from rubicon_ml import domain from rubicon_ml.client import Parameter -def test_properties(): - domain_parameter = domain.Parameter("name", value="value", description="description") - parameter = Parameter(domain_parameter, MagicMock()) +def test_properties(project_client): + parent = project_client + domain_parameter = domain.Parameter( + "name", + value="value", + description="description", + tags=["x"], + comments=["this is a comment"], + ) + parameter = Parameter(domain_parameter, parent) assert parameter.id == domain_parameter.id assert parameter.name == domain_parameter.name assert parameter.value == domain_parameter.value assert parameter.description == domain_parameter.description + assert parameter.tags == domain_parameter.tags + assert parameter.comments == domain_parameter.comments assert parameter.created_at == domain_parameter.created_at assert hasattr(parameter, "parent")