Skip to content

Commit

Permalink
Automatically set a seed for data pipeline
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 715752916
  • Loading branch information
Conchylicultor authored and The kauldron Authors committed Jan 15, 2025
1 parent b007732 commit cb60016
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 3 deletions.
10 changes: 10 additions & 0 deletions kauldron/data/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import abc
import dataclasses
import functools
import random
from typing import Any, Optional, TypeAlias

from etils import edc
Expand Down Expand Up @@ -76,3 +77,12 @@ def __iter__(self) -> iterators.Iterator:
raise NotImplementedError()

__repr__ = edc.repr

# For convenience: it can be annoying to have to manually set the seed when
# debugging on Colab.
def _assert_root_cfg_resolved(self) -> None:
  """Fills in a random seed when none was provided, then runs the parent check.

  When `self.seed` is still a `config_util._FakeRootCfg` placeholder at the
  time this check runs, it is replaced by an arbitrary integer drawn from
  `random.randint(0, 1000000000)` (both bounds inclusive). The parent
  class's `_assert_root_cfg_resolved` is then invoked unconditionally.
  """
  seed_is_placeholder = isinstance(
      self.seed, config_util._FakeRootCfg  # pylint: disable=protected-access
  )
  if seed_is_placeholder:
    fallback_seed = random.randint(0, 1_000_000_000)
    # Assigned through `object.__setattr__` rather than plain attribute
    # assignment -- presumably because the instance is a frozen dataclass;
    # TODO confirm against the class definition.
    object.__setattr__(self, 'seed', fallback_seed)
  super()._assert_root_cfg_resolved()
4 changes: 1 addition & 3 deletions kauldron/utils/config_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,5 @@ def _assert_root_cfg_resolved_value(self) -> None:
raise ValueError(
f'{type(self).__qualname__}.{f.name} is an unresolved'
f' `ROOT_CFG_REF` value ({value}).\nTo resolve the value, either'
f' explicitly set `{f.name}` in `__init__`, or call'
' `obj.update_from_root_cfg(root_cfg)` to copy the value from the'
' root `kd.train.Trainer` object.'
f' explicitly set `{f.name}` in `__init__`.'
)

0 comments on commit cb60016

Please sign in to comment.