Skip to content

Commit

Permalink
Merge branch 'main' into ablations/soldni-gantry
Browse files (browse the repository at this point in the history)
  • Loading branch information
soldni committed Jul 13, 2023
2 parents baee669 + 5508c04 commit bfa3106
Showing 1 changed file with 5 additions and 3 deletions.
8 changes: 5 additions & 3 deletions olmo/data/iterable_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,11 @@ def __init__(
def _build_global_indices(self) -> List[int]:
if self.shuffle:
# Deterministically shuffle based on epoch and seed
g = torch.Generator()
g.manual_seed(self.seed)
indices = torch.randperm(len(self.dataset), generator=g).tolist() # type: ignore[arg-type]
# Torch built-in randomness is not very random, so we use numpy.
rng = np.random.Generator(np.random.PCG64(seed=self.seed))
indices = np.arange(len(self.dataset))
rng.shuffle(indices)
indices = list(indices)
else:
indices = list(range(len(self.dataset))) # type: ignore[arg-type]

Expand Down

0 comments on commit bfa3106

Please sign in to comment.