-
Notifications
You must be signed in to change notification settings - Fork 111
/
balancedsampler.py
57 lines (43 loc) · 1.69 KB
/
balancedsampler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
from torch.utils.data.sampler import Sampler
import torch
class RandomBalancedSampler(Sampler):
    """Sample dataset indices in random order, a bounded number per epoch.

    The sampler walks through a random permutation of the whole dataset and
    keeps its position between epochs, so every element is visited once
    before a new permutation is drawn, even when one epoch is shorter than
    the dataset.

    Arguments:
        data_source (Dataset): dataset to sample from; only its length is used.
        epoch_size (int): number of indices produced per epoch. The epoch
            length is capped at the dataset size; a value <= 0 means
            "one full pass over the dataset".
    """

    def __init__(self, data_source, epoch_size):
        self.data_size = len(data_source)
        self.epoch_size = epoch_size
        # Cursor into the current permutation; 0 means "draw a new one".
        self.index = 0

    def __next__(self):
        """Return the next index as a plain ``int``.

        Raises:
            StopIteration: if the dataset is empty (nothing to sample).
        """
        if self.data_size == 0:
            # Avoid the modulo-by-zero below; there is nothing to draw.
            raise StopIteration
        if self.index == 0:
            # Re-shuffle the sampler at the start of every pass.
            self.indices = torch.randperm(self.data_size)
        # The cursor is advanced before the read, so one pass consumes the
        # permutation slots in the order 1, 2, ..., N-1, 0 -- still every
        # slot exactly once before the next re-shuffle.
        self.index = (self.index + 1) % self.data_size
        # int() turns the 0-dim tensor produced by indexing into a Python int.
        return int(self.indices[self.index])

    def next(self):
        """Python 2 style alias of ``__next__``."""
        return self.__next__()

    def __iter__(self):
        """Yield exactly ``len(self)`` indices, then stop.

        The cursor lives on the sampler itself, so the next call to
        ``iter()`` resumes where the previous epoch ended.
        """
        # len(self) is 0 whenever the dataset is empty, so __next__ never
        # raises StopIteration from inside this generator.
        for _ in range(len(self)):
            yield self.__next__()

    def __len__(self):
        """Return the number of indices produced per epoch."""
        if self.epoch_size > 0:
            return min(self.data_size, self.epoch_size)
        return self.data_size
class SequentialBalancedSampler(Sampler):
    """Sample dataset indices sequentially, a bounded number per epoch.

    Indices are produced in the order 0, 1, ..., N-1 and wrap around to 0.
    The position is kept between epochs, so the whole dataset is visited
    before any element is repeated, even when one epoch is shorter than the
    dataset.

    Arguments:
        data_source (Dataset): dataset to sample from; only its length is used.
        epoch_size (int): number of indices produced per epoch. The epoch
            length is capped at the dataset size; a value <= 0 means
            "one full pass over the dataset".
    """

    def __init__(self, data_source, epoch_size):
        self.data_size = len(data_source)
        self.epoch_size = epoch_size
        # Next index to hand out.
        self.index = 0

    def __next__(self):
        """Return the next index and advance the cursor, wrapping at the end.

        Raises:
            StopIteration: if the dataset is empty (nothing to sample).
        """
        if self.data_size == 0:
            # Avoid the modulo-by-zero below; there is nothing to draw.
            raise StopIteration
        # Read first, then advance, so the very first index returned is 0.
        current = self.index
        self.index = (current + 1) % self.data_size
        return current

    def next(self):
        """Python 2 style alias of ``__next__``."""
        return self.__next__()

    def __iter__(self):
        """Yield exactly ``len(self)`` indices, then stop.

        The cursor lives on the sampler itself, so the next call to
        ``iter()`` resumes where the previous epoch ended.
        """
        # len(self) is 0 whenever the dataset is empty, so __next__ never
        # raises StopIteration from inside this generator.
        for _ in range(len(self)):
            yield self.__next__()

    def __len__(self):
        """Return the number of indices produced per epoch."""
        if self.epoch_size > 0:
            return min(self.data_size, self.epoch_size)
        return self.data_size