forked from vkkhare/RecoEdge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
preprocess_data.py
52 lines (39 loc) · 1.49 KB
/
preprocess_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from argparse import ArgumentParser
from fedrec.utilities import random_state, registry
import fl_strategies
import experiments
import yaml
class Processor:
    """Build a model's preprocessor from a config mapping and run it.

    The constructor resolves the model class named by ``config['model']``
    through the project registry and instantiates that class's ``Preproc``
    with the ``config['model']['preproc']`` settings. ``process`` then asks
    the resulting object to preprocess its data.
    """

    def __init__(self, config, log_dir) -> None:
        """Create the preprocessor described by ``config``.

        Args:
            config: Mapping that provides ``config['model']`` (which in turn
                provides a ``'preproc'`` entry) and, optionally,
                ``"model_seed"``.
            log_dir: Stored on the instance as ``self.log_dir``; it is not
                otherwise used inside this class.
        """
        self.log_dir = log_dir

        # A missing "model_seed" yields None, which is passed through as-is.
        seed = config.get("model_seed", None)
        self.model_random = random_state.RandomContext(seed)

        # Construction happens inside the random context -- presumably so
        # that any randomness during instantiation is governed by the seed
        # (TODO confirm against RandomContext).
        with self.model_random:
            # 1. Construct model
            model_cls = registry.lookup('model', config['model'])
            preproc_cls = model_cls.Preproc
            preproc_config = config['model']['preproc']
            self.model_preproc = registry.instantiate(
                preproc_cls, preproc_config)

    def process(self):
        """Run ``preprocess_data()`` on the preprocessor built in ``__init__``."""
        preproc = self.model_preproc
        preproc.preprocess_data()
def main(argv=None):
    """Parse command-line options, load the YAML config, and run preprocessing.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what a plain ``main()`` call did
            before this parameter existed.

    Raises:
        SystemExit: Raised by argparse when ``--config`` is missing or an
            argument cannot be parsed.
    """
    parser = ArgumentParser()
    # Required: there is nothing to load without a path, and leaving it
    # optional let a missing flag reach open(None) and fail with TypeError.
    parser.add_argument("--config", type=str, required=True)
    parser.add_argument("--logdir", type=str)
    # NOTE(review): action="store_true" together with default=True means this
    # option is True whether or not the flag is given. Kept unchanged so the
    # CLI contract does not shift; the parsed value is not read in this
    # function.
    parser.add_argument(
        "--dataset-multiprocessing",
        action="store_true",
        default=True,
        help=(
            "The Kaggle dataset can be multiprocessed in an environment "
            "with more than 7 CPU cores and more than 20 GB of memory. \n "
            "The Terabyte dataset can be multiprocessed in an environment "
            "with more than 24 CPU cores and at least 1 TB of memory."
        ),
    )
    # gpu (parsed but not read in this function)
    parser.add_argument("--use-gpu", action="store_true", default=False)
    args = parser.parse_args(argv)

    # Explicit encoding so the config is decoded the same way on every
    # platform instead of depending on the locale default.
    with open(args.config, 'r', encoding="utf-8") as stream:
        config = yaml.safe_load(stream)

    Processor(config, args.logdir).process()
# Run the command-line entry point only when this file is executed as a
# script, so importing the module has no side effects.
if __name__ == "__main__":
    main()