-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathomniglot_tasks.py
118 lines (91 loc) · 3.4 KB
/
omniglot_tasks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
"""
This file was modified from: Giacomo Spigler. Meta-learnt priors slow down catastrophic forgetting in neural networks. arXiv preprint arXiv:1909.04170, 2019.
Utility functions to create Tasks from the Omniglot dataset.
The created tasks will be derived from CB_OCCTask, and can be aggregated in a TaskDistribution object.
"""
import numpy as np
import pickle
from task import CB_OCCTask, OCCTask
from task_distribution import TaskDistribution
# Module-level caches holding the per-alphabet Omniglot splits. They are
# populated (reassigned) by get_omniglot_allcharacters_data_split(); each is
# a list with one entry per alphabet (features or labels for that alphabet).
charomniglot_trainX = []
charomniglot_trainY = []
charomniglot_valX = []
charomniglot_valY = []
charomniglot_testX = []
charomniglot_testY = []
def _relabel_and_flatten(xs_per_alphabet, ys_per_alphabet):
    """
    Offset each alphabet's labels so they are unique across alphabets, then
    flatten all alphabets into single feature/label lists.
    Note: labels are offset *in place* (ys_per_alphabet[i] += offset). The
    same underlying label arrays are referenced by the module-level
    charomniglot_* caches, so those are updated as well (this preserves the
    original behavior of the triplicated loops this helper replaces).
    Arguments:
        xs_per_alphabet: list
            one entry per alphabet, each a sequence of feature arrays
        ys_per_alphabet: list
            one entry per alphabet, each an array of labels (must support
            in-place += with an int)
    Returns:
        (flat_x, flat_y): flat lists of features and globally-unique labels
    """
    flat_x = []
    flat_y = []
    cur_label_start = 0
    for alphabet_i in range(len(ys_per_alphabet)):
        # Shift this alphabet's labels past every label used so far.
        ys_per_alphabet[alphabet_i] += cur_label_start
        flat_x.extend(xs_per_alphabet[alphabet_i])
        flat_y.extend(ys_per_alphabet[alphabet_i])
        cur_label_start += len(set(ys_per_alphabet[alphabet_i]))
    return flat_x, flat_y


def get_omniglot_allcharacters_data_split(
    path_to_pkl,
):
    """
    Load the pkl-wrapped Omniglot dataset and split its alphabets into
    meta-training / meta-validation / meta-testing sets of characters.
    The original train and test alphabet lists are first concatenated, then
    split purely by alphabet index: alphabets [0, 25) -> meta-train,
    [25, 30) -> meta-validation, [30, ...) -> meta-test. Within each split,
    character labels are renamed so they are unique across alphabets.
    Side effect: the module-level charomniglot_* caches are repopulated with
    the per-alphabet (relabeled) data.
    Arguments:
        path_to_pkl: string
            Path to the pkl wrapped Omniglot dataset. The pickle must be a
            dict with keys "trainX", "trainY", "testX", "testY", each a list
            with one entry per alphabet.
    Returns:
        trX : array
            features of the meta-training examples, rescaled to [0, 1]
        trY : array
            labels of the meta-training examples
        valX : array
            features of the meta-validation examples, rescaled to [0, 1]
        valY : array
            labels of the meta-validation examples
        teX : array
            features of the meta-testing examples, rescaled to [0, 1]
        teY : array
            labels of the meta-testing examples
    """
    with open(path_to_pkl, "rb") as f:
        d = pickle.load(f)
    trainX_ = d["trainX"]
    trainY_ = d["trainY"]
    testX_ = d["testX"]
    testY_ = d["testY"]

    # Merge the original train/test alphabet lists; the meta-split below is
    # done only by alphabet index.
    trainX_.extend(testX_)
    trainY_.extend(testY_)

    global charomniglot_trainX
    global charomniglot_trainY
    global charomniglot_valX
    global charomniglot_valY
    global charomniglot_testX
    global charomniglot_testY

    cutoff_tr, cutoff_val = 25, 30
    charomniglot_trainX = trainX_[:cutoff_tr]
    charomniglot_trainY = trainY_[:cutoff_tr]
    charomniglot_valX = trainX_[cutoff_tr:cutoff_val]
    charomniglot_valY = trainY_[cutoff_tr:cutoff_val]
    charomniglot_testX = trainX_[cutoff_val:]
    charomniglot_testY = trainY_[cutoff_val:]

    # Create a single large dataset per split, with labels renamed so they
    # are unique across alphabets.
    trX, trY = _relabel_and_flatten(charomniglot_trainX, charomniglot_trainY)
    valX, valY = _relabel_and_flatten(charomniglot_valX, charomniglot_valY)
    teX, teY = _relabel_and_flatten(charomniglot_testX, charomniglot_testY)

    # Convert to float32 arrays; features are rescaled from [0, 255] to [0, 1].
    trX = np.asarray(trX, dtype=np.float32) / 255.0
    trY = np.asarray(trY, dtype=np.float32)
    valX = np.asarray(valX, dtype=np.float32) / 255.0
    valY = np.asarray(valY, dtype=np.float32)
    teX = np.asarray(teX, dtype=np.float32) / 255.0
    teY = np.asarray(teY, dtype=np.float32)

    return trX, trY, valX, valY, teX, teY