bfp_utils.py
import math
import os
import random

import numpy as np
import torch


def save(model, save_dir, save_prefix, epochs):
    # Save the model's parameters as <save_prefix>_<epochs>.pt inside save_dir,
    # creating the directory if it does not exist.
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    save_prefix = os.path.join(save_dir, save_prefix)
    save_path = '{}_{}.pt'.format(save_prefix, epochs)
    torch.save(model.state_dict(), save_path)


def convert_msg_to_label(pad_msg, dict_msg):
    # Convert each padded message (a row of word indices) into a multi-hot
    # vector over the message vocabulary dict_msg.
    nrows, ncols = pad_msg.shape
    labels = list()
    for i in range(nrows):
        column = list(set(list(pad_msg[i, :])))
        label = np.zeros(len(dict_msg))
        for c in column:
            label[c] = 1
        labels.append(label)
    return np.array(labels)
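

# For illustration only (the toy vocabulary size and indices below are
# assumptions, not values from the CC2Vec data): with a dict_msg of size 5 and
# pad_msg = np.array([[1, 2, 0, 0], [3, 4, 0, 0]]), convert_msg_to_label
# returns a (2, 5) array whose first row has ones at indices 0, 1, 2 and whose
# second row has ones at indices 0, 3, 4.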


def mini_batches(X_added_code, X_removed_code, Y, mini_batch_size=64, seed=0, shuffled=True):
    # Split the added-code / removed-code tensors and the labels into
    # mini-batches. Any remainder smaller than mini_batch_size is dropped here,
    # unlike mini_batches_PNExtended below.
    m = Y.shape[0]  # number of training examples
    mini_batches = []
    np.random.seed(seed)
    if shuffled:
        permutation = list(np.random.permutation(m))
        shuffled_X_added = X_added_code[permutation, :, :, :, :]
        shuffled_X_removed = X_removed_code[permutation, :, :, :, :]
        if len(Y.shape) == 1:
            shuffled_Y = Y[permutation]
        else:
            shuffled_Y = Y[permutation, :]
    else:
        shuffled_X_added = X_added_code
        shuffled_X_removed = X_removed_code
        shuffled_Y = Y
    # number of complete mini-batches of size mini_batch_size in the partitioning
    num_complete_minibatches = int(math.floor(m / float(mini_batch_size)))
    for k in range(0, num_complete_minibatches):
        mini_batch_X_added = shuffled_X_added[k * mini_batch_size: (k + 1) * mini_batch_size, :, :, :, :]
        mini_batch_X_removed = shuffled_X_removed[k * mini_batch_size: (k + 1) * mini_batch_size, :, :, :, :]
        if len(Y.shape) == 1:
            mini_batch_Y = shuffled_Y[k * mini_batch_size: (k + 1) * mini_batch_size]
        else:
            mini_batch_Y = shuffled_Y[k * mini_batch_size: (k + 1) * mini_batch_size, :]
        mini_batch = (mini_batch_X_added, mini_batch_X_removed, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches


def mini_batches_PNExtended(X_ftr, X_msg, X_added_code, X_removed_code, Y, shuffled=False, mini_batch_size=64, seed=0):
    # Same idea as mini_batches above, but also carries the handcrafted-feature
    # and message inputs, and keeps the final partial batch.
    m = Y.shape[0]  # number of training examples
    mini_batches = []
    np.random.seed(seed)
    # Step 1: Shuffle (X, Y)
    if shuffled:
        permutation = list(np.random.permutation(m))
        shuffled_X_ftr = X_ftr[permutation, :]
        shuffled_X_msg = X_msg[permutation, :]
        shuffled_X_added = X_added_code[permutation, :, :, :, :]
        shuffled_X_removed = X_removed_code[permutation, :, :, :, :]
        if len(Y.shape) == 1:
            shuffled_Y = Y[permutation]
        else:
            shuffled_Y = Y[permutation, :]
    else:
        shuffled_X_ftr = X_ftr
        shuffled_X_msg = X_msg
        shuffled_X_added = X_added_code
        shuffled_X_removed = X_removed_code
        shuffled_Y = Y
    # Step 2: Partition (X, Y), minus the end case.
    # number of complete mini-batches of size mini_batch_size in the partitioning
    num_complete_minibatches = int(math.floor(m / float(mini_batch_size)))
    for k in range(0, num_complete_minibatches):
        mini_batch_X_ftr = shuffled_X_ftr[k * mini_batch_size: (k + 1) * mini_batch_size, :]
        mini_batch_X_msg = shuffled_X_msg[k * mini_batch_size: (k + 1) * mini_batch_size, :]
        mini_batch_X_added = shuffled_X_added[k * mini_batch_size: (k + 1) * mini_batch_size, :, :, :, :]
        mini_batch_X_removed = shuffled_X_removed[k * mini_batch_size: (k + 1) * mini_batch_size, :, :, :, :]
        if len(Y.shape) == 1:
            mini_batch_Y = shuffled_Y[k * mini_batch_size: (k + 1) * mini_batch_size]
        else:
            mini_batch_Y = shuffled_Y[k * mini_batch_size: (k + 1) * mini_batch_size, :]
        mini_batch = (mini_batch_X_ftr, mini_batch_X_msg, mini_batch_X_added, mini_batch_X_removed, mini_batch_Y)
        mini_batches.append(mini_batch)
    # Step 3: Handle the end case (last mini-batch smaller than mini_batch_size).
    if m % mini_batch_size != 0:
        mini_batch_X_ftr = shuffled_X_ftr[num_complete_minibatches * mini_batch_size: m, :]
        mini_batch_X_msg = shuffled_X_msg[num_complete_minibatches * mini_batch_size: m, :]
        mini_batch_X_added = shuffled_X_added[num_complete_minibatches * mini_batch_size: m, :, :, :, :]
        mini_batch_X_removed = shuffled_X_removed[num_complete_minibatches * mini_batch_size: m, :, :, :, :]
        if len(Y.shape) == 1:
            mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size: m]
        else:
            mini_batch_Y = shuffled_Y[num_complete_minibatches * mini_batch_size: m, :]
        mini_batch = (mini_batch_X_ftr, mini_batch_X_msg, mini_batch_X_added, mini_batch_X_removed, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches
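

# A minimal, illustrative sketch of how these helpers might be driven; the
# tensor shapes, vocabulary size, and feature width below are assumptions for
# demonstration, not values taken from the CC2Vec configuration.
if __name__ == '__main__':
    n_commits, vocab = 100, 1000
    X_ftr = np.random.rand(n_commits, 14)                                    # handcrafted features (assumed width)
    X_msg = np.random.randint(0, vocab, size=(n_commits, 256))               # padded commit messages
    X_added = np.random.randint(0, vocab, size=(n_commits, 2, 10, 5, 32))    # added code: files x hunks x lines x words (assumed)
    X_removed = np.random.randint(0, vocab, size=(n_commits, 2, 10, 5, 32))  # removed code, same layout
    Y = np.random.randint(0, 2, size=n_commits)                              # binary labels
    batches = mini_batches_PNExtended(X_ftr, X_msg, X_added, X_removed, Y,
                                      shuffled=True, mini_batch_size=64)
    for ftr, msg, added, removed, y in batches:
        print(ftr.shape, msg.shape, added.shape, removed.shape, y.shape)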