-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathdenoise.py
110 lines (97 loc) · 3.48 KB
/
denoise.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
"""
Yizhe Zhang
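Perturbation to the input: noise functions that corrupt sentences (sequences
of token ids) by substituting, permuting, inserting or deleting tokens.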
"""
import numpy as np
import os
import scipy.io as sio
from math import floor
import pdb
def add_noise(sents, opt):
    """Corrupt ``sents`` with the noise type selected by ``opt.substitution``.

    Recognised codes:
        's'  -> substitute_sent
        'p'  -> permutate_sent
        'a'  -> add_sent
        'd'  -> delete_sent
        'm'  -> mixed_noise_sent
        'sc' -> substitute_sent_char

    Any other value returns ``sents`` itself, untouched (no copy is made).
    """
    mode = opt.substitution
    if mode == 's':
        return substitute_sent(sents, opt)
    if mode == 'p':
        return permutate_sent(sents, opt)
    if mode == 'a':
        return add_sent(sents, opt)
    if mode == 'd':
        return delete_sent(sents, opt)
    if mode == 'm':
        return mixed_noise_sent(sents, opt)
    if mode == 'sc':
        return substitute_sent_char(sents, opt)
    # Unknown code: pass the input through unchanged.
    return sents
def permutate_sent(sents, opt):
    """Cyclically shift tokens among randomly chosen positions of each sentence.

    For every sentence, ``opt.permutation`` positions are drawn with
    replacement from all indices except the last one, so the final token is
    never moved. The token at each drawn position is replaced by the token at
    the next drawn position, and the last drawn position receives the token
    that was originally at the first one.

    Args:
        sents: sequence of sentences; each sentence is copied with ``[:]``
            before being modified.
        opt: options object; only ``opt.permutation`` (int) is read.

    Returns:
        A new list with one (possibly shuffled) copy per input sentence.
        Sentences of length <= 1 are copied unchanged, and so is every
        sentence when ``opt.permutation`` is 0.
    """
    n_picks = opt.permutation
    sents_p = []
    for sent in sents:
        sent_temp = sent[:]
        # Nothing to rotate: too short, or no positions requested. Without
        # the second check an empty index array would be indexed below.
        if len(sent_temp) <= 1 or n_picks == 0:
            sents_p.append(sent_temp)
            continue
        idx_s = np.random.choice(len(sent_temp) - 1, size=n_picks, replace=True)
        carried = sent_temp[idx_s[0]]
        # Each picked slot takes the token of the slot picked after it.
        for dst, src in zip(idx_s[:-1], idx_s[1:]):
            sent_temp[dst] = sent_temp[src]
        # Close the cycle with the token saved from the first slot.
        sent_temp[idx_s[-1]] = carried
        sents_p.append(sent_temp)
    return sents_p
def substitute_sent(sents, opt):
    """Overwrite randomly chosen tokens of each sentence with random word ids.

    For every sentence longer than one token, ``opt.permutation`` positions
    are drawn with replacement from all indices except the last, and each
    drawn position is overwritten with an id drawn from
    ``range(opt.n_words)``. Shorter sentences are copied unchanged.

    Returns a new list of sentence copies.
    """
    def _corrupt(sentence):
        tokens = sentence[:]
        if len(tokens) > 1:
            positions = np.random.choice(
                len(tokens) - 1, size=opt.permutation, replace=True
            )
            for pos in positions:
                tokens[pos] = np.random.choice(opt.n_words)
        return tokens

    return [_corrupt(sentence) for sentence in sents]
def delete_sent(sents, opt):
    """Delete randomly chosen tokens from each sentence.

    For every sentence longer than one token, ``opt.permutation`` positions
    are drawn with replacement from all indices except the last, and the
    tokens at those positions are removed. Because positions may repeat,
    between 1 and ``opt.permutation`` tokens are dropped (none when
    ``opt.permutation`` is 0). The final token is always kept.

    Tokens are removed by position rather than by overwriting them with a
    marker value, so tokens that happen to equal -1 survive and the input
    sentences are never modified.

    Args:
        sents: sequence of sentences.
        opt: options object; only ``opt.permutation`` (int) is read.

    Returns:
        A new list of sentences. Sentences of length <= 1 are returned as
        ``[:]`` copies; all others as fresh lists.
    """
    sents_p = []
    for sent in sents:
        if len(sent) <= 1:
            sents_p.append(sent[:])
            continue
        idx_s = np.random.choice(len(sent) - 1, size=opt.permutation, replace=True)
        # A set gives O(1) membership tests and collapses repeated draws.
        dropped = set(idx_s.tolist())
        sents_p.append([tok for pos, tok in enumerate(sent) if pos not in dropped])
    return sents_p
def add_sent(sents, opt):
    """Insert random word ids into each sentence.

    For every sentence longer than one token, ``opt.permutation`` insertion
    points are drawn with replacement from all indices except the last of the
    original sentence. A fresh id drawn from ``range(opt.n_words)`` is
    inserted at each point in turn, and the result is cut to at most
    ``opt.maxlen`` tokens. Sentences of length <= 1 are copied as-is and are
    not truncated.

    Returns a new list of sentences.
    """
    noisy = []
    for sentence in sents:
        tokens = sentence[:]
        if len(tokens) > 1:
            slots = np.random.choice(
                len(tokens) - 1, size=opt.permutation, replace=True
            )
            for slot in slots:
                tokens.insert(slot, np.random.choice(opt.n_words))
            # Only noised sentences are clipped to the maximum length.
            tokens = tokens[:opt.maxlen]
        noisy.append(tokens)
    return noisy
def mixed_noise_sent(sents, opt):
    """Apply deletion, then insertion, then substitution noise to ``sents``.

    Each stage receives the previous stage's output and the same ``opt``.
    """
    noisy = sents
    for corrupt in (delete_sent, add_sent, substitute_sent):
        noisy = corrupt(noisy, opt)
    return noisy
def substitute_sent_char(sents, opt):
    """Overwrite a fraction of each sentence's tokens with random ids in [2, 27].

    Only positions whose token is not 1 are eligible (presumably 1 is a
    reserved id such as padding or a separator -- confirm against the
    vocabulary). For a sentence with ``k`` eligible positions,
    ``int(opt.permutation * k)`` positions are drawn with replacement from
    them and each is overwritten with an id drawn from ``range(2, 28)``.

    Args:
        sents: sequence of sentences; each sentence is copied with ``[:]``
            before being modified.
        opt: options object; ``opt.permutation`` is used here as a fraction
            of the eligible positions, not as a count.

    Returns:
        A new list of sentence copies. Sentences of length <= 1, and
        sentences with no eligible position, are copied unchanged.
    """
    # Loop-invariant pool of replacement ids, built once.
    replacement_ids = list(range(2, 28))
    sents_p = []
    for sent in sents:
        sent_temp = sent[:]
        if len(sent_temp) <= 1:
            sents_p.append(sent_temp)
            continue
        permute_choice = [ic for ic, tok in enumerate(sent_temp) if tok != 1]
        # np.random.choice is documented to reject an empty population, so
        # skip sampling when no position is eligible.
        if permute_choice:
            n_sub = int(opt.permutation * len(permute_choice))
            idx_s = np.random.choice(permute_choice, size=n_sub, replace=True)
            for pos in idx_s:
                sent_temp[pos] = np.random.choice(replacement_ids)
        sents_p.append(sent_temp)
    return sents_p