import tensorflow as tf
import tensorflow.contrib as tf_contrib

# Create a trainable variable.
def make_var(name, shape, trainable=True):
    return tf.get_variable(name, shape, trainable=trainable)

# Standard 2-D convolution layer.
def conv2d(input_, output_dim, kernel_size, stride, padding="SAME", name="conv2d", biased=False):
    input_dim = input_.get_shape()[-1]
    with tf.variable_scope(name):
        kernel = make_var(name='weights', shape=[kernel_size, kernel_size, input_dim, output_dim])
        output = tf.nn.conv2d(input_, kernel, [1, stride, stride, 1], padding=padding)
        if biased:
            biases = make_var(name='biases', shape=[output_dim])
            output = tf.nn.bias_add(output, biases)
        return output
# Atrous (dilated) convolution layer.
def atrous_conv2d(input_, output_dim, kernel_size, dilation, padding="SAME", name="atrous_conv2d", biased=False):
    input_dim = input_.get_shape()[-1]
    with tf.variable_scope(name):
        kernel = make_var(name='weights', shape=[kernel_size, kernel_size, input_dim, output_dim])
        output = tf.nn.atrous_conv2d(input_, kernel, dilation, padding=padding)
        if biased:
            biases = make_var(name='biases', shape=[output_dim])
            output = tf.nn.bias_add(output, biases)
        return output
# Transposed convolution (deconvolution) layer; multiplies the spatial size by `stride`.
# Note: the output shape assumes a batch size of 1, matching how the generator below is used.
def deconv2d(input_, output_dim, kernel_size, stride, padding="SAME", name="deconv2d"):
    input_dim = input_.get_shape()[-1]
    input_height = int(input_.get_shape()[1])
    input_width = int(input_.get_shape()[2])
    with tf.variable_scope(name):
        kernel = make_var(name='weights', shape=[kernel_size, kernel_size, output_dim, input_dim])
        output = tf.nn.conv2d_transpose(input_, kernel,
                                        [1, input_height * stride, input_width * stride, output_dim],
                                        [1, stride, stride, 1], padding=padding)
        return output
# Normalization layer. Despite the name, statistics are computed per sample over the
# spatial axes (axes=[1, 2]), so this is effectively instance normalization.
def batch_norm(input_, name="batch_norm"):
    with tf.variable_scope(name):
        input_dim = input_.get_shape()[-1]
        scale = tf.get_variable("scale", [input_dim],
                                initializer=tf.random_normal_initializer(1.0, 0.02, dtype=tf.float32))
        offset = tf.get_variable("offset", [input_dim], initializer=tf.constant_initializer(0.0))
        mean, variance = tf.nn.moments(input_, axes=[1, 2], keep_dims=True)
        epsilon = 1e-5
        inv = tf.rsqrt(variance + epsilon)
        normalized = (input_ - mean) * inv
        output = scale * normalized + offset
        return output
# Max-pooling layer.
def max_pooling(input_, kernel_size, stride, name, padding="SAME"):
    return tf.nn.max_pool(input_, ksize=[1, kernel_size, kernel_size, 1],
                          strides=[1, stride, stride, 1], padding=padding, name=name)

# Average-pooling layer.
def avg_pooling(input_, kernel_size, stride, name, padding="SAME"):
    return tf.nn.avg_pool(input_, ksize=[1, kernel_size, kernel_size, 1],
                          strides=[1, stride, stride, 1], padding=padding, name=name)

# Leaky ReLU activation.
def lrelu(x, leak=0.2, name="lrelu"):
    return tf.maximum(x, leak * x, name=name)

# ReLU activation.
def relu(input_, name="relu"):
    return tf.nn.relu(input_, name=name)
# Residual block built from two 3x3 convolutions.
# Note: the flag is inverted as written -- atrous=False selects the dilated-convolution
# branch and atrous=True selects the plain-convolution branch. All call sites below pass
# atrous=False and therefore use dilated convolutions.
def residule_block_33(input_, output_dim, kernel_size=3, stride=1, dilation=2, atrous=False, name="res"):
    if not atrous:
        conv2dc0 = atrous_conv2d(input_=input_, output_dim=output_dim, kernel_size=kernel_size, dilation=dilation, name=(name + '_c0'))
        conv2dc0_norm = batch_norm(input_=conv2dc0, name=(name + '_bn0'))
        conv2dc0_relu = relu(input_=conv2dc0_norm)
        conv2dc1 = atrous_conv2d(input_=conv2dc0_relu, output_dim=output_dim, kernel_size=kernel_size, dilation=dilation, name=(name + '_c1'))
        conv2dc1_norm = batch_norm(input_=conv2dc1, name=(name + '_bn1'))
    else:
        conv2dc0 = conv2d(input_=input_, output_dim=output_dim, kernel_size=kernel_size, stride=stride, name=(name + '_c0'))
        conv2dc0_norm = batch_norm(input_=conv2dc0, name=(name + '_bn0'))
        conv2dc0_relu = relu(input_=conv2dc0_norm)
        conv2dc1 = conv2d(input_=conv2dc0_relu, output_dim=output_dim, kernel_size=kernel_size, stride=stride, name=(name + '_c1'))
        conv2dc1_norm = batch_norm(input_=conv2dc1, name=(name + '_bn1'))
    # Skip connection followed by ReLU.
    add_raw = input_ + conv2dc1_norm
    output = relu(input_=add_raw)
    return output
# Flatten the spatial dimensions: (B, H, W, C) -> (B, H*W, C).
def hw_flatten(x):
    return tf.reshape(x, shape=[x.shape[0], -1, x.shape[-1]])
# Spectral normalization: divide the weight by an estimate of its largest singular value,
# obtained with a few steps of power iteration on a persistent `u` vector.
def spectral_norm(w, iteration=1):
    w_shape = w.shape.as_list()
    w = tf.reshape(w, [-1, w_shape[-1]])
    u = tf.get_variable("u", [1, w_shape[-1]], initializer=tf.random_normal_initializer(), trainable=False)
    u_hat = u
    v_hat = None
    for i in range(iteration):
        # One power-iteration step: v ~ W u, u ~ W^T v (each l2-normalized).
        v_ = tf.matmul(u_hat, tf.transpose(w))
        v_hat = tf.nn.l2_normalize(v_)
        u_ = tf.matmul(v_hat, w)
        u_hat = tf.nn.l2_normalize(u_)
    u_hat = tf.stop_gradient(u_hat)
    v_hat = tf.stop_gradient(v_hat)
    # Estimated largest singular value of w.
    sigma = tf.matmul(tf.matmul(v_hat, w), tf.transpose(u_hat))
    with tf.control_dependencies([u.assign(u_hat)]):
        w_norm = w / sigma
        w_norm = tf.reshape(w_norm, w_shape)
    return w_norm
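# Hypothetical usage sketch (not part of the original model): spectral_norm can wrap any
# 2-D weight matrix in the same way conv() below wraps its 4-D convolution kernel. The
# name, signature, and scope used here are illustrative assumptions only.
def _sn_dense_example(x, units, scope="sn_dense_example"):
    with tf.variable_scope(scope):
        w = tf.get_variable("kernel", shape=[x.get_shape()[-1], units],
                            initializer=tf_contrib.layers.xavier_initializer())
        b = tf.get_variable("bias", [units], initializer=tf.constant_initializer(0.0))
        # spectral_norm creates its own non-trainable "u" variable inside this scope.
        return tf.matmul(x, spectral_norm(w)) + b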
# Convolution with a spectrally normalized kernel (VALID padding), used by the attention block.
def conv(x, channels, kernel=4, stride=2, use_bias=True, scope='conv_0'):
    with tf.variable_scope(scope):
        w = tf.get_variable("kernel", shape=[kernel, kernel, x.get_shape()[-1], channels],
                            initializer=tf_contrib.layers.xavier_initializer(), regularizer=None)
        x = tf.nn.conv2d(input=x, filter=spectral_norm(w),
                         strides=[1, stride, stride, 1], padding='VALID')
        if use_bias:
            bias = tf.get_variable("bias", [channels], initializer=tf.constant_initializer(0.0))
            x = tf.nn.bias_add(x, bias)
        return x
# Self-attention block: f and g form the attention map, h carries the attended features.
def attention(x, channels, scope='attention'):
    with tf.variable_scope(scope):
        batch_size, height, width, num_channels = x.get_shape().as_list()
        f = conv(x, channels // 8, kernel=1, stride=1, scope='f_conv')   # key
        f = max_pooling(f, 2, 2, name='pool1')
        g = conv(x, channels // 8, kernel=1, stride=1, scope='g_conv')   # query
        h = conv(x, channels // 2, kernel=1, stride=1, scope='h_conv')   # value
        h = max_pooling(h, 2, 2, name='pool2')
        # Attention map over all spatial positions: [B, H*W, H*W/4].
        s = tf.matmul(hw_flatten(g), hw_flatten(f), transpose_b=True)
        beta = tf.nn.softmax(s)
        o = tf.matmul(beta, hw_flatten(h))
        gamma = tf.get_variable("gamma", [1], initializer=tf.constant_initializer(0.0))
        o = tf.reshape(o, shape=[batch_size, height, width, num_channels // 2])
        o = conv(o, channels, kernel=1, stride=1, scope='attn_conv')
        # Learned residual connection; gamma starts at 0 so the block is initially the identity.
        x = gamma * o + x
        return x
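# Hypothetical shape walk-through (an assumption, not from the original code): for the
# 1 x 64 x 64 x 256 feature map that the generator below feeds in, f and g have 32
# channels, h has 128, the attention map s is [1, 4096, 1024] after the 2x2 pooling of f,
# and the output keeps the input shape.
def _attention_shape_example():
    feat = tf.zeros([1, 64, 64, 256])
    out = attention(feat, channels=256, scope='attention_shape_example')
    return out  # static shape: (1, 64, 64, 256)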
# Generator: encoder-decoder with residual blocks and self-attention.
def Encoder_Decoder(image, gf_dim=64, reuse=False, name="generator"):
    # Generator input size: 1 x 256 x 256 x 3.
    input_dim = image.get_shape()[-1]
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        # 1st convolution block, output size: 1 x 256 x 256 x 64.
        c0 = relu(batch_norm(conv2d(input_=image, output_dim=gf_dim, kernel_size=7, stride=1, name='g_e0_c'), name='g_e0_bn'))
        # 2nd convolution block, output size: 1 x 128 x 128 x 128.
        c1 = relu(batch_norm(conv2d(input_=c0, output_dim=gf_dim * 2, kernel_size=3, stride=2, name='g_e1_c'), name='g_e1_bn'))
        # 3rd convolution block, output size: 1 x 64 x 64 x 256.
        c2 = relu(batch_norm(conv2d(input_=c1, output_dim=gf_dim * 4, kernel_size=3, stride=2, name='g_e2_c'), name='g_e2_bn'))
        # 9 residual blocks, with self-attention inserted after the 4th block.
        r1 = residule_block_33(input_=c2, output_dim=gf_dim * 4, atrous=False, name='g_r1')
        r2 = residule_block_33(input_=r1, output_dim=gf_dim * 4, atrous=False, name='g_r2')
        r3 = residule_block_33(input_=r2, output_dim=gf_dim * 4, atrous=False, name='g_r3')
        r4 = residule_block_33(input_=r3, output_dim=gf_dim * 4, atrous=False, name='g_r4')
        x = attention(r4, channels=gf_dim * 4, scope='self_attention')
        r5 = residule_block_33(input_=x, output_dim=gf_dim * 4, atrous=False, name='g_r5')
        r6 = residule_block_33(input_=r5, output_dim=gf_dim * 4, atrous=False, name='g_r6')
        r7 = residule_block_33(input_=r6, output_dim=gf_dim * 4, atrous=False, name='g_r7')
        r8 = residule_block_33(input_=r7, output_dim=gf_dim * 4, atrous=False, name='g_r8')
        r9 = residule_block_33(input_=r8, output_dim=gf_dim * 4, atrous=False, name='g_r9')
        # Output size of the 9th residual block: 1 x 64 x 64 x 256.
        # 1st deconvolution block, output size: 1 x 128 x 128 x 128.
        d1 = relu(batch_norm(deconv2d(input_=r9, output_dim=gf_dim * 2, kernel_size=3, stride=2, name='g_d1_dc'), name='g_d1_bn'))
        # 2nd deconvolution block, output size: 1 x 256 x 256 x 64.
        d2 = relu(batch_norm(deconv2d(input_=d1, output_dim=gf_dim, kernel_size=3, stride=2, name='g_d2_dc'), name='g_d2_bn'))
        # Final convolution block, output size: 1 x 256 x 256 x 3.
        d3 = conv2d(input_=d2, output_dim=input_dim, kernel_size=7, stride=1, name='g_d3_c')
        # tanh activation gives the generated output.
        output = tf.nn.tanh(d3)
        return output
# Discriminator: PatchGAN-style stack of strided convolutions.
def discriminator(image, df_dim=64, reuse=False, name="discriminator"):
    with tf.variable_scope(name):
        if reuse:
            tf.get_variable_scope().reuse_variables()
        else:
            assert tf.get_variable_scope().reuse is False
        # 1st convolution block, output size: 1 x 128 x 128 x 64.
        h0 = lrelu(conv2d(input_=image, output_dim=df_dim, kernel_size=4, stride=2, name='d_h0_conv'))
        # 2nd convolution block, output size: 1 x 64 x 64 x 128.
        h1 = lrelu(batch_norm(conv2d(input_=h0, output_dim=df_dim * 2, kernel_size=4, stride=2, name='d_h1_conv'), 'd_bn1'))
        # 3rd convolution block, output size: 1 x 32 x 32 x 256.
        h2 = lrelu(batch_norm(conv2d(input_=h1, output_dim=df_dim * 4, kernel_size=4, stride=2, name='d_h2_conv'), 'd_bn2'))
        # 4th convolution block, output size: 1 x 32 x 32 x 512.
        h3 = lrelu(batch_norm(conv2d(input_=h2, output_dim=df_dim * 8, kernel_size=4, stride=1, name='d_h3_conv'), 'd_bn3'))
        # Final convolution block, output size: 1 x 32 x 32 x 1.
        output = conv2d(input_=h3, output_dim=1, kernel_size=4, stride=1, name='d_h4_conv')
        return output
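# Minimal usage sketch (an assumption; the original repository's training script is not
# shown here): build the generator and discriminator graphs for a single 256 x 256 RGB
# image and print the resulting static tensor shapes.
if __name__ == "__main__":
    real_image = tf.placeholder(tf.float32, [1, 256, 256, 3], name="real_image")
    fake_image = Encoder_Decoder(real_image, gf_dim=64, name="generator")
    d_real = discriminator(real_image, df_dim=64, reuse=False, name="discriminator")
    d_fake = discriminator(fake_image, df_dim=64, reuse=True, name="discriminator")
    print(fake_image.shape)           # expected: (1, 256, 256, 3)
    print(d_real.shape, d_fake.shape)  # expected: (1, 32, 32, 1) each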