Deep Learning Course 4

Week 1 - Convolutional Neural Networks: Step by Step

3.1 - Zero-Padding

X_pad = np.pad(X, ((0, 0), (pad, pad), (pad, pad), (0,0)), mode='constant', constant_values = (0,0))
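
A quick shape check (a minimal sketch; the sizes below are arbitrary):

import numpy as np
np.random.seed(1)
X = np.random.randn(4, 3, 3, 2)          # a batch of 4 images, 3x3, 2 channels
X_pad = np.pad(X, ((0, 0), (2, 2), (2, 2), (0, 0)), mode='constant', constant_values=(0, 0))
print(X.shape, X_pad.shape)              # (4, 3, 3, 2) (4, 7, 7, 2)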

3.2 - Single step of convolution

s = a_slice_prev * W    # elementwise product of the input slice and the filter
Z = np.sum(s)           # sum over all entries of the volume
Z = Z + np.sum(b)       # add the bias (b is a (1, 1, 1) array)
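
A minimal sanity check of the same computation on random data (the sizes are arbitrary):

import numpy as np
np.random.seed(1)
a_slice_prev = np.random.randn(4, 4, 3)   # one f x f x n_C_prev slice of the input
W = np.random.randn(4, 4, 3)              # one filter
b = np.random.randn(1, 1, 1)              # its bias
Z = np.sum(a_slice_prev * W) + np.sum(b)  # single scalar output
print(Z)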

3.3 - Convolutional Neural Networks - Forward pass



(m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
(f, f, n_C_prev, n_C) = W.shape

stride = hparameters["stride"]
pad = hparameters["pad"]

n_H = int((n_H_prev - f + 2 * pad) / stride) + 1
n_W = int((n_W_prev - f + 2 * pad) / stride) + 1

Z = np.zeros((m, n_H, n_W, n_C))

A_prev_pad = zero_pad(A_prev, pad)

for i in range(m):                            # loop over the batch of training examples
    a_prev_pad = A_prev_pad[i]                # select the i-th padded activation
    for h in range(n_H):                      # vertical axis of the output volume
        vert_start = h * stride
        vert_end = vert_start + f

        for w in range(n_W):                  # horizontal axis of the output volume
            horiz_start = w * stride
            horiz_end = horiz_start + f

            for c in range(n_C):              # channels (= number of filters) of the output
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]

                weights = W[:, :, :, c]
                biases = b[:, :, :, c]
                Z[i, h, w, c] = conv_single_step(a_slice_prev, weights, biases)
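
As a quick check of the output-shape formula used above (a worked example with arbitrary sizes):

# n_H = int((n_H_prev - f + 2 * pad) / stride) + 1
# with n_H_prev = 5, f = 3, pad = 1, stride = 2:
print(int((5 - 3 + 2 * 1) / 2) + 1)      # 3, so Z is (m, 3, 3, n_C)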

4.1 - Forward Pooling



for i in range(m):                            # loop over the batch of training examples
    for h in range(n_H):                      # vertical axis of the output volume
        vert_start = h * stride
        vert_end = vert_start + f

        for w in range(n_W):                  # horizontal axis of the output volume
            horiz_start = w * stride
            horiz_end = horiz_start + f

            for c in range(n_C):              # channels of the output volume
                a_prev_slice = A_prev[i, vert_start:vert_end, horiz_start:horiz_end, c]

                if mode == "max":
                    A[i, h, w, c] = np.max(a_prev_slice)
                elif mode == "average":
                    A[i, h, w, c] = np.mean(a_prev_slice)
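
Note that pooling here uses no padding, so the output size follows the no-pad formula (a worked example, arbitrary sizes):

# n_H = int(1 + (n_H_prev - f) / stride)
# with n_H_prev = 4, f = 3, stride = 1:
print(int(1 + (4 - 3) / 1))              # 2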

5.2 Pooling layer - backward pass

5.2.1 - Max pooling - backward pass
mask = (np.max(x) == x)   # True only at the position of the max of window x
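
For example, on an arbitrary 2x2 window the mask is True only where the max sits; in the backward pass that is the only entry that influenced the forward max, so it alone receives the gradient:

import numpy as np
x = np.array([[1, 3],
              [4, 2]])
print(np.max(x) == x)                    # [[False False]
                                         #  [ True False]]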

5.2.2 - Average pooling - backward pass

(n_H, n_W) = shape                # dimensions of the pooling window
average = dz / (n_H * n_W)        # spread the gradient evenly over the window
a = np.ones(shape) * average      # every entry gets the same share
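
A worked example: distributing dz = 2 over a 2x2 window puts 2 / 4 = 0.5 in every cell:

import numpy as np
print(np.ones((2, 2)) * (2 / (2 * 2)))   # [[0.5 0.5]
                                         #  [0.5 0.5]]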

5.2.3 Putting it together: Pooling backward

(A_prev, hparameters) = cache

stride = hparameters["stride"]
f = hparameters["f"]

m, n_H_prev, n_W_prev, n_C_prev = A_prev.shape
m, n_H, n_W, n_C = dA.shape

dA_prev = np.zeros(A_prev.shape)

for i in range(m):                            # loop over the batch of training examples

    a_prev = A_prev[i]

    for h in range(n_H):
        for w in range(n_W):
            for c in range(n_C):

                vert_start = h * stride
                vert_end = vert_start + f
                horiz_start = w * stride
                horiz_end = horiz_start + f

                if mode == "max":
                    # route the gradient back only to the entry that achieved the max
                    a_prev_slice = a_prev[vert_start:vert_end, horiz_start:horiz_end, c]
                    mask = create_mask_from_window(a_prev_slice)
                    dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += dA[i, h, w, c] * mask
                elif mode == "average":
                    # distribute the gradient evenly over the f x f window
                    da = dA[i, h, w, c]
                    shape = (f, f)
                    dA_prev[i, vert_start:vert_end, horiz_start:horiz_end, c] += distribute_value(da, shape)
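
A usage sketch mirroring the course's test cell (pool_forward and pool_backward are the assignment's own helpers, assumed in scope):

import numpy as np
np.random.seed(1)
A_prev = np.random.randn(5, 5, 3, 2)
hparameters = {"stride": 1, "f": 2}
A, cache = pool_forward(A_prev, hparameters)
dA = np.random.randn(5, 4, 2, 2)         # same shape as A
dA_prev = pool_backward(dA, cache, mode="max")
print(dA_prev.shape)                     # (5, 5, 3, 2), same shape as A_prev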

Convolutional Neural Networks - Application

1.1 - Create placeholders

X = tf.placeholder(tf.float32, shape=[None, n_H0, n_W0, n_C0])
Y = tf.placeholder(tf.float32, shape=[None, n_y])

1.2 - Initialize parameters

W1 = tf.get_variable("W1", [4, 4, 3, 8], initializer = tf.contrib.layers.xavier_initializer(seed = 0))
W2 = tf.get_variable("W2", [2, 2, 8, 16], initializer = tf.contrib.layers.xavier_initializer(seed = 0))

1.3 - Forward propagation

Z1 = tf.nn.conv2d(X, W1, strides = [1,1,1,1], padding = 'SAME')
A1 = tf.nn.relu(Z1)
P1 = tf.nn.max_pool(A1, ksize = [1,8,8,1], strides = [1,8,8,1], padding = 'SAME')
Z2 = tf.nn.conv2d(P1, W2, strides = [1,1,1,1], padding = 'SAME')
A2 = tf.nn.relu(Z2)
P2 = tf.nn.max_pool(A2, ksize = [1,4,4,1], strides = [1,4,4,1], padding = 'SAME')
F = tf.contrib.layers.flatten(P2)
Z3 = tf.contrib.layers.fully_connected(F, 6, activation_fn=None)
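
For reference, a hedged shape trace, assuming the 64x64x3 SIGNS inputs this assignment uses:

# X  : (m, 64, 64, 3)
# Z1 : (m, 64, 64, 8)    conv 4x4, 8 filters, stride 1, 'SAME'
# P1 : (m, 8, 8, 8)      max pool 8x8, stride 8
# Z2 : (m, 8, 8, 16)     conv 2x2, 16 filters, stride 1, 'SAME'
# P2 : (m, 2, 2, 16)     max pool 4x4, stride 4
# F  : (m, 64)           flatten 2 * 2 * 16
# Z3 : (m, 6)            fully connected, 6 classes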

1.4 - Compute cost

cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits = Z3, labels = Y))

1.5 Model

X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
parameters = initialize_parameters()
Z3 = forward_propagation(X, parameters)
cost = compute_cost(Z3, Y)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
### ...
_ , temp_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})

Week 2 - Residual Networks

2.1 - The identity block

X = Conv2D(filters = F2, kernel_size = (f, f), strides = (1,1), padding = 'same', name = conv_name_base + '2b', kernel_initializer = glorot_uniform(seed=0))(X)
X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)
X = Activation('relu')(X)

X = Conv2D(filters = F3, kernel_size = (1, 1), strides = (1,1), padding = 'valid', name = conv_name_base + '2c', kernel_initializer = glorot_uniform(seed=0))(X)
X = BatchNormalization(axis = 3, name = bn_name_base + '2c')(X)

X = layers.Add()([X, X_shortcut])
X = Activation('relu')(X)

2.2 - The convolutional block

X = Conv2D(F2, (f, f), strides = (1,1), padding = 'same', name = conv_name_base + '2b', kernel_initializer = glorot_uniform(seed=0))(X)
X = BatchNormalization(axis = 3, name = bn_name_base + '2b')(X)
X = Activation('relu')(X)

X = Conv2D(F3, (1, 1), strides = (1,1), name = conv_name_base + '2c', kernel_initializer = glorot_uniform(seed=0))(X)
X = BatchNormalization(axis = 3, name = bn_name_base + '2c')(X)

X_shortcut = Conv2D(F3, (1, 1), strides = (s,s), name = conv_name_base + '1', kernel_initializer = glorot_uniform(seed=0))(X_shortcut)
X_shortcut = BatchNormalization(axis = 3, name = bn_name_base + '1')(X_shortcut)

X = layers.Add()([X, X_shortcut])
X = Activation('relu')(X)

3 - Building your first ResNet model (50 layers)

X = convolutional_block(X, f = 3, filters = [128, 128, 512], stage = 3, block='a', s = 2)
X = identity_block(X, 3, [128, 128, 512], stage=3, block='b')
X = identity_block(X, 3, [128, 128, 512], stage=3, block='c')
X = identity_block(X, 3, [128, 128, 512], stage=3, block='d')

X = convolutional_block(X, f = 3, filters = [256, 256, 1024], stage = 4, block='a', s = 2)
X = identity_block(X, 3, [256, 256, 1024], stage=4, block='b')
X = identity_block(X, 3, [256, 256, 1024], stage=4, block='c')
X = identity_block(X, 3, [256, 256, 1024], stage=4, block='d')
X = identity_block(X, 3, [256, 256, 1024], stage=4, block='e')
X = identity_block(X, 3, [256, 256, 1024], stage=4, block='f')

X = convolutional_block(X, f = 3, filters = [512, 512, 2048], stage = 5, block='a', s = 2)
X = identity_block(X, 3, [512, 512, 2048], stage=5, block='b')
X = identity_block(X, 3, [512, 512, 2048], stage=5, block='c')

X = AveragePooling2D((2,2), name='avg_pool')(X)

Week 3 - Autonomous driving - Car detection

2.1 - Model details

box_scores = np.multiply(box_confidence, box_class_probs)

box_classes = K.argmax(box_scores, axis=-1)
box_class_scores = K.max(box_scores, axis=-1)

filtering_mask = K.greater_equal(box_class_scores, threshold)

scores = tf.boolean_mask(box_class_scores, filtering_mask)
boxes = tf.boolean_mask(boxes, filtering_mask)
classes = tf.boolean_mask(box_classes, filtering_mask)
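
A shape sketch of what flows through the filter (using the assignment's 19x19 grid, 5 anchors, and 80 classes):

# box_confidence  : (19, 19, 5, 1)
# box_class_probs : (19, 19, 5, 80)
# box_scores      : (19, 19, 5, 80)     broadcast elementwise product
# box_classes     : (19, 19, 5)         index of the best class per box
# box_class_scores: (19, 19, 5)         score of that best class
# filtering_mask  : (19, 19, 5)         True where box_class_scores >= threshold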

2.3 - Non-max suppression

iou

xi1 = max(box1_x1, box2_x1)
yi1 = max(box1_y1, box2_y1)
xi2 = min(box1_x2, box2_x2)
yi2 = min(box1_y2, box2_y2)
inter_width = xi2 - xi1
inter_height = yi2 - yi1
inter_area = max(inter_width, 0) * max(inter_height, 0)

box1_area = (box1_y2 - box1_y1) * (box1_x2 - box1_x1)
box2_area = (box2_y2 - box2_y1) * (box2_x2 - box2_x1)
union_area = (box1_area + box2_area) - inter_area

iou = inter_area / union_area
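
A worked example in the (x1, y1, x2, y2) corner format the function expects:

# box1 = (2, 1, 4, 3), box2 = (1, 2, 3, 4)
# intersection: x in [2, 3], y in [2, 3]  ->  inter_area = 1 * 1 = 1
# box1_area = box2_area = (4 - 2) * (3 - 1) = 4, union_area = 4 + 4 - 1 = 7
# iou = 1 / 7 ≈ 0.143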

yolo_non_max_suppression

nms_indices = tf.image.non_max_suppression(boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold)

scores = K.gather(scores, nms_indices)
boxes = K.gather(boxes, nms_indices)
classes = K.gather(classes, nms_indices)
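
tf.image.non_max_suppression needs max_boxes as a tensor; in the assignment it is created and initialized just before the call, roughly as below (a sketch of that setup):

max_boxes_tensor = K.variable(max_boxes, dtype='int32')              # tensor form of max_boxes
K.get_session().run(tf.variables_initializer([max_boxes_tensor]))   # initialize the variable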

2.4 Wrapping up the filtering

box_confidence, box_xy, box_wh, box_class_probs = yolo_outputs

boxes = yolo_boxes_to_corners(box_xy, box_wh)

scores, boxes, classes = yolo_filter_boxes(box_confidence, boxes, box_class_probs, threshold = score_threshold)

boxes = scale_boxes(boxes, image_shape)

scores, boxes, classes = yolo_non_max_suppression(scores, boxes, classes, max_boxes = max_boxes, iou_threshold = iou_threshold)

3.5 - Run the graph on an image

out_scores, out_boxes, out_classes = sess.run([scores, boxes, classes], feed_dict={yolo_model.input: image_data, K.learning_phase(): 0})

Week 4

Deep Learning & Art: Neural Style Transfer

3.1 - Computing the content cost

a_C_unrolled = tf.transpose(a_C)
a_G_unrolled = tf.transpose(a_G)

J_content = (1/ (4* n_H * n_W * n_C)) * tf.reduce_sum(tf.pow((a_G_unrolled - a_C_unrolled), 2))
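
This implements the content cost

$$J_{content}(C,G) = \frac{1}{4 \times n_H \times n_W \times n_C} \sum_{\text{all entries}} \left(a^{(C)} - a^{(G)}\right)^2$$

where $a^{(C)}$ and $a^{(G)}$ are the chosen hidden layer's activations for the content and generated images. The unrolling via tf.transpose is optional here, since tf.reduce_sum sums over every entry either way.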

3.2.1 - Style matrix

$$\mathbf{G}_{gram} = \mathbf{A}_{unrolled} \mathbf{A}_{unrolled}^T$$

GA = tf.matmul(A, tf.transpose(A))

3.2.2 - Style cost

$$J_{style}^{[l]}(S,G) = \frac{1}{4 \times {n_C}^2 \times (n_H \times n_W)^2} \sum_{i=1}^{n_C}\sum_{j=1}^{n_C}\left(G^{(S)}_{(gram)i,j} - G^{(G)}_{(gram)i,j}\right)^2 \tag{2}$$

m, n_H, n_W, n_C = a_G.get_shape().as_list()

a_S = tf.transpose(tf.reshape(a_S, [n_H*n_W, n_C]))
a_G = tf.transpose(tf.reshape(a_G, [n_H*n_W, n_C]))

GS = gram_matrix(a_S)
GG = gram_matrix(a_G)

J_style_layer = 1./(4 * n_C**2 * (n_H*n_W)**2) * tf.reduce_sum(tf.pow((GS - GG), 2))

3.2.3 Style Weights
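
Each layer's style cost $J_{style}^{[l]}$ is weighted by a coefficient $\lambda^{[l]}$ (the STYLE_LAYERS weights) and summed into the overall style cost:

$$J_{style}(S,G) = \sum_{l} \lambda^{[l]} J_{style}^{[l]}(S,G)$$

3.3 - Defining the total cost to optimize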

J = alpha * J_content + beta * J_style

Face Recognition

1.2 - The Triplet Loss

pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), axis=-1)   # ||f(A) - f(P)||^2 per example
neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), axis=-1)   # ||f(A) - f(N)||^2 per example
basic_loss = tf.add(tf.subtract(pos_dist, neg_dist), alpha)                   # add the margin alpha

loss = tf.reduce_sum(tf.maximum(basic_loss, 0.0))                             # only violating triplets contribute
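
This is the triplet loss

$$\mathcal{J} = \sum_{i=1}^{m} \left[ \left\| f(A^{(i)}) - f(P^{(i)}) \right\|_2^2 - \left\| f(A^{(i)}) - f(N^{(i)}) \right\|_2^2 + \alpha \right]_+$$

which pushes the anchor-positive distance at least a margin $\alpha$ below the anchor-negative distance; the $[\cdot]_+$ (tf.maximum with 0) discards triplets that already satisfy the margin. Note the distances are summed with axis=-1 so each triplet keeps its own distance before the max is applied.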