cd D:\software\OneDrive\桌面\吴恩达深度学习课后作业\第四周-一步步搭建多层神经网络以及应用(1 & 2)
D:\software\OneDrive\桌面\吴恩达深度学习课后作业\第四周-一步步搭建多层神经网络以及应用(1 & 2)
import numpy as np
import h5py
import matplotlib.pyplot as plt
import lr_utils
from testCases import *
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
%matplotlib inline
# Configure matplotlib defaults used by the figures in this notebook.
plt.rcParams['figure.figsize'] = (5.0, 4.0) # default figure size
plt.rcParams['image.interpolation'] = 'nearest' # image interpolation mode
plt.rcParams['image.cmap'] = 'gray' # default colormap
np.random.seed(1) # fix the random seed
D:\software\Anaconda3\lib\site-packages\h5py\__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
1、初始化两层的网络和L层的神经网络的参数。
2、实现正向传播模块(在下图中以紫色显示)。
- 完成模型正向传播步骤的LINEAR部分(得到 $Z^{[l]}$)。
- 提供使用的ACTIVATION函数(relu / Sigmoid)。
- 将前两个步骤合并为新的[LINEAR-> ACTIVATION]前向函数。
- 堆叠[LINEAR-> RELU]正向函数L-1次(第1到L-1层),并在末尾添加[LINEAR-> SIGMOID](最后的层)。这合成了一个新的L_model_forward函数。
3、计算损失。
4、实现反向传播模块(在下图中以红色表示)。
- 完成模型反向传播步骤的LINEAR部分。
- 提供的ACTIVATE函数的梯度(relu_backward / sigmoid_backward)
- 将前两个步骤组合成新的[LINEAR-> ACTIVATION]反向函数。
- 将[LINEAR-> RELU]向后堆叠L-1次,并在新的L_model_backward函数中后向添加[LINEAR-> SIGMOID]
5、最后更新参数。
首先编写两个辅助函数用来初始化模型的参数。
第一个函数将用于初始化两层模型的参数。 第二个将把初始化过程推广到L层模型上。
练习:创建并初始化2层神经网络的参数。
说明:
模型的结构为:LINEAR -> RELU -> LINEAR -> SIGMOID。
w:随机初始化权重矩阵。 确保准确的维度,使用np.random.randn(shape)* 0.01。
b:将偏差初始化为0。 使用np.zeros(shape)。
def initialize_parameters(n_x, n_h, n_y):
    """Build the parameters of a two-layer network.

    Weights are drawn from a standard normal distribution and scaled by
    0.01; biases start at zero. The random seed is reset to 1 on every
    call, so repeated calls return the same values.

    Args:
        n_x: Size of the input layer.
        n_h: Size of the hidden layer.
        n_y: Size of the output layer.

    Returns:
        Dict with keys "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h)
        and "b2" (n_y, 1).
    """
    np.random.seed(1)

    # Draw W1 before W2 so the random stream is consumed in layer order.
    hidden_weights = 0.01 * np.random.randn(n_h, n_x)
    output_weights = 0.01 * np.random.randn(n_y, n_h)

    parameters = {
        "W1": hidden_weights,
        "b1": np.zeros((n_h, 1)),
        "W2": output_weights,
        "b2": np.zeros((n_y, 1)),
    }

    expected_shapes = {
        "W1": (n_h, n_x),
        "b1": (n_h, 1),
        "W2": (n_y, n_h),
        "b2": (n_y, 1),
    }
    for key, shape in expected_shapes.items():
        assert parameters[key].shape == shape

    return parameters
# Try initialize_parameters on a small 2-2-1 network and show every entry.
parameters = initialize_parameters(2, 2, 1)
for key in ("W1", "b1", "W2", "b2"):
    print(key + " = " + str(parameters[key]))
W1 = [[ 0.01624345 -0.00611756]
[-0.00528172 -0.01072969]]
b1 = [[0.]
[0.]]
W2 = [[ 0.00865408 -0.02301539]]
b2 = [[0.]]
练习:实现L层神经网络的初始化。
def initialize_parameters_deep(layer_dims):
    """Initialize the weights and biases of a multi-layer network.

    The random seed is reset to 3 on every call, so the result depends only
    on ``layer_dims``.

    Args:
        layer_dims: Sequence of layer sizes, input layer first.

    Returns:
        Dict with "W<i>" of shape (layer_dims[i], layer_dims[i-1]) and
        "b<i>" of shape (layer_dims[i], 1) for i = 1 .. len(layer_dims) - 1.
    """
    np.random.seed(3)
    parameters = {}
    num_layers = len(layer_dims)

    for i in range(1, num_layers):
        fan_in = layer_dims[i - 1]
        fan_out = layer_dims[i]
        # Scale by 1/sqrt(fan_in). The earlier `* 0.01` scaling left the
        # accuracy stuck at 64, per the original author's note.
        # Only the `layer_dims` argument is read here, never a global.
        parameters["W" + str(i)] = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        parameters["b" + str(i)] = np.zeros((fan_out, 1))

        assert parameters["W" + str(i)].shape == (fan_out, fan_in)
        assert parameters["b" + str(i)].shape == (fan_out, 1)

    return parameters
# Try initialize_parameters_deep on a 5-4-3 network and show every entry.
parameters = initialize_parameters_deep([5, 4, 3])
for key in ("W1", "b1", "W2", "b2"):
    print(key + " = " + str(parameters[key]))
W1 = [[ 0.01788628 0.0043651 0.00096497 -0.01863493 -0.00277388]
[-0.00354759 -0.00082741 -0.00627001 -0.00043818 -0.00477218]
[-0.01313865 0.00884622 0.00881318 0.01709573 0.00050034]
[-0.00404677 -0.0054536 -0.01546477 0.00982367 -0.01101068]]
b1 = [[0.]
[0.]
[0.]
[0.]]
W2 = [[-0.01185047 -0.0020565 0.01486148 0.00236716]
[-0.01023785 -0.00712993 0.00625245 -0.00160513]
[-0.00768836 -0.00230031 0.00745056 0.01976111]]
b2 = [[0.]
[0.]
[0.]]
现在,你已经初始化了参数,接下来将执行正向传播模块。 首先实现一些基本函数,用于稍后的模型实现。按以下顺序完成三个函数:
LINEAR
LINEAR -> ACTIVATION,其中激活函数采用ReLU或Sigmoid。
[LINEAR -> RELU] X (L-1) -> LINEAR -> SIGMOID(整个模型)
def linear_forward(A, W, b):
    """Compute the linear part of a layer: Z = W . A + b.

    Args:
        A: Activations fed into the layer.
        W: Weight matrix of the layer.
        b: Bias of the layer (added to the product by broadcasting).

    Returns:
        Tuple (Z, cache) where cache is (A, W, b), kept for the backward pass.
    """
    pre_activation = np.dot(W, A) + b

    expected_shape = (W.shape[0], A.shape[1])
    assert pre_activation.shape == expected_shape

    return pre_activation, (A, W, b)
# Run linear_forward on the inputs supplied by the course test-case helper.
A, W, b = linear_forward_test_case()
Z,cache = linear_forward(A, W, b);
print("Z = " + str(Z))
Z = [[ 3.26295337 -1.23429987]]
把两个函数(线性和激活)组合为一个函数(LINEAR-> ACTIVATION)
练习:实现 LINEAR->ACTIVATION 层的正向传播。
def linear_activation_forward(A_prev, W, b, activation):
    """Run one LINEAR -> ACTIVATION layer forward.

    Args:
        A_prev: Activations from the previous layer (or the input data).
        W: Weight matrix of this layer.
        b: Bias of this layer.
        activation: Either "sigmoid" or "relu".

    Returns:
        Tuple (A, cache): A is the layer output, and cache is
        (linear_cache, activation_cache) for use in the backward pass.

    Raises:
        ValueError: If ``activation`` is neither "sigmoid" nor "relu".
    """
    # Pick the activation up front so an unsupported name fails with a clear
    # error instead of an UnboundLocalError further down.
    if activation == "sigmoid":
        activation_fn = sigmoid
    elif activation == "relu":
        activation_fn = relu
    else:
        raise ValueError(
            'activation must be "sigmoid" or "relu", got {!r}'.format(activation)
        )

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activation_fn(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])
    cache = (linear_cache, activation_cache)
    return A, cache
# Run the same test inputs through both supported activations.
A_prev, W, b = linear_activation_forward_test_case()
for label, activation in (("sigmoid", "sigmoid"), ("ReLU", "relu")):
    A, cache = linear_activation_forward(A_prev, W, b, activation)
    print("With " + label + ": A = " + str(A))
With sigmoid: A = [[0.96890023 0.11013289]]
With ReLU: A = [[3.43896131 0. ]]
为了方便实现L层神经网络,你将需要一个函数来复制前一个函数(使用RELU的linear_activation_forward)L-1次,
以及复制带有SIGMOID的linear_activation_forward。
练习:实现上述模型的正向传播。
提示:
使用你先前编写的函数
使用for循环复制[LINEAR-> RELU](L-1)次
不要忘记在“caches”列表中更新缓存。 要将新值 c 添加到 list 中,可以使用 list.append(c)。
def L_model_forward(X, parameters):
    """Forward pass of the [LINEAR -> RELU] x (L-1) -> LINEAR -> SIGMOID model.

    Args:
        X: Input data fed to the first layer.
        parameters: Dict holding "W1", "b1", ..., "WL", "bL".

    Returns:
        Tuple (AL, caches): AL is the sigmoid output of the last layer, with
        shape (1, X.shape[1]); caches lists one cache per layer, in layer
        order.
    """
    # Every layer contributes one W and one b entry.
    num_layers = len(parameters) // 2
    caches = []
    current = X

    # Layers 1 .. L-1 (L itself excluded) use ReLU.
    for layer in range(1, num_layers):
        current, layer_cache = linear_activation_forward(
            current,
            parameters["W" + str(layer)],
            parameters["b" + str(layer)],
            activation="relu",
        )
        caches.append(layer_cache)

    # The final layer uses sigmoid.
    AL, output_cache = linear_activation_forward(
        current,
        parameters["W" + str(num_layers)],
        parameters["b" + str(num_layers)],
        activation="sigmoid",
    )
    caches.append(output_cache)

    assert AL.shape == (1, X.shape[1])
    return AL, caches
# Run the full forward pass on the course test case; one cache per layer is expected.
X, parameters = L_model_forward_test_case()
AL,caches = L_model_forward(X, parameters)
print("AL = " + str(AL))
print("Length of caches list = " + str(len(caches)))
AL = [[0.17007265 0.2524272 ]]
Length of caches list = 2
def compute_cost(AL, Y):
    """Compute the cross-entropy cost between predictions and labels.

    Args:
        AL: Predicted probabilities, same shape as Y.
        Y: True labels; Y.shape[1] is taken as the number of examples.

    Returns:
        The cost as a zero-dimensional array.
    """
    m = Y.shape[1]

    per_example = Y * np.log(AL) + (1 - Y) * np.log(1 - AL)
    summed = np.sum(per_example, axis=1, keepdims=True)
    cost = np.squeeze(-1 / m * summed)

    # The result must be a scalar.
    assert cost.shape == ()
    return cost
# Evaluate the cost on the labels/predictions supplied by the course test case.
Y,AL = compute_cost_test_case()
cost = compute_cost(AL, Y)
print("cost = " + str(cost))
cost = 0.41493159961539694
def linear_backward(dZ, cache):
    """Backward pass for the linear part of one layer.

    Args:
        dZ: Gradient of the cost with respect to this layer's linear output.
        cache: Tuple (A_prev, W, b) stored by the forward pass.

    Returns:
        Tuple (dA_prev, dW, db) with the same shapes as A_prev, W and b.
    """
    A_prev, W, b = cache
    # The column count of A_prev is taken as the number of examples.
    inv_m = 1 / A_prev.shape[1]

    dA_prev = np.dot(W.T, dZ)
    dW = inv_m * np.dot(dZ, A_prev.T)
    db = inv_m * np.sum(dZ, axis=1, keepdims=True)

    for gradient, reference in ((dW, W), (db, b), (dA_prev, A_prev)):
        assert gradient.shape == reference.shape

    return dA_prev, dW, db
# Run linear_backward on the course test case and show each gradient.
dZ, linear_cache = linear_backward_test_case()
dA_prev, dW, db = linear_backward(dZ, linear_cache)
for label, gradient in (("dA_prev", dA_prev), ("dW", dW), ("db", db)):
    print(label + " = " + str(gradient))
dA_prev = [[ 0.51822968 -0.19517421]
[-0.40506361 0.15255393]
[ 2.37496825 -0.89445391]]
dW = [[-0.10076895 1.40685096 1.64992505]]
db = [[0.50629448]]
def linear_activation_backward(dA, cache, activation):
    """Backward pass for one LINEAR -> ACTIVATION layer.

    Args:
        dA: Gradient of the cost with respect to this layer's activation.
        cache: Tuple (linear_cache, activation_cache) stored by the forward pass.
        activation: Either "sigmoid" or "relu"; must match the forward pass.

    Returns:
        Tuple (dA_prev, dW, db) as produced by ``linear_backward``.

    Raises:
        ValueError: If ``activation`` is neither "sigmoid" nor "relu".
    """
    linear_cache, activation_cache = cache

    # Pick the activation gradient up front so an unsupported name fails with
    # a clear error instead of an UnboundLocalError at the return statement.
    if activation == "sigmoid":
        activation_backward = sigmoid_backward
    elif activation == "relu":
        activation_backward = relu_backward
    else:
        raise ValueError(
            'activation must be "sigmoid" or "relu", got {!r}'.format(activation)
        )

    dZ = activation_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
# Run the backward step with both activations; the sigmoid section ends with an extra blank line.
AL, linear_activation_cache = linear_activation_backward_test_case()
for activation, trailer in (("sigmoid", "\n"), ("relu", "")):
    dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation=activation)
    print(activation + ":")
    print("dA_prev = " + str(dA_prev))
    print("dW = " + str(dW))
    print("db = " + str(db) + trailer)
sigmoid:
dA_prev = [[ 0.11017994 0.01105339]
[ 0.09466817 0.00949723]
[-0.05743092 -0.00576154]]
dW = [[ 0.10266786 0.09778551 -0.01968084]]
db = [[-0.05729622]]
-----------------------------------------------------------------------
relu:
dA_prev = [[ 0.44090989 -0. ]
[ 0.37883606 -0. ]
[-0.2298228 0. ]]
dW = [[ 0.44513824 0.37371418 -0.10478989]]
db = [[-0.20837892]]
def L_model_backward(AL, Y, caches):
    """Backward pass of the [LINEAR -> RELU] x (L-1) -> LINEAR -> SIGMOID model.

    Args:
        AL: Output of the forward pass (sigmoid activations of the last layer).
        Y: True labels; reshaped to AL's shape before use.
        caches: List of per-layer caches from ``L_model_forward``; the last
            entry belongs to the sigmoid layer.

    Returns:
        Dict of gradients with keys "dW<l>", "db<l>" and "dA<l>" for
        l = 1 .. L. Note the key convention used here: "dA<l>" stores the
        ``dA_prev`` returned by layer l, i.e. the gradient with respect to
        the *input* of layer l.
    """
    grads = {}
    L = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))

    # Output (sigmoid) layer.
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(
        dAL, current_cache, activation="sigmoid"
    )

    # Hidden (ReLU) layers, walking from layer L-1 down to layer 1.
    for i in reversed(range(L - 1)):
        current_cache = caches[i]
        # grads["dA" + str(i + 2)] is the gradient flowing into layer i + 1.
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(
            grads["dA" + str(i + 2)], current_cache, activation="relu"
        )
        grads["dA" + str(i + 1)] = dA_prev_temp
        grads["dW" + str(i + 1)] = dW_temp
        grads["db" + str(i + 1)] = db_temp

    return grads
# Run the full backward pass on the course test case and show the first-layer gradients.
AL, Y_assess, caches = L_model_backward_test_case()
grads = L_model_backward(AL, Y_assess, caches)
for key in ("dW1", "db1", "dA1"):
    print(key + " = " + str(grads[key]))
dW1 = [[0.41010002 0.07807203 0.13798444 0.10502167]
[0. 0. 0. 0. ]
[0.05283652 0.01005865 0.01777766 0.0135308 ]]
db1 = [[-0.22007063]
[ 0. ]
[-0.02835349]]
dA1 = [[ 0. 0.52257901]
[ 0. -0.3269206 ]
[ 0. -0.32070404]
[ 0. -0.74079187]]
练习:实现update_parameters()以使用梯度下降来更新模型参数。
def update_parameters(parameters, grads, learning_rate):
    """Apply one gradient-descent step to every layer.

    Args:
        parameters: Dict holding "W<l>" and "b<l>" for each layer. Its
            entries are replaced in place.
        grads: Dict holding the matching "dW<l>" and "db<l>" gradients.
        learning_rate: Step size of the update.

    Returns:
        The same ``parameters`` dict, now holding the updated values.
    """
    num_layers = len(parameters) // 2

    for layer in range(1, num_layers + 1):
        suffix = str(layer)
        for name in ("W", "b"):
            key = name + suffix
            step = learning_rate * grads["d" + key]
            parameters[key] = parameters[key] - step

    return parameters
# Apply one update step with learning rate 0.1 to the course test case.
parameters, grads = update_parameters_test_case()
parameters = update_parameters(parameters, grads, 0.1)
for key in ("W1", "b1", "W2", "b2"):
    print(key + " = " + str(parameters[key]))
W1 = [[-0.59562069 -0.09991781 -2.14584584 1.82662008]
[-1.76569676 -0.80627147 0.51115557 -1.18258802]
[-1.0535704 -0.86128581 0.68284052 2.20374577]]
b1 = [[-0.04659241]
[-1.28888275]
[ 0.53405496]]
W2 = [[-0.55569196 0.0354055 1.32964895]]
b2 = [[-0.84610769]]
你将使用在上一个作业中实现的函数来构建深层网络,并将其应用于分类cat图像和非cat图像。
希望你会看到相对于先前的逻辑回归实现的分类,准确性有所提高。
import time
import scipy
from PIL import Image
from scipy import ndimage
np.random.seed(4)
# Load the training/test images, their labels and the class names via the course helper.
train_x_orig, train_y, test_x_orig, test_y, classes = lr_utils.load_dataset()
# Show one training image together with its label and decoded class name.
index = 7
plt.imshow(train_x_orig[index])
print("y="+str(train_y[0,index])+".Its a "+classes[train_y[0,index]].decode("utf-8")+" picture.")
y=1.Its a cat picture.
# Summarise the dataset: example counts, image size and array shapes.
m_train, num_px = train_x_orig.shape[:2]
m_test = test_x_orig.shape[0]

print("Number of training examples: " + str(m_train))
print("Number of testing examples: " + str(m_test))
print("Each image is of size: (" + str(num_px) + ", " + str(num_px) + ", 3)")
for label, data in (
    ("train_x_orig", train_x_orig),
    ("train_y", train_y),
    ("test_x_orig", test_x_orig),
    ("test_y", test_y),
):
    print(label + " shape: " + str(data.shape))
Number of training examples: 209
Number of testing examples: 50
Each image is of size: (64, 64, 3)
train_x_orig shape: (209, 64, 64, 3)
train_y shape: (1, 209)
test_x_orig shape: (50, 64, 64, 3)
test_y shape: (1, 50)
与往常一样,在将图像输入到网络之前,需要对图像进行重塑和标准化。
# Flatten every image into one column: reshape to (examples, -1), then transpose.
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0],-1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0],-1).T
# Scale the pixel values by 1/255 before feeding them to the network.
train_x = train_x_flatten/255
test_x = test_x_flatten/255
print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))
train_x's shape: (12288, 209)
test_x's shape: (12288, 50)
INPUT -> LINEAR -> RELU -> LINEAR -> SIGMOID -> OUTPUT
[LINEAR -> RELU] X (L-1) -> LINEAR -> SIGMOID
与往常一样,你将遵循深度学习步骤来构建模型:
1.初始化参数/定义超参数
2.循环num_iterations次:
a. 正向传播
b. 计算损失函数
c. 反向传播
d. 更新参数(使用参数和反向传播的梯度)
3.使用训练好的参数来预测标签
LINEAR -> RELU -> LINEAR -> SIGMOID
def initialize_parameters(n_x, n_h, n_y):
…
return parameters
def linear_activation_forward(A_prev, W, b, activation):
…
return A, cache
def compute_cost(AL, Y):
…
return cost
def linear_activation_backward(dA, cache, activation):
…
return dA_prev, dW, db
def update_parameters(parameters, grads, learning_rate):
…
return parameters
# Layer sizes of the two-layer model.
n_x = 12288  # rows of the flattened input (64 * 64 * 3, matching train_x's shape above)
n_h = 7  # hidden layer size
n_y = 1  # output layer size
layers_dims = (n_x,n_h,n_y)
def two_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
    """Train a two-layer network: LINEAR -> RELU -> LINEAR -> SIGMOID.

    After training, the recorded costs are plotted with matplotlib.

    Args:
        X: Input data fed to the first layer.
        Y: True labels, compared against the sigmoid output.
        layers_dims: Tuple (n_x, n_h, n_y) with the layer sizes.
        learning_rate: Step size of the gradient-descent update.
        num_iterations: Number of training iterations.
        print_cost: If True, print the cost every 100 iterations.

    Returns:
        Dict with the trained "W1", "b1", "W2" and "b2".
    """
    np.random.seed(5)
    costs = []
    (n_x, n_h, n_y) = layers_dims

    # Initialize the parameters.
    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(0, num_iterations):
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]

        # Forward pass.
        A1, cache1 = linear_activation_forward(X, W1, b1, activation="relu")
        A2, cache2 = linear_activation_forward(A1, W2, b2, activation="sigmoid")

        # Cost and its derivative with respect to A2.
        cost = compute_cost(A2, Y)
        dA2 = - (np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))

        # Backward pass; the gradient with respect to X is not needed.
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, activation="sigmoid")
        _, dW1, db1 = linear_activation_backward(dA1, cache1, activation="relu")
        grads = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}

        # Gradient-descent step.
        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 100 == 0:
            # Record the cost regardless of print_cost so the plot below is
            # never empty.
            costs.append(cost)
            if print_cost:
                print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))

    # Costs are sampled once every 100 iterations, hence "per hundreds".
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters
# Train the two-layer model for 2500 iterations, printing the cost as it goes.
parameters = two_layer_model(train_x, train_y, layers_dims=(n_x,n_h,n_y), num_iterations = 2500, print_cost=True)
Cost after iteration 0: 0.693049735659989
Cost after iteration 100: 0.6464320953428849
Cost after iteration 200: 0.6325140647912677
Cost after iteration 300: 0.6015024920354665
Cost after iteration 400: 0.5601966311605747
Cost after iteration 500: 0.515830477276473
Cost after iteration 600: 0.47549013139433266
Cost after iteration 700: 0.4339163151225749
Cost after iteration 800: 0.400797753620389
Cost after iteration 900: 0.35807050113237987
Cost after iteration 1000: 0.3394281538366412
Cost after iteration 1100: 0.30527536361962637
Cost after iteration 1200: 0.27491377282130186
Cost after iteration 1300: 0.2468176821061485
Cost after iteration 1400: 0.19850735037466086
Cost after iteration 1500: 0.17448318112556657
Cost after iteration 1600: 0.17080762978096237
Cost after iteration 1700: 0.11306524562164721
Cost after iteration 1800: 0.09629426845937147
Cost after iteration 1900: 0.08342617959726858
Cost after iteration 2000: 0.07439078704319078
Cost after iteration 2100: 0.06630748132267926
Cost after iteration 2200: 0.059193295010381654
Cost after iteration 2300: 0.05336140348560552
Cost after iteration 2400: 0.04855478562877014
def predict(X, Y, parameters):
    """Predict labels with a trained network and print the accuracy.

    Works for any depth supported by ``L_model_forward``, including the
    two-layer model.

    Args:
        X: Data set to classify; X.shape[1] is taken as the number of examples.
        Y: True labels, used only to compute the printed accuracy.
        parameters: Parameters of the trained model.

    Returns:
        Float array with the same shape as the network output, holding 1
        where the output exceeds 0.5 and 0 elsewhere.
    """
    m = X.shape[1]

    # Forward pass with the trained parameters.
    AL, caches = L_model_forward(X, parameters)

    # Threshold all probabilities at once instead of looping over columns.
    A = (AL > 0.5).astype(float)

    print("准确度为: " + str(float(np.sum((A == Y))/m)))
    return A
# Accuracy of the two-layer model on the training set.
predictions_train = predict(train_x, train_y, parameters)
准确度为: 1.0
# Accuracy of the two-layer model on the test set.
predictions_test = predict(test_x,test_y,parameters)
准确度为: 0.72
[LINEAR -> RELU] X (L-1) -> LINEAR -> SIGMOID
def initialize_parameters_deep(layer_dims):
…
return parameters
def L_model_forward(X, parameters):
…
return AL, caches
def compute_cost(AL, Y):
…
return cost
def L_model_backward(AL, Y, caches):
…
return grads
def update_parameters(parameters, grads, learning_rate):
…
return parameters
# Layer sizes of the deep model: 12288 inputs, hidden layers of 20, 7 and 5 units, 1 output.
layers_dims = [12288, 20, 7, 5, 1]
def L_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
    """Train an L-layer network: [LINEAR -> RELU] x (L-1) -> LINEAR -> SIGMOID.

    After training, the recorded costs are plotted with matplotlib.

    Args:
        X: Input data fed to the first layer.
        Y: True labels, compared against the sigmoid output.
        layers_dims: Sequence of layer sizes, input layer first.
        learning_rate: Step size of the gradient-descent update.
        num_iterations: Number of training iterations.
        print_cost: If True, print the cost every 100 iterations.

    Returns:
        Dict with the trained "W<l>" and "b<l>" of every layer.
    """
    np.random.seed(1)
    costs = []

    parameters = initialize_parameters_deep(layers_dims)

    for i in range(0, num_iterations):
        # Forward pass, cost, backward pass, then one gradient-descent step.
        AL, caches = L_model_forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = L_model_backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        if i % 100 == 0:
            # Record the cost regardless of print_cost so the plot below is
            # never empty.
            costs.append(cost)
            if print_cost:
                print ("Cost after iteration %i: %f" %(i, cost))

    # Costs are sampled once every 100 iterations, hence "per hundreds".
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate =" + str(learning_rate))
    plt.show()

    return parameters
# Train the deep model for 2500 iterations, printing the cost as it goes.
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)
Cost after iteration 0: 0.715732
Cost after iteration 100: 0.674738
Cost after iteration 200: 0.660337
Cost after iteration 300: 0.646289
Cost after iteration 400: 0.629813
Cost after iteration 500: 0.606006
Cost after iteration 600: 0.569004
Cost after iteration 700: 0.519797
Cost after iteration 800: 0.464157
Cost after iteration 900: 0.408420
Cost after iteration 1000: 0.373155
Cost after iteration 1100: 0.305724
Cost after iteration 1200: 0.268102
Cost after iteration 1300: 0.238725
Cost after iteration 1400: 0.206323
Cost after iteration 1500: 0.179439
Cost after iteration 1600: 0.157987
Cost after iteration 1700: 0.142404
Cost after iteration 1800: 0.128652
Cost after iteration 1900: 0.112443
Cost after iteration 2000: 0.085056
Cost after iteration 2100: 0.057584
Cost after iteration 2200: 0.044568
Cost after iteration 2300: 0.038083
Cost after iteration 2400: 0.034411
# Accuracy of the deep model on the training set.
pred_train = predict(train_x, train_y, parameters)
准确度为: 0.9952153110047847
# Accuracy of the deep model on the test set.
pred_test = predict(test_x,test_y,parameters)
准确度为: 0.78
def print_mislabeled_images(classes, X, y, p):
    """Plot every image whose prediction differs from its true label.

    Args:
        classes: Class names as byte strings, indexed by label.
        X: Data set with one flattened 64x64x3 image per column.
        y: True labels.
        p: Predicted labels.
    """
    # With 0/1 labels, prediction + truth equals 1 exactly where they disagree.
    _, wrong_columns = np.where(p + y == 1)

    plt.rcParams['figure.figsize'] = (40.0, 40.0)  # default size of the plots
    num_images = len(wrong_columns)

    for position, index in enumerate(wrong_columns, start=1):
        predicted_name = classes[int(p[0, index])].decode("utf-8")
        actual_name = classes[y[0, index]].decode("utf-8")

        plt.subplot(2, num_images, position)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + predicted_name + " \n Class: " + actual_name)
# Show the test images that the deep model got wrong.
print_mislabeled_images(classes, test_x, test_y, pred_test)
## START CODE HERE ##
my_image = "D:/software/OneDrive/桌面/testPhoto/1.jpg" # change this to the name of your image file
my_label_y = [1] # the true class of your image (1 -> cat, 0 -> non-cat)
## END CODE HERE ##

fname = my_image
image = np.array(plt.imread(fname))
# Resize to the training resolution and flatten into a single column.
my_image = np.array(Image.fromarray(image).resize(size=(num_px,num_px))).reshape((num_px*num_px*3,1))
# Divide by 255 exactly as was done for train_x and test_x; the network was
# trained on scaled inputs, so raw pixel values would give unreliable output.
my_image = my_image / 255
my_predicted_image = predict(my_image, my_label_y, parameters)

plt.imshow(image)
print ("y = " + str(np.squeeze(my_predicted_image)) + ", your L-layer model predicts a \"" + classes[int(np.squeeze(my_predicted_image)),].decode("utf-8") + "\" picture.")
准确度为: 1.0
y = 1.0, your L-layer model predicts a "cat" picture.