    之前部分实现的梯度下降线性预测模型中的training example只有一个特征属性:房屋面积,这显然是不符合实际情况的,这里增加特征属性的数量再实现一次梯度下降线性预测模型。


    1. 实现线性模型:f(x) = w·x + b(w、x 均为 n 维向量,w·x 表示点积),模型参数 w、b 待定
    2. 寻找最优的w,b组合:

                 (1)引入衡量模型优劣的cost function:J(w,b) ——损失函数或者代价函数



    • 新的房子的特征有:房子面积、卧室数、楼层数、房龄共4个特征属性。
    Size (sqft) | Number of Bedrooms | Number of floors | Age of Home | Price (1000s dollars)



    # Training set: one row per house, columns are
    # [size (sqft), number of bedrooms, number of floors, age of home].
    X_train = np.array([[2104, 5, 1, 45],
                        [1416, 3, 2, 40],
                        [852, 2, 1, 35]])
    # Target values: price of each house in 1000s of dollars.
    y_train = np.array([460, 232, 178])



    # Reference parameter values (bias and one weight per feature) used to
    # exercise the model functions below.
    b_init = 785.1811367994083
    w_init = np.array([0.39133535, 18.75376741, -53.36032453, -26.42131618])


    def predict(x, w, b):
        """
        Single prediction using linear regression.

        Args:
          x (ndarray): Shape (n,) example with multiple features
          w (ndarray): Shape (n,) model parameters
          b (scalar):  model parameter

        Returns:
          p (scalar):  prediction, the dot product of x and w plus b
        """
        p = np.dot(x, w) + b
        return p




    def compute_cost(X, y, w, b):
        """
        Compute the squared-error cost of a linear model.

        The cost is sum((X[i].w + b - y[i])**2) / (2 * m) over all m examples.

        Args:
          X (ndarray (m,n)): Data, m examples with n features
          y (ndarray (m,)) : target values
          w (ndarray (n,)) : model parameters
          b (scalar)       : model parameter

        Returns:
          cost (scalar): cost

        Raises:
          ValueError: if X contains no examples (the cost would divide by zero).
        """
        m = X.shape[0]
        if m == 0:
            raise ValueError("X must contain at least one training example")
        # (m,n)(n,) -> (m,): prediction error for every example at once.
        errors = np.dot(X, w) + b - y
        cost = np.sum(errors ** 2) / (2 * m)
        return cost



    def compute_gradient(X, y, w, b):
        """
        Compute the gradient of the squared-error cost for linear regression.

        Args:
          X (ndarray (m,n)): Data, m examples with n features
          y (ndarray (m,)) : target values
          w (ndarray (n,)) : model parameters
          b (scalar)       : model parameter

        Returns:
          dj_db (scalar):       The gradient of the cost w.r.t. the parameter b.
          dj_dw (ndarray (n,)): The gradient of the cost w.r.t. the parameters w.
          Note the order: the bias gradient is returned first.

        Raises:
          ValueError: if X contains no examples (the average would divide by zero).
        """
        m, n = X.shape  # (number of examples, number of features)
        if m == 0:
            raise ValueError("X must contain at least one training example")
        # (m,n)(n,) -> (m,): prediction error for every example at once.
        err = np.dot(X, w) + b - y
        # (n,m)(m,) -> (n,): for each feature j, sum over i of err[i] * X[i, j].
        dj_dw = np.dot(X.T, err) / m
        dj_db = np.sum(err) / m
        return dj_db, dj_dw


    def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function,
                         alpha, num_iters, max_history=100000):
        """
        Perform batch gradient descent to learn w and b.

        Takes num_iters gradient steps with learning rate alpha, starting from
        w_in and b_in.

        Args:
          X (ndarray (m,n))   : Data, m examples with n features
          y (ndarray (m,))    : target values
          w_in (ndarray (n,)) : initial model parameters
          b_in (scalar)       : initial model parameter
          cost_function       : function to compute cost, called as
                                cost_function(X, y, w, b)
          gradient_function   : function to compute the gradient, called as
                                gradient_function(X, y, w, b) and returning
                                (dj_db, dj_dw)
          alpha (float)       : Learning rate
          num_iters (int)     : number of iterations to run gradient descent
          max_history (int)   : only the first max_history iterations are
                                recorded in J_history (prevents resource
                                exhaustion on very long runs)

        Returns:
          w (ndarray (n,)) : Updated values of parameters
          b (scalar)       : Updated value of parameter
          J_history (list) : cost after each recorded iteration, for graphing
        """
        J_history = []
        w = copy.deepcopy(w_in)  # avoid modifying the caller's array
        b = b_in

        # Print progress roughly 10 times, or every iteration if num_iters < 10.
        # Only evaluated inside the loop, so num_iters == 0 never divides by it.
        print_every = math.ceil(num_iters / 10)

        for i in range(num_iters):
            # Calculate the gradient and update the parameters.
            dj_db, dj_dw = gradient_function(X, y, w, b)
            w = w - alpha * dj_dw
            b = b - alpha * dj_db

            record = i < max_history
            report = i % print_every == 0
            if record or report:
                # Use the current cost so a report past the history cap does
                # not print a stale J_history[-1].
                cost = cost_function(X, y, w, b)
                if record:
                    J_history.append(cost)
                if report:
                    print(f"Iteration {i:4d}: Cost {cost:8.2f} ")

        return w, b, J_history  # final w, b and J history for graphing


    # Initialize parameters: start from all-zero weights and zero bias.
    initial_w = np.zeros_like(w_init)
    initial_b = 0.

    # Gradient descent settings.
    iterations = 1000
    alpha = 5.0e-7

    # Run gradient descent.
    w_final, b_final, J_hist = gradient_descent(X_train, y_train, initial_w, initial_b,
                                                compute_cost, compute_gradient,
                                                alpha, iterations)
    print(f"b,w found by gradient descent: {b_final:0.2f},{w_final} ")

    # Compare the learned model's predictions with the training targets.
    m, _ = X_train.shape
    for i in range(m):
        print(f"prediction: {np.dot(X_train[i], w_final) + b_final:0.2f}, target value: {y_train[i]}")

    # Plot cost versus iteration: full history on the left, and the tail
    # (from iteration 100 onward) on the right.
    fig, (ax1, ax2) = plt.subplots(1, 2, constrained_layout=True, figsize=(12, 4))
    ax1.plot(J_hist)
    ax2.plot(100 + np.arange(len(J_hist[100:])), J_hist[100:])
    ax1.set_title("Cost vs. iteration")
    ax2.set_title("Cost vs. iteration (tail)")
    ax1.set_ylabel('Cost')
    ax2.set_ylabel('Cost')
    ax1.set_xlabel('iteration step')
    ax2.set_xlabel('iteration step')
    plt.show()




