I recently revisited some machine learning fundamentals, in particular gradient descent. In my view, understanding the theory and the formulas is only one side of it; re-implementing the algorithm in code and stepping through it yourself makes it stick much better. The two reference links below explain this very well, so let's work through them together.
[1] 深入浅出–梯度下降法及其实现 (an accessible introduction to gradient descent and its implementation)
[2] python写出梯度下降的代码 (writing gradient descent code in Python)
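For context, here is my own shorthand summary of what both code examples below compute: the same least-squares cost and the standard gradient-descent update, written in the notation the code uses (m samples, design matrix X, parameter vector theta, learning rate alpha):

$$
J(\theta) = \frac{1}{2m}\,(X\theta - y)^{\top}(X\theta - y),
\qquad
\nabla J(\theta) = \frac{1}{m}\,X^{\top}(X\theta - y),
\qquad
\theta \leftarrow \theta - \alpha\,\nabla J(\theta).
$$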
The first example fits a straight line to 20 data points, iterating until every component of the gradient falls below 1e-5:

import numpy as np

m = 20  # Size of the points dataset.

# Points x-coordinates plus a dummy intercept column (x0, x1).
X0 = np.ones((m, 1))
X1 = np.arange(1, m + 1).reshape(m, 1)
X = np.hstack((X0, X1))

# Points y-coordinates.
y = np.array([
    3, 4, 5, 5, 2, 4, 7, 8, 11, 8, 12,
    11, 13, 13, 16, 17, 18, 17, 19, 21
]).reshape(m, 1)

alpha = 0.01  # The learning rate alpha.


def error_function(theta, X, y):
    """Error function J definition."""
    diff = np.dot(X, theta) - y
    return (1. / (2 * m)) * np.dot(np.transpose(diff), diff)


def gradient_function(theta, X, y):
    """Gradient of the function J definition."""
    diff = np.dot(X, theta) - y
    return (1. / m) * np.dot(np.transpose(X), diff)


def gradient_descent(X, y, alpha):
    """Perform gradient descent until the gradient is close to zero."""
    theta = np.array([1, 1]).reshape(2, 1)
    gradient = gradient_function(theta, X, y)
    while not np.all(np.absolute(gradient) <= 1e-5):
        theta = theta - alpha * gradient
        gradient = gradient_function(theta, X, y)
    return theta


optimal = gradient_descent(X, y, alpha)
error = error_function(optimal, X, y)[0, 0]
print('optimal:', optimal)
print('error function:', error)
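As a quick sanity check (not from the referenced article), the gradient-descent result can be compared with NumPy's closed-form least-squares solver. This is just a sketch that assumes the example above has already run, so X, y, and optimal are in scope:

# Sanity check (my own addition): compare the gradient-descent solution
# with the closed-form least-squares fit computed by NumPy.
theta_ls, residuals, rank, sv = np.linalg.lstsq(X, y, rcond=None)
print('least squares   :', theta_ls.ravel())
print('gradient descent:', optimal.ravel())
# With the 1e-5 gradient tolerance used above, the two should agree
# to several decimal places.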
The second example generates random linear data (y = 100 + 3x + noise), runs gradient descent for a fixed number of iterations, and records the cost at every step:

import numpy as np

iterations = 1000  # Number of iterations.
alpha = 0.1        # Learning rate.
m = 100            # Number of data points.


def compute_error(X, y, theta):
    """Cost function J (mean squared error with the conventional 1/2 factor)."""
    predictions = np.dot(X, theta)
    sqrErrors = (predictions - y) ** 2
    J = 1 / (2 * m) * np.sum(sqrErrors)
    return J


def compute_gradient(theta, X, y):
    """Take one gradient-descent step and return the updated theta."""
    predictions = np.dot(X, theta)
    errors = predictions - y
    theta = theta - alpha / m * np.dot(X.T, errors)
    return theta


def gradient_descent(X, y, theta, num_iters):
    """Run gradient descent for num_iters iterations."""
    J_history = np.zeros((num_iters, 1))  # Record the cost at each iteration.
    for i in range(num_iters):
        theta = compute_gradient(theta, X, y)
        J_history[i] = compute_error(X, y, theta)
    return theta, J_history


# Test code.
x = 2 * np.random.rand(m, 1)             # Generate some random input data.
y = 100 + 3 * x + np.random.randn(m, 1)  # True model: y = 100 + 3x + noise.
X_b = np.c_[np.ones((m, 1)), x]          # Add the intercept term x0 = 1.
theta = np.random.randn(2, 1)            # Initialize theta randomly.
theta, J_history = gradient_descent(X_b, y, theta, iterations)  # Run gradient descent.
print("Final parameters:", theta)
print("Final cost:", J_history[-1])
Note: this is a personal study summary. If anything here is wrong or poorly put, corrections and feedback are very welcome. Thanks!