• 手写 LASSO 回归的 Python 实现


    1. import numpy as np
    2. from matplotlib.font_manager import FontProperties
    3. from sklearn.datasets import make_regression
    4. from sklearn.model_selection import train_test_split
    5. import matplotlib.pyplot as plt
    class Lasso():
        """Linear regression with an L1 penalty, fitted by batch (sub)gradient descent.

        The objective minimised is

            1 / (2 * n) * ||X w + b - y||^2 + alpha * ||w||_1

        The intercept ``b`` is not penalised. Features are standardised inside
        ``lasso_train``; the statistics used are returned with the parameters so
        that ``predict`` can apply the same transform to new data.
        """

        def __init__(self):
            pass

        def prepare_data(self):
            """Build a synthetic regression data set and split it into train/test.

            Returns:
                Tuple ``(X_train, X_test, y_train, y_test)``; both target arrays
                are reshaped to column vectors of shape (n, 1).
            """
            # Generate sample data (more features than samples).
            X, y = make_regression(n_samples=40, n_features=80, random_state=0, noise=0.5)
            # Split into training and test sets.
            X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
            return X_train, X_test, y_train.reshape(-1, 1), y_test.reshape(-1, 1)

        def initialize_params(self, dims):
            """Return zero-initialised parameters.

            Args:
                dims: Number of features.

            Returns:
                ``(w, b)`` where ``w`` is a (dims, 1) array of zeros and ``b`` is 0.
            """
            w = np.zeros((dims, 1))
            b = 0
            return w, b

        def l1_loss(self, X, y, w, b, alpha):
            """Evaluate the L1-regularised squared loss and its (sub)gradient.

            Args:
                X: Feature matrix, shape (n, d).
                y: Targets as a column vector, shape (n, 1).
                w: Weights, shape (d, 1).
                b: Scalar intercept.
                alpha: L1 regularisation strength.

            Returns:
                ``(y_hat, loss, dw, db)``: predictions, scalar loss, gradient
                with respect to ``w`` (shape (d, 1)) and with respect to ``b``.
            """
            num_train = X.shape[0]  # number of samples
            y_hat = np.dot(X, w) + b  # model predictions
            residual = y_hat - y
            # The 1/(2n) factor makes the loss the exact antiderivative of the
            # gradients below; with 1/n the reported loss and the gradient would
            # disagree by a factor of 2 on the data-fit term.
            loss = np.sum(residual ** 2) / (2 * num_train) + alpha * np.sum(np.abs(w))
            # np.sign(w) is a subgradient of |w| (0 where w == 0).
            dw = np.dot(X.T, residual) / num_train + alpha * np.sign(w)
            db = np.sum(residual) / num_train
            return y_hat, loss, dw, db

        def lasso_train(self, X, y, learning_rate, epochs, alpha):
            """Fit the model with batch gradient descent on standardised features.

            Args:
                X: Training features, shape (n, d).
                y: Training targets, shape (n, 1) or (n,).
                learning_rate: Gradient-descent step size.
                epochs: Number of parameter updates to perform (must be >= 1).
                alpha: L1 regularisation strength.

            Returns:
                ``(loss, loss_list, params, grads)`` where ``loss`` is the loss
                evaluated before the final update, ``loss_list`` holds one loss
                per epoch, ``params`` has keys ``'w'``, ``'b'``, ``'mean'`` and
                ``'std'``, and ``grads`` has keys ``'dw'`` and ``'db'`` from the
                final epoch.

            Raises:
                ValueError: If ``epochs`` is smaller than 1.
            """
            if epochs < 1:
                # Without at least one pass there is no loss/gradient to return.
                raise ValueError("epochs must be a positive integer")

            X = np.asarray(X, dtype=float)
            # Force a column vector: a 1-D y would broadcast against the (n, 1)
            # predictions into an (n, n) residual matrix.
            y = np.asarray(y, dtype=float).reshape(-1, 1)

            # Standardise features; keep the statistics for predict().
            mean = np.mean(X, axis=0)
            std = np.std(X, axis=0)
            # Constant columns have zero std; divide by 1 instead so they become
            # all-zero features rather than NaN.
            std = np.where(std == 0, 1.0, std)
            X = (X - mean) / std

            loss_list = []
            w, b = self.initialize_params(X.shape[1])
            for _ in range(epochs):
                y_hat, loss, dw, db = self.l1_loss(X, y, w, b, alpha)
                # Gradient-descent update.
                w -= learning_rate * dw
                b -= learning_rate * db
                loss_list.append(loss)

            params = {
                'w': w,
                'b': b,
                'mean': mean,
                'std': std,
            }
            grads = {
                'dw': dw,
                'db': db,
            }
            return loss, loss_list, params, grads

        def predict(self, X, params):
            """Predict targets for ``X`` using fitted parameters.

            Args:
                X: Raw (unstandardised) features, shape (n, d).
                params: Dict with ``'w'`` and ``'b'``. When it also carries
                    ``'mean'`` and ``'std'`` (as returned by ``lasso_train``),
                    ``X`` is standardised with them first; otherwise ``X`` is
                    used as given.

            Returns:
                Predictions, shape (n, 1).
            """
            w = params['w']
            b = params['b']
            mean = params.get('mean')
            std = params.get('std')
            if mean is not None and std is not None:
                # Apply the same transform the weights were trained under.
                X = (X - mean) / std
            y_pred = np.dot(X, w) + b
            return y_pred
    if __name__ == '__main__':
        # Train the model for a range of regularisation strengths and plot how
        # many weights end up close to zero for each one.
        lasso = Lasso()
        X_train, X_test, y_train, y_test = lasso.prepare_data()
        # Ten evenly spaced strengths from 0.01 to 0.10. linspace is used
        # because the length of np.arange with a float step depends on
        # floating-point rounding.
        alphas = np.linspace(0.01, 0.1, 10)
        # Per alpha: number of weights with |w| < 0.1, used as a proxy for sparsity.
        wc = []
        for alpha in alphas:
            # Arguments: train X, train y, learning rate, epochs, regularisation strength.
            loss, loss_list, params, grads = lasso.lasso_train(X_train, y_train, 0.02, 3000, alpha)
            w = np.squeeze(params['w'])
            count = np.sum(np.abs(w) < 1e-1)
            wc.append(count)
        # Configure a font able to render the Chinese axis labels.
        plt.rcParams['font.sans-serif'] = ['SimHei']
        plt.rcParams['axes.unicode_minus'] = False
        plt.figure(figsize=(10, 8))
        plt.plot(alphas, wc, 'o-')
        plt.xlabel('正则项系数', fontsize=15)
        plt.ylabel('参数w矩阵的稀疏度', fontsize=15)
        plt.show()

  • 相关阅读:
    elasticsearch源码解析TODO列表
    SpringCloud-微服务CAP原则
    RabbitMQ基础
    40、jenkins部署vue项目
    常见HTML面试题
    大数据、Hadoop、Hbase介绍
    Vue2.0开发之——Vue基础用法-事件绑定$event(20)
    智慧工地管理系统源码(电脑端+手机端+APP+SAAS云平台)
    【实验】配置用户自动获取IPv6地址的案例
    linux中sshd是什么(ssh服务无法启动解决办法)
  • 原文地址:https://blog.csdn.net/qq_58158950/article/details/134435537