Zhou Zhihua's *Machine Learning*: Chapter 3 Exercises


    Contents

    3.1 Analyze under what circumstances the bias term b in Eq. (3.2) need not be considered.

    3.2 Prove that, with respect to the parameter w, the objective function (3.18) of logistic regression is non-convex, but its log-likelihood function (3.27) is convex.

    3.3 Program an implementation of logistic regression and report the results on watermelon dataset 3.0α.

    3.4 Choose two UCI datasets and compare the error rates of logistic regression as estimated by 10-fold cross-validation and by leave-one-out.

    3.5 Program an implementation of linear discriminant analysis and report the results on watermelon dataset 3.0α.


    3.1 Analyze under what circumstances the bias term b in Eq. (3.2) need not be considered.

    Eq. (3.2): $f(x) = w^{\mathrm{T}}x + b$


    ① The bias b is independent of the input; without b, the model y' = wᵀx is forced to pass through the origin.
    ② Subtracting two instances of the linear model cancels b: subtract the first sample from every training sample and do linear regression on the differences, and no bias term is needed. More generally, if the features and targets are first centered to zero mean, the fitted bias is zero, as shown below.
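
    A minimal numerical sketch of point ② (assuming scikit-learn is available): center the features and the targets by their sample means, fit ordinary linear regression, and the learned intercept comes out (numerically) zero.

    import numpy as np
    from sklearn.linear_model import LinearRegression

    rng = np.random.default_rng(0)
    X = rng.normal(size=(100, 3))
    y = X @ np.array([1.0, -2.0, 0.5]) + 3.0 + rng.normal(scale=0.1, size=100)

    # center the inputs and the targets with their sample means
    Xc = X - X.mean(axis=0)
    yc = y - y.mean()

    model = LinearRegression().fit(Xc, yc)
    print(model.intercept_)  # ~0: no bias term is needed on centered data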

    3.2 Prove that, with respect to the parameter w, the objective function (3.18) of logistic regression is non-convex, but its log-likelihood function (3.27) is convex.

    Proof sketch: Eq. (3.18), $y = \frac{1}{1+e^{-(w^{\mathrm{T}}x+b)}}$, viewed as a function of $w$, is non-convex: a convex function must have a positive semidefinite Hessian everywhere, while the sigmoid is an S-shaped curve whose second derivative changes sign at its inflection point. The log-likelihood (3.27),

    $\ell(\beta)=\sum_{i=1}^{m}\left(-y_i\beta^{\mathrm{T}}\hat{x}_i+\ln\left(1+e^{\beta^{\mathrm{T}}\hat{x}_i}\right)\right),$

    is convex: its Hessian (book Eq. 3.31) is $\frac{\partial^2 \ell}{\partial\beta\,\partial\beta^{\mathrm{T}}}=\sum_{i=1}^{m}\hat{x}_i\hat{x}_i^{\mathrm{T}}\,p_1(\hat{x}_i;\beta)\bigl(1-p_1(\hat{x}_i;\beta)\bigr)$, which is positive semidefinite for every $\beta$. A numerical check follows.
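    A minimal numerical sketch of the convexity claim (assuming NumPy and randomly drawn test points): at every sampled $\beta$, the Hessian of (3.27) has no negative eigenvalues.

    import numpy as np

    rng = np.random.default_rng(0)
    X_hat = np.c_[rng.normal(size=(50, 2)), np.ones(50)]  # x -> [x, 1]

    def hessian(beta):
        # Hessian of Eq. (3.27): sum_i x_i x_i^T * p1 * (1 - p1), book Eq. (3.31)
        p1 = 1.0 / (1.0 + np.exp(-X_hat @ beta))
        return (X_hat * (p1 * (1 - p1))[:, None]).T @ X_hat

    for _ in range(5):
        beta = rng.normal(scale=5.0, size=3)
        eigvals = np.linalg.eigvalsh(hessian(beta))
        print(eigvals.min() >= -1e-9)  # True: PSD at every tested beta, consistent with convexity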

    3.3 Program an implementation of logistic regression and report the results on watermelon dataset 3.0α.

    Dataset: watermelon3_0_Ch.csv

    3.3.py

    # -*- coding: utf-8 -*-
    '''
    data import
    '''
    import numpy as np               # for matrix calculation
    import matplotlib.pyplot as plt

    # load the CSV file as a numpy matrix
    # (the file is assumed to be fully numeric here; 3.5.py below parses the
    # version with Chinese labels via pandas instead)
    dataset = np.loadtxt('watermelon3_0_Ch.csv', delimiter=",")

    # separate the data from the target attribute
    X = dataset[:, 1:3]
    y = dataset[:, 3]
    m, n = np.shape(X)

    # draw a scatter diagram of the raw data
    f1 = plt.figure(1)
    plt.title('watermelon_3a')
    plt.xlabel('density')
    plt.ylabel('ratio_sugar')
    plt.scatter(X[y == 0, 0], X[y == 0, 1], marker='o', color='k', s=100, label='bad')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], marker='o', color='g', s=100, label='good')
    plt.legend(loc='upper right')
    # plt.show()

    '''
    using the sklearn library for logistic regression
    '''
    from sklearn import metrics
    from sklearn import model_selection
    from sklearn.linear_model import LogisticRegression
    import matplotlib.pylab as pl

    # split into training and test sets with sklearn.model_selection.train_test_split()
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, test_size=0.5, random_state=0)

    # model training: fit a logistic-regression model directly on the training set
    # with sklearn.linear_model.LogisticRegression, then evaluate it on the test
    # set (confusion matrix and F1 score)
    log_model = LogisticRegression()
    log_model.fit(X_train, y_train)

    # model validation
    y_pred = log_model.predict(X_test)

    # summarize the fit of the model
    print(metrics.confusion_matrix(y_test, y_pred))
    print(metrics.classification_report(y_test, y_pred))
    precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred)

    # show the decision boundary in the plot
    # X - some data in a 2-dimensional np.array
    f2 = plt.figure(2)
    h = 0.001
    x0_min, x0_max = X[:, 0].min() - 0.1, X[:, 0].max() + 0.1
    x1_min, x1_max = X[:, 1].min() - 0.1, X[:, 1].max() + 0.1
    x0, x1 = np.meshgrid(np.arange(x0_min, x0_max, h),
                         np.arange(x1_min, x1_max, h))

    # here log_model.predict is the model's prediction (classification) function
    z = log_model.predict(np.c_[x0.ravel(), x1.ravel()])

    # put the result into a color plot; the decision regions and boundary drawn
    # with matplotlib.contourf show that the logistic-regression classifier
    # separates most of the samples correctly
    z = z.reshape(x0.shape)
    plt.contourf(x0, x1, z, cmap=pl.cm.Paired)

    # plot also the training points
    plt.title('watermelon_3a')
    plt.xlabel('density')
    plt.ylabel('ratio_sugar')
    plt.scatter(X[y == 0, 0], X[y == 0, 1], marker='o', color='k', s=100, label='bad')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], marker='o', color='g', s=100, label='good')
    # plt.show()

    '''
    hand-coded logistic regression
    '''
    from sklearn import model_selection
    import self_def

    m, n = np.shape(X)
    X_ex = np.c_[X, np.ones(m)]  # extend the variable matrix to [x, 1]
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X_ex, y, test_size=0.5, random_state=0)

    # use gradient descent to get the optimal parameter beta = [w, b] (book page 59)
    beta = self_def.gradDscent_2(X_train, y_train)

    # prediction with the fitted beta
    y_pred = self_def.predict(X_test, beta)
    m_test = np.shape(X_test)[0]

    # compute the confusion matrix and prediction accuracy
    cfmat = np.zeros((2, 2))
    for i in range(m_test):
        if y_pred[i] == y_test[i] == 0:
            cfmat[0, 0] += 1
        elif y_pred[i] == y_test[i] == 1:
            cfmat[1, 1] += 1
        elif y_pred[i] == 0:
            cfmat[1, 0] += 1
        elif y_pred[i] == 1:
            cfmat[0, 1] += 1
    print(cfmat)

    self_def.py contains the helper functions called above:

    import numpy as np


    def likelihood_sub(x, y, beta):
        '''
        @param x: one sample's variables
        @param y: one sample's label
        @param beta: the parameter vector in Eq. 3.27
        @return: this sample's term of the log-likelihood in Eq. 3.27
        '''
        return -y * np.dot(beta, x.T) + np.log(1 + np.exp(np.dot(beta, x.T)))


    def likelihood(X, y, beta):
        '''
        @param X: the sample variable matrix
        @param y: the sample label array
        @param beta: the parameter vector in Eq. 3.27
        @return: the log-likelihood of Eq. 3.27
        '''
        total = 0
        m, n = np.shape(X)
        for i in range(m):
            total += likelihood_sub(X[i], y[i], beta)
        return total


    def partial_derivative(X, y, beta):  # refer to Eq. 3.30 on book page 60
        '''
        @param X: the sample variable matrix
        @param y: the sample label array
        @param beta: the parameter vector in Eq. 3.27
        @return: the partial derivatives with respect to beta[j]
        '''
        m, n = np.shape(X)
        pd = np.zeros(n)
        for i in range(m):
            tmp = y[i] - sigmoid(X[i], beta)
            for j in range(n):
                pd[j] += X[i][j] * tmp
        return pd


    def gradDscent_1(X, y):  # basic (batch) gradient-descent algorithm
        '''
        @param X: the variable matrix
        @param y: the label array
        @return: the best parameter estimate for Eq. 3.27

        Based on the training set (note x -> [x, 1]), run fixed-step gradient
        descent on the likelihood (3.27) to lower the loss; note the numeric
        trick used below to estimate the partial gradient.
        '''
        import matplotlib.pyplot as plt
        h = 0.1          # step length of the iteration
        max_times = 500  # limit on the number of iterations
        m, n = np.shape(X)
        b = np.zeros((n, max_times))  # records the convergence curves of the parameters
        beta = np.zeros(n)            # parameter vector, initialized to zero
        delta_beta = np.ones(n) * h
        llh = 0
        for i in range(max_times):
            beta_temp = beta.copy()
            for j in range(n):
                # numeric estimate of the partial derivative
                beta[j] += delta_beta[j]
                llh_tmp = likelihood(X, y, beta)
                delta_beta[j] = -h * (llh_tmp - llh) / delta_beta[j]
                b[j, i] = beta[j]
                beta[j] = beta_temp[j]
            beta += delta_beta
            llh = likelihood(X, y, beta)
        t = np.arange(max_times)
        f2 = plt.figure(3)
        p1 = plt.subplot(311)
        p1.plot(t, b[0])
        plt.ylabel('w1')
        p2 = plt.subplot(312)
        p2.plot(t, b[1])
        plt.ylabel('w2')
        p3 = plt.subplot(313)
        p3.plot(t, b[2])
        plt.ylabel('b')
        plt.show()
        return beta


    '''
    Optimize with stochastic gradient descent instead: the method above is
    fixed-step gradient descent over the whole data set (batch gradient
    descent). It can converge slowly, its convergence curve can oscillate,
    and every iteration needs a full pass over the data, so the cost grows
    sharply with the data size. Stochastic gradient descent is tried below
    to improve the parameter search.
    '''


    def gradDscent_2(X, y):  # stochastic gradient-descent algorithm
        '''
        @param X: the variable matrix
        @param y: the label array
        @return: the best parameter estimate for Eq. 3.27

        The core idea of stochastic gradient descent is incremental learning:
        update the coefficients with one sample at a time, which allows online,
        streaming processing. To speed up convergence, a variable step length
        is used: h shrinks as the iteration count grows.
        '''
        import matplotlib.pyplot as plt
        m, n = np.shape(X)
        h = 0.5             # initial step length of the iteration
        beta = np.zeros(n)  # parameter vector, initialized to zero
        delta_beta = np.ones(n) * h
        llh = 0
        b = np.zeros((n, m))  # records the convergence curves of the parameters
        for i in range(m):
            beta_temp = beta.copy()
            for j in range(n):
                # numeric estimate of the partial derivative
                h = 0.5 * 1 / (1 + i + j)  # shrink the step length over time
                beta[j] += delta_beta[j]
                b[j, i] = beta[j]
                llh_tmp = likelihood_sub(X[i], y[i], beta)
                delta_beta[j] = -h * (llh_tmp - llh) / delta_beta[j]
                beta[j] = beta_temp[j]
            beta += delta_beta
            llh = likelihood_sub(X[i], y[i], beta)
        t = np.arange(m)
        f2 = plt.figure(3)
        p1 = plt.subplot(311)
        p1.plot(t, b[0])
        plt.ylabel('w1')
        p2 = plt.subplot(312)
        p2.plot(t, b[1])
        plt.ylabel('w2')
        p3 = plt.subplot(313)
        p3.plot(t, b[2])
        plt.ylabel('b')
        plt.show()
        return beta


    def sigmoid(x, beta):
        '''
        @param x: the predictor variables
        @param beta: the parameter vector
        @return: the sigmoid function value
        '''
        return 1.0 / (1 + np.exp(-np.dot(beta, x.T)))


    def predict(X, beta):
        '''
        predict the class label using sigmoid
        @param X: data samples of the form [x, 1]
        @param beta: the sigmoid parameter of the form [w, b]
        @return: the class label array
        '''
        m, n = np.shape(X)
        y = np.zeros(m)
        for i in range(m):
            if sigmoid(X[i], beta) > 0.5:
                y[i] = 1
        return y
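
    gradDscent_1 and gradDscent_2 estimate each partial derivative numerically rather than calling partial_derivative. A quick sanity check of that trick (a sketch; it assumes self_def.py is importable): a central finite difference of likelihood should match the analytic gradient of Eq. (3.30), which is the negative of what partial_derivative returns (the code accumulates the sign-flipped sum x̂ᵢ(yᵢ − p₁)).

    import numpy as np
    import self_def

    rng = np.random.default_rng(0)
    X = np.c_[rng.normal(size=(20, 2)), np.ones(20)]  # x -> [x, 1]
    y = rng.integers(0, 2, size=20).astype(float)
    beta = rng.normal(size=3)

    # central finite-difference gradient of the likelihood in Eq. (3.27)
    eps = 1e-6
    fd = np.zeros(3)
    for j in range(3):
        e = np.zeros(3)
        e[j] = eps
        fd[j] = (self_def.likelihood(X, y, beta + e) - self_def.likelihood(X, y, beta - e)) / (2 * eps)

    # Eq. (3.30): d l / d beta = -sum_i x_i (y_i - p1), i.e. -partial_derivative
    print(np.allclose(fd, -self_def.partial_derivative(X, y, beta), atol=1e-5))  # True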

    3.4 Choose two UCI datasets and compare the error rates of logistic regression as estimated by 10-fold cross-validation and by leave-one-out.

    Reference code: han1057578619/MachineLearning_Zhouzhihua_ProblemSets
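
    A minimal sketch of such a comparison, using scikit-learn's bundled copies of two UCI datasets (Breast Cancer Wisconsin and Iris, chosen here purely for illustration) and its cross-validation utilities:

    from sklearn.datasets import load_breast_cancer, load_iris
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score, LeaveOneOut
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    for name, loader in [('breast_cancer', load_breast_cancer), ('iris', load_iris)]:
        X, y = loader(return_X_y=True)
        model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
        # 10-fold cross-validation error rate
        err_10fold = 1 - cross_val_score(model, X, y, cv=10).mean()
        # leave-one-out error rate (one fit per sample, so noticeably slower)
        err_loo = 1 - cross_val_score(model, X, y, cv=LeaveOneOut()).mean()
        print(f'{name}: 10-fold error = {err_10fold:.4f}, LOO error = {err_loo:.4f}')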

    3.5 Program an implementation of linear discriminant analysis and report the results on watermelon dataset 3.0α.

     3.5.py

    import numpy as np
    import pandas as pd
    from matplotlib import pyplot as plt


    class LDA(object):
        # compute the class mean vectors and the within-class scatter matrix,
        # then w from Eq. 3.39; optionally plot the projection
        def fit(self, X_, y_, plot_=False):
            pos = y_ == 1
            neg = y_ == 0
            X0 = X_[neg]
            X1 = X_[pos]
            # mean vectors, shape (1, 2)
            u0 = X0.mean(0, keepdims=True)  # (1, n)
            u1 = X1.mean(0, keepdims=True)
            # within-class scatter matrix, Eq. 3.33, shape (2, 2)
            sw = np.dot((X0 - u0).T, (X0 - u0)) + np.dot((X1 - u1).T, (X1 - u1))
            # w = Sw^-1 (u0 - u1), Eq. 3.39, shape (1, 2)
            w = np.dot(np.linalg.inv(sw), (u0 - u1).T).reshape(1, -1)
            if plot_:
                fig, ax = plt.subplots()
                ax.spines['right'].set_color('none')
                ax.spines['top'].set_color('none')
                ax.spines['left'].set_position(('data', 0))
                ax.spines['bottom'].set_position(('data', 0))
                plt.scatter(X1[:, 0], X1[:, 1], c='k', marker='o', label='good')
                plt.scatter(X0[:, 0], X0[:, 1], c='r', marker='x', label='bad')
                plt.xlabel('density', labelpad=1)
                plt.ylabel('ratio_sugar')
                plt.legend(loc='upper right')
                x_tmp = np.linspace(-0.05, 0.15)
                y_tmp = x_tmp * w[0, 1] / w[0, 0]
                plt.plot(x_tmp, y_tmp, '#808080', linewidth=1)
                wu = w / np.linalg.norm(w)
                # projections of the negative and positive sample points
                X0_project = np.dot(X0, np.dot(wu.T, wu))
                plt.scatter(X0_project[:, 0], X0_project[:, 1], c='r', s=15)
                for i in range(X0.shape[0]):
                    plt.plot([X0[i, 0], X0_project[i, 0]], [X0[i, 1], X0_project[i, 1]], '--r', linewidth=1)
                X1_project = np.dot(X1, np.dot(wu.T, wu))
                plt.scatter(X1_project[:, 0], X1_project[:, 1], c='k', s=15)
                for i in range(X1.shape[0]):
                    plt.plot([X1[i, 0], X1_project[i, 0]], [X1[i, 1], X1_project[i, 1]], '--k', linewidth=1)
                # projections of the class centers (mean vectors)
                u0_project = np.dot(u0, np.dot(wu.T, wu))
                plt.scatter(u0_project[:, 0], u0_project[:, 1], c='#FF4500', s=60)
                u1_project = np.dot(u1, np.dot(wu.T, wu))
                plt.scatter(u1_project[:, 0], u1_project[:, 1], c='#696969', s=60)
                # annotate the projection points of the mean vectors
                ax.annotate(r'u0 projection',
                            xy=(u0_project[:, 0], u0_project[:, 1]),
                            xytext=(u0_project[:, 0] - 0.2, u0_project[:, 1] - 0.1),
                            size=13,
                            va="center", ha="left",
                            arrowprops=dict(arrowstyle="->",
                                            color="k",
                                            )
                            )
                ax.annotate(r'u1 projection',
                            xy=(u1_project[:, 0], u1_project[:, 1]),
                            xytext=(u1_project[:, 0] - 0.1, u1_project[:, 1] + 0.1),
                            size=13,
                            va="center", ha="left",
                            arrowprops=dict(arrowstyle="->",
                                            color="k",
                                            )
                            )
                plt.axis("equal")  # keep the unit lengths of the two axes equal
                plt.show()
            self.w = w
            self.u0 = u0
            self.u1 = u1
            return self

        def predict(self, X):
            project = np.dot(X, self.w.T)
            wu0 = np.dot(self.w, self.u0.T)
            wu1 = np.dot(self.w, self.u1.T)
            return (np.abs(project - wu1) < np.abs(project - wu0)).astype(int)


    if __name__ == '__main__':
        data_path = r'watermelon3_0_Ch.csv'
        data = pd.read_csv(data_path).values
        X = data[:, 1:3].astype(float)
        y = data[:, 3]
        y[y == '是'] = 1
        y[y == '否'] = 0
        y = y.astype(int)
        lda = LDA()
        lda.fit(X, y, plot_=True)
        print(lda.predict(X))  # consistent with the logistic-regression results
        print(y)
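
    As a cross-check (a sketch, assuming the same CSV layout as above), scikit-learn's LinearDiscriminantAnalysis can be fit on the same data and its predictions compared with the hand-rolled LDA:

    import pandas as pd
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    data = pd.read_csv('watermelon3_0_Ch.csv').values
    X = data[:, 1:3].astype(float)
    y = (data[:, 3] == '是').astype(int)

    sk_lda = LinearDiscriminantAnalysis()
    print(sk_lda.fit(X, y).predict(X))  # compare with lda.predict(X) above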

     


    Referenced blog posts:

    zhangriqi, "Zhou Zhihua's Machine Learning Exercise Solutions, Ch. 3.3: Programming Logistic Regression", CSDN blog.

    "Zhou Zhihua's Machine Learning Exercises (Chapter 3): Linear Models", Alibaba Cloud Developer Community (aliyun.com).

Original post: https://blog.csdn.net/qq_40694323/article/details/125410908