• Python Machine Learning 16: Relevance Vector Machines (RVM)


    This series mostly skips the mathematical theory; the goal is to let readers implement machine learning methods with the most concise Python code possible.


    Background

    Anyone who studies machine learning knows the support vector machine (SVM). Before the rise of deep learning it was one of the most popular classifiers; among traditional machine learning methods, SVM tends to be the strong choice for classification, while random forests (RF) tend to be the strong choice for regression.

    Next to today's deep neural networks their performance looks faded, but plenty of people in academia still use these classical algorithms: they rest on solid mathematical theory, which makes them popular with professors and advisors.

    The relevance vector machine (RVM) is one of them. It is a method built on a Bayesian framework: the core idea is to place a prior over the weights, compute their posterior, and learn the hyperparameters by maximizing the marginal likelihood. Papers that use RVM are not hard to find in the research literature.
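    To make that one-sentence summary a bit more concrete (a brief sketch in Tipping's standard notation; this series otherwise skips the math), the RVM models

    $$y(\mathbf{x}) = \sum_{n=1}^{N} w_n K(\mathbf{x}, \mathbf{x}_n) + b, \qquad p(\mathbf{w} \mid \boldsymbol{\alpha}) = \prod_{n} \mathcal{N}\!\left(w_n \mid 0, \alpha_n^{-1}\right),$$

    giving every weight its own precision hyperparameter $\alpha_n$. Maximizing the marginal likelihood drives most $\alpha_n$ to infinity, which prunes the corresponding weights to zero; the few training points whose weights survive are the "relevance vectors".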

    Unfortunately, sklearn, the library we use most often, ships no RVM estimator, so there is nothing to call directly. This post fills that gap: it implements the relevance vector machine in Python and wraps it in the sklearn-style API we all know, so it is convenient to use.

     


    Algorithm

    I will not go over the theory here. Interested readers can look at this PDF, which explains it quite well:

    https://files-cdn.cnblogs.com/files/axlute/RVMExplained.pdf

     


    Implementation

    First we define the RVM classes; both regression and classification are covered.

    """Relevance Vector Machine classes for regression and classification."""
    import numpy as np
    from scipy.optimize import minimize
    from scipy.special import expit
    from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
    from sklearn.metrics.pairwise import (
        linear_kernel,
        rbf_kernel,
        polynomial_kernel
    )
    from sklearn.multiclass import OneVsOneClassifier
    from sklearn.utils.validation import check_X_y


    class BaseRVM(BaseEstimator):
        """Base Relevance Vector Machine class.

        Implementation of Mike Tipping's Relevance Vector Machine using the
        scikit-learn API. Add a posterior over weights method and a predict
        in subclass to use for classification or regression.
        """

        def __init__(
            self,
            kernel='rbf',
            degree=3,
            coef1=None,
            coef0=0.0,
            n_iter=3000,
            tol=1e-3,
            alpha=1e-6,
            threshold_alpha=1e9,
            beta=1.e-6,
            beta_fixed=False,
            bias_used=True,
            verbose=False
        ):
            """Copy params to object properties, no validation."""
            self.kernel = kernel
            self.degree = degree
            self.coef1 = coef1
            self.coef0 = coef0
            self.n_iter = n_iter
            self.tol = tol
            self.alpha = alpha
            self.threshold_alpha = threshold_alpha
            self.beta = beta
            self.beta_fixed = beta_fixed
            self.bias_used = bias_used
            self.verbose = verbose

        def get_params(self, deep=True):
            """Return parameters as a dictionary."""
            params = {
                'kernel': self.kernel,
                'degree': self.degree,
                'coef1': self.coef1,
                'coef0': self.coef0,
                'n_iter': self.n_iter,
                'tol': self.tol,
                'alpha': self.alpha,
                'threshold_alpha': self.threshold_alpha,
                'beta': self.beta,
                'beta_fixed': self.beta_fixed,
                'bias_used': self.bias_used,
                'verbose': self.verbose
            }
            return params

        def set_params(self, **parameters):
            """Set parameters using kwargs."""
            for parameter, value in parameters.items():
                setattr(self, parameter, value)
            return self

        def _apply_kernel(self, x, y):
            """Apply the selected kernel function to the data."""
            if self.kernel == 'linear':
                phi = linear_kernel(x, y)
            elif self.kernel == 'rbf':
                phi = rbf_kernel(x, y, self.coef1)
            elif self.kernel == 'poly':
                phi = polynomial_kernel(x, y, self.degree, self.coef1,
                                        self.coef0)
            elif callable(self.kernel):
                phi = self.kernel(x, y)
                if len(phi.shape) != 2:
                    raise ValueError(
                        "Custom kernel function did not return 2D matrix"
                    )
                if phi.shape[0] != x.shape[0]:
                    raise ValueError(
                        "Custom kernel function did not return matrix with"
                        " rows equal to number of data points."
                    )
            else:
                raise ValueError("Kernel selection is invalid.")

            if self.bias_used:
                # append a constant column so the bias is just one more weight
                phi = np.append(phi, np.ones((phi.shape[0], 1)), axis=1)

            return phi

        def _prune(self):
            """Remove basis functions based on alpha values."""
            keep_alpha = self.alpha_ < self.threshold_alpha

            if not np.any(keep_alpha):
                keep_alpha[0] = True
                if self.bias_used:
                    keep_alpha[-1] = True

            if self.bias_used:
                if not keep_alpha[-1]:
                    self.bias_used = False
                self.relevance_ = self.relevance_[keep_alpha[:-1]]
            else:
                self.relevance_ = self.relevance_[keep_alpha]

            self.alpha_ = self.alpha_[keep_alpha]
            self.alpha_old = self.alpha_old[keep_alpha]
            self.gamma = self.gamma[keep_alpha]
            self.phi = self.phi[:, keep_alpha]
            self.sigma_ = self.sigma_[np.ix_(keep_alpha, keep_alpha)]
            self.m_ = self.m_[keep_alpha]

        def fit(self, X, y):
            """Fit the RVM to the training data."""
            X, y = check_X_y(X, y)

            n_samples, n_features = X.shape

            self.phi = self._apply_kernel(X, X)
            n_basis_functions = self.phi.shape[1]

            self.relevance_ = X
            self.y = y

            self.alpha_ = self.alpha * np.ones(n_basis_functions)
            self.beta_ = self.beta
            self.m_ = np.zeros(n_basis_functions)
            self.alpha_old = self.alpha_

            for i in range(self.n_iter):
                self._posterior()

                # Tipping's fixed-point updates for the hyperparameters
                self.gamma = 1 - self.alpha_ * np.diag(self.sigma_)
                self.alpha_ = self.gamma / (self.m_ ** 2)

                if not self.beta_fixed:
                    self.beta_ = (n_samples - np.sum(self.gamma)) / (
                        np.sum((y - np.dot(self.phi, self.m_)) ** 2))

                self._prune()

                if self.verbose:
                    print("Iteration: {}".format(i))
                    print("Alpha: {}".format(self.alpha_))
                    print("Beta: {}".format(self.beta_))
                    print("Gamma: {}".format(self.gamma))
                    print("m: {}".format(self.m_))
                    print("Relevance Vectors: {}".format(
                        self.relevance_.shape[0]))
                    print()

                delta = np.amax(np.absolute(self.alpha_ - self.alpha_old))

                if delta < self.tol and i > 1:
                    break

                self.alpha_old = self.alpha_

            if self.bias_used:
                self.bias = self.m_[-1]
            else:
                self.bias = None

            return self


    class RVR(BaseRVM, RegressorMixin):
        """Relevance Vector Machine Regression.

        Implementation of Mike Tipping's Relevance Vector Machine for
        regression using the scikit-learn API.
        """

        def _posterior(self):
            """Compute the posterior distribution over weights."""
            i_s = np.diag(self.alpha_) + self.beta_ * np.dot(self.phi.T,
                                                             self.phi)
            self.sigma_ = np.linalg.inv(i_s)
            self.m_ = self.beta_ * np.dot(self.sigma_,
                                          np.dot(self.phi.T, self.y))

        def predict(self, X, eval_MSE=False):
            """Evaluate the RVR model at X."""
            phi = self._apply_kernel(X, self.relevance_)
            y = np.dot(phi, self.m_)

            if eval_MSE:
                MSE = (1 / self.beta_) + np.dot(phi, np.dot(self.sigma_,
                                                            phi.T))
                return y, MSE[:, 0]
            else:
                return y


    class RVC(BaseRVM, ClassifierMixin):
        """Relevance Vector Machine Classification.

        Implementation of Mike Tipping's Relevance Vector Machine for
        classification using the scikit-learn API.
        """

        def __init__(self, n_iter_posterior=50, **kwargs):
            """Copy params to object properties, no validation."""
            self.n_iter_posterior = n_iter_posterior
            super(RVC, self).__init__(**kwargs)

        def get_params(self, deep=True):
            """Return parameters as a dictionary."""
            params = super(RVC, self).get_params(deep=deep)
            params['n_iter_posterior'] = self.n_iter_posterior
            return params

        def _classify(self, m, phi):
            return expit(np.dot(phi, m))

        def _log_posterior(self, m, alpha, phi, t):
            y = self._classify(m, phi)

            log_p = -1 * (np.sum(np.log(y[t == 1]), 0) +
                          np.sum(np.log(1 - y[t == 0]), 0))
            log_p = log_p + 0.5 * np.dot(m.T, np.dot(np.diag(alpha), m))

            jacobian = np.dot(np.diag(alpha), m) - np.dot(phi.T, (t - y))

            return log_p, jacobian

        def _hessian(self, m, alpha, phi, t):
            y = self._classify(m, phi)
            B = np.diag(y * (1 - y))
            return np.diag(alpha) + np.dot(phi.T, np.dot(B, phi))

        def _posterior(self):
            # Laplace approximation: find the posterior mode with Newton-CG,
            # then use the inverse Hessian at the mode as the covariance
            result = minimize(
                fun=self._log_posterior,
                hess=self._hessian,
                x0=self.m_,
                args=(self.alpha_, self.phi, self.t),
                method='Newton-CG',
                jac=True,
                options={
                    'maxiter': self.n_iter_posterior
                }
            )

            self.m_ = result.x
            self.sigma_ = np.linalg.inv(
                self._hessian(self.m_, self.alpha_, self.phi, self.t)
            )

        def fit(self, X, y):
            """Check target values and fit model."""
            self.classes_ = np.unique(y)
            n_classes = len(self.classes_)

            if n_classes < 2:
                raise ValueError("Need 2 or more classes.")
            elif n_classes == 2:
                self.t = np.zeros(y.shape)
                self.t[y == self.classes_[1]] = 1
                return super(RVC, self).fit(X, self.t)
            else:
                self.multi_ = None
                self.multi_ = OneVsOneClassifier(self)
                self.multi_.fit(X, y)
                return self

        def predict_proba(self, X):
            """Return an array of class probabilities."""
            phi = self._apply_kernel(X, self.relevance_)
            y = self._classify(self.m_, phi)
            return np.column_stack((1 - y, y))

        def predict(self, X):
            """Return an array of classes for each input."""
            if len(self.classes_) == 2:
                y = self.predict_proba(X)
                res = np.empty(y.shape[0], dtype=self.classes_.dtype)
                res[y[:, 1] <= 0.5] = self.classes_[0]
                res[y[:, 1] >= 0.5] = self.classes_[1]
                return res
            else:
                return self.multi_.predict(X)
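    For reference, the hyperparameter updates inside fit are Tipping's fixed-point re-estimation rules, written out here directly from the loop above:

    $$\gamma_i = 1 - \alpha_i \Sigma_{ii}, \qquad \alpha_i^{\text{new}} = \frac{\gamma_i}{m_i^2}, \qquad \beta^{\text{new}} = \frac{N - \sum_i \gamma_i}{\lVert \mathbf{y} - \boldsymbol{\Phi}\mathbf{m} \rVert^2},$$

    where $\mathbf{m}$ and $\boldsymbol{\Sigma}$ are the posterior mean and covariance of the weights computed by _posterior(), and any basis function whose $\alpha_i$ grows past threshold_alpha is removed by _prune().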

    That's it. These classes can now be used just like any other sklearn estimator.
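    For example, here is a minimal sketch on synthetic data (my own toy example, not part of the tests below) showing the one extra feature RVR has over sklearn's SVR: with eval_MSE=True, predict also returns a predictive-variance term.

    import numpy as np

    # toy 1-D regression problem
    rng = np.random.RandomState(0)
    X = np.sort(rng.uniform(-3, 3, size=(80, 1)), axis=0)
    y = np.sinc(X).ravel() + 0.1 * rng.randn(80)

    model = RVR(kernel='rbf')
    model.fit(X, y)

    # mean prediction plus the variance term returned by eval_MSE=True
    y_mean, y_var = model.predict(X, eval_MSE=True)
    print("relevance vectors kept:", model.relevance_.shape[0])
    print("predictive std at first point:", np.sqrt(y_var[0]))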

     


    Testing the code

    Let's test both classification and regression, and compare against the support vector machine.

    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    from sklearn.model_selection import KFold, StratifiedKFold
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC
    from sklearn.svm import SVR
    # note: load_boston was removed in scikit-learn 1.2,
    # so this demo assumes an older scikit-learn version
    from sklearn.datasets import load_boston
    from sklearn.datasets import load_breast_cancer

    Classification test

    For classification we use the classic breast cancer dataset that ships with sklearn:

    cancer = load_breast_cancer()  # load the data
    X = cancer.data
    y = cancer.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=0)
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    Support vector machine accuracy with different kernel functions:

    # linear kernel
    model = SVC(kernel="linear", random_state=123)
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # quadratic polynomial kernel
    model = SVC(kernel="poly", degree=2, random_state=123)
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # cubic polynomial kernel
    model = SVC(kernel="poly", degree=3, random_state=123)
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # radial basis function (RBF) kernel
    model = SVC(kernel="rbf", random_state=123)
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # sigmoid kernel
    model = SVC(kernel="sigmoid", random_state=123)
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    Relevance vector machine (RVM) accuracy:

    model = RVC(kernel="linear")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    model = RVC(kernel="rbf")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    model = RVC(kernel="poly")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

     

    The two methods score about the same on this dataset.
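    One small bonus over SVC (which needs probability=True and Platt scaling for this): the RVC defined above returns class probabilities directly. A quick check, reusing the last fitted model:

    # posterior class probabilities for the first five test points
    # (column 0 = class 0, column 1 = class 1)
    print(model.predict_proba(X_test_s[:5]))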


    Regression test

    For regression we use the Boston housing dataset (available via load_boston in scikit-learn versions before 1.2):

    # support vector regression on the Boston housing data
    X, y = load_boston(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train_s = scaler.transform(X_train)
    X_test_s = scaler.transform(X_test)

    Support vector machine R² scores (from model.score) with different kernel functions:

    # linear kernel
    model = SVR(kernel="linear")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # quadratic polynomial kernel
    model = SVR(kernel="poly", degree=2)
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # cubic polynomial kernel
    model = SVR(kernel="poly", degree=3)
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # radial basis function (RBF) kernel
    model = SVR(kernel="rbf")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    # sigmoid kernel
    model = SVR(kernel="sigmoid")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    Relevance vector machine (RVM) scores:

    model = RVR(kernel="linear")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    model = RVR(kernel="rbf")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    model = RVR(kernel="poly")
    model.fit(X_train_s, y_train)
    print(model.score(X_test_s, y_test))

    As you can see, on this regression problem the relevance vector machine scores better than the support vector machine.
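    A side benefit worth checking (a small sketch of my own, refitting both models on the same data): the RVM is typically far sparser, keeping many fewer relevance vectors than the SVM keeps support vectors.

    # how many basis points does each fitted model keep?
    rvr = RVR(kernel="rbf").fit(X_train_s, y_train)
    svr = SVR(kernel="rbf").fit(X_train_s, y_train)
    print("RVR relevance vectors:", rvr.relevance_.shape[0])
    print("SVR support vectors:", svr.support_vectors_.shape[0])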

    Conclusion:

    Use SVM for classification and RVM for regression.

    Of course, I only tested on two of sklearn's built-in datasets, so that conclusion is bound to be a little hasty. Interested readers can try other datasets and run repeated K-fold cross-validation to compare the two methods more thoroughly, for example along the lines of the sketch below.
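    Because the classes implement get_params/set_params, they plug straight into sklearn's model-selection tools. A minimal sketch (my addition; the pipeline refits the scaler inside every fold to avoid leakage):

    from sklearn.pipeline import make_pipeline
    from sklearn.model_selection import cross_val_score, RepeatedKFold

    # 5-fold CV repeated 3 times; the default scoring for RVR is R^2
    pipe = make_pipeline(StandardScaler(), RVR(kernel="rbf"))
    cv = RepeatedKFold(n_splits=5, n_repeats=3, random_state=0)
    scores = cross_val_score(pipe, X, y, cv=cv)
    print(scores.mean(), scores.std())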

  • Original article: https://blog.csdn.net/weixin_46277779/article/details/127927306