from matplotlib.font_manager import FontProperties
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
# NOTE(review): these statements appear to be the body of the data-preparation
# helper that the script section calls as `lasso.prepare_data()`; its `def`
# line is not visible in this view — confirm against the full file.
# Generate a synthetic regression problem: 40 samples, 80 features, noise=0.5,
# with a fixed seed so the data is reproducible.
X, y = make_regression(n_samples=40, n_features=80, random_state=0, noise=0.5)
# Split into train/test partitions (no explicit test_size is passed) with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
# Features are returned as-is; both target arrays are reshaped to a single column.
return X_train, X_test, y_train.reshape(-1,1), y_test.reshape(-1,1)
# Parameter initialiser; `lasso_train` calls it with the number of feature
# columns (`X.shape[1]`) and unpacks the result as `w, b`.
# NOTE(review): the body is not visible in this view — the initial values and
# the shapes of `w` and `b` cannot be confirmed from here.
def initialize_params(self, dims):
def l1_loss(self, X, y, w, b, alpha):
    """Evaluate the L1-regularised linear model and its gradients.

    The prediction is the linear form ``np.dot(X, w) + b``. The loss is the
    mean squared residual plus ``alpha`` times the sum of absolute weights.

    Args:
        X: Feature matrix; ``X.shape[0]`` is taken as the sample count.
        y: Targets, broadcast-compatible with ``np.dot(X, w) + b``
            (presumably a column vector — confirm against the caller).
        w: Weight array multiplied as ``np.dot(X, w)``.
        b: Bias added to every prediction.
        alpha: Strength of the L1 penalty.

    Returns:
        Tuple ``(y_hat, loss, dw, db)``: predictions, scalar loss, gradient
        with respect to ``w``, and gradient with respect to ``b``.
    """
    # Bind the two names the loss/gradient expressions below depend on.
    num_train = X.shape[0]
    y_hat = np.dot(X, w) + b
    residual = y_hat - y

    loss = np.sum(residual ** 2) / num_train + alpha * np.sum(np.abs(w))
    # np.sign(w) is the subgradient of |w|; it is 0 where a weight is exactly 0.
    # NOTE(review): this data-term gradient is that of half the mean squared
    # error, while `loss` reports the full mean squared error (no 1/2 factor).
    # Left as written to preserve training behaviour.
    dw = np.dot(X.T, residual) / num_train + alpha * np.sign(w)
    db = np.sum(residual) / num_train
    return y_hat, loss, dw, db
# Training entry point: initialises parameters, standardises the features and
# repeatedly evaluates `l1_loss`; returns `(loss, loss_list, params, grads)`.
# NOTE(review): the parameter-update step and the construction of `loss_list`,
# `params` and `grads` are not visible in this view — confirm against the full
# file before relying on this description.
def lasso_train(self, X, y, learning_rate, epochs, alpha):
# Parameters are sized by the number of feature columns.
w, b = self.initialize_params(X.shape[1])
# Standardise each feature column (subtract column mean, divide by column std).
# NOTE(review): a column with zero standard deviation divides by zero here, and
# the visible `predict` line applies `np.dot(X, w) + b` without this scaling —
# confirm callers scale prediction inputs the same way.
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
# range(1, epochs) performs epochs - 1 iterations; with epochs <= 1 the loop
# body never runs and `loss` is never assigned.
for i in range(1, epochs):
y_hat, loss, dw, db = self.l1_loss(X, y, w, b, alpha)
# `loss` is the value from the final iteration.
return loss, loss_list, params, grads
# Linear prediction: computes np.dot(X, w) + b.
# NOTE(review): `w` and `b` are not bound in the visible lines — presumably they
# are read from `params` (the script section reads `params['w']`); no `return`
# statement is visible either. Confirm against the full file.
def predict(self, X, params):
y_pred = np.dot(X, w) + b
# Script entry: plots a near-zero weight count against the L1 coefficient.
# NOTE(review): `lasso`, `alpha` and `wc` are not bound in the visible lines —
# presumably a model instance, a loop variable over `alphas`, and a list that
# collects `count` per alpha. Confirm against the full file.
if __name__ == '__main__':
X_train, X_test, y_train, y_test = lasso.prepare_data()
# Candidate regularisation strengths: 0.01 upward in steps of 0.01, stop value 0.11.
alphas=np.arange(0.01,0.11,0.01)
# Train with learning rate 0.02 and epochs=3000 for the current `alpha`.
loss, loss_list, params, grads = lasso.lasso_train(X_train, y_train, 0.02, 3000,alpha)
# Drop singleton dimensions from the learned weights.
w=np.squeeze(params['w'])
# Count weights whose magnitude is below 0.1; these are treated as "zeroed".
count=np.sum(np.abs(w)<1e-1)
# Use the SimHei font so the Chinese axis labels below can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw minus signs with the ASCII hyphen instead of the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False
plt.figure(figsize=(10, 8))
# One marker per alpha, joined by lines.
plt.plot(alphas, wc, 'o-')
# x label reads "regularisation coefficient".
plt.xlabel('正则项系数',fontsize=15)
# y label reads "sparsity of the parameter w matrix".
plt.ylabel('参数w矩阵的稀疏度',fontsize=15)

