首先导入所需要的库
- import sklearn
- import matplotlib.pyplot as plt
- from matplotlib import font_manager
- from matplotlib import rcParams
- from sklearn.datasets import load_boston
- from sklearn import linear_model
- from sklearn.model_selection import train_test_split
- from sklearn.metrics import mean_squared_error
- rcParams['font.family'] = 'SimHei'
然后加载数据集,并将其拆分为特征 x 和目标值 y
- boston = load_boston(return_X_y=True)
- x,y = boston
- print(x.shape)
- print(y.shape)
x、y 的形状(shape)分别是:
(506, 13) (506,)
划分训练集和测试集,训练线性回归模型并计算均方误差
- #切分训练集和测试集
- x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=2)
- #最小二乘法
- regression = linear_model.LinearRegression()
- #print(x_train.shape)
- #print(y_train.shape)
- #训练数据
- regression.fit(x_train,y_train)
- #预测数据
- y_predict = regression.predict(x_test)
- #print(y_predict)
- #print(y_test)
- #计算均方误差
- mse = mean_squared_error(y_test,y_predict)
- print('mse:',mse)
计算得出的均方误差为:
mse: 18.495420122448206
绘制真实值与预测值的对比散点图
- plt.scatter(y_test, y_predict, color='blue')
- plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()])
- plt.xlabel('真实值')
- plt.ylabel('预测值')
- plt.title('线性回归')
- plt.show()