sklearn模型的保存和加载API
from sklearn.externals import joblib (注:scikit-learn 0.23 起已移除 sklearn.externals.joblib,新版本请单独安装 joblib 并直接使用 import joblib)
保存 joblib.dump(rf,'test.pkl')
加载 estimator = joblib.load('test.pkl')
线性回归的模型保存加载案例
def linea3():
    """Fit a ridge regression on the Boston housing data and save it.

    Loads the dataset, splits it into train/test parts, standardizes the
    features, fits a ``Ridge`` estimator, dumps the fitted estimator to
    ``my_ridge.pkl``, then prints the learned parameters, the test-set
    predictions and their mean squared error.

    :return: None
    """
    # 1) Load the data.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm
    # the installed version still provides it.
    dataset = load_boston()
    print("特征数量:\n", dataset.data.shape)

    # 2) Split into train/test parts; the fixed seed keeps the split
    #    reproducible between runs.
    features_train, features_test, target_train, target_test = train_test_split(
        dataset.data,
        dataset.target,
        random_state=22,
    )

    # 3) Standardize: fit the scaler on the training part only, then apply
    #    the same transformation to the test part.
    scaler = StandardScaler()
    features_train = scaler.fit_transform(features_train)
    features_test = scaler.transform(features_test)

    # 4) Fit the estimator.
    model = Ridge()
    model.fit(features_train, target_train)

    # Persist the fitted estimator so it can be reloaded without retraining.
    joblib.dump(model, "my_ridge.pkl")

    # 5) Show the learned parameters.
    print("岭回归的权重系数:\n", model.coef_)
    print("岭回归的偏置为:\n", model.intercept_)

    # 6) Evaluate on the test part.
    predictions = model.predict(features_test)
    print("预测房价:\n", predictions)
    mse = mean_squared_error(target_test, predictions)
    print("岭回归-均方误差为:\n", mse)

def linea3():
    """Evaluate a previously saved ridge regression on the Boston housing data.

    Loads the dataset, splits it into train/test parts, standardizes the
    features, restores the estimator from ``my_ridge.pkl`` instead of
    training one, then prints its parameters, the test-set predictions and
    their mean squared error.

    :return: None
    """
    # 1) Load the data.
    # NOTE(review): load_boston was removed in scikit-learn 1.2 -- confirm
    # the installed version still provides it.
    dataset = load_boston()
    print("特征数量:\n", dataset.data.shape)

    # 2) Split into train/test parts with a fixed seed.
    features_train, features_test, target_train, target_test = train_test_split(
        dataset.data,
        dataset.target,
        random_state=22,
    )

    # 3) Standardize: the scaler is refit on the training part here (it is
    #    not loaded from disk), then applied to the test part.
    scaler = StandardScaler()
    features_train = scaler.fit_transform(features_train)
    features_test = scaler.transform(features_test)

    # 4) Load the model. Training and saving are skipped in this variant;
    #    the fitted estimator is read back from "my_ridge.pkl".
    # NOTE(review): joblib.load unpickles the file -- only load files from
    # a trusted source.
    model = joblib.load("my_ridge.pkl")

    # 5) Show the restored parameters.
    print("岭回归的权重系数:\n", model.coef_)
    print("岭回归的偏置为:\n", model.intercept_)

    # 6) Evaluate on the test part.
    predictions = model.predict(features_test)
    print("预测房价:\n", predictions)
    mse = mean_squared_error(target_test, predictions)
    print("岭回归-均方误差为:\n", mse)

K-means聚类步骤
1 随机设置k个特征空间内的点作为初始的聚类中心
2 对于其他每个点计算到k个中心的距离,未知的点选择最近的一个聚类中心点作为标记类别
3 接着在所有点都标记好所属的聚类中心之后,重新计算出每个聚类的新中心点(平均值)
4 如果计算得出的新中心点与原来中心点一样,那么结束,否则重新进行第二步过程