- # # 科学计算模块
- # import numpy as np
- # import pandas as pd
- # # 绘图模块
- # import matplotlib as mpl
- # import matplotlib.pyplot as plt
- # from sklearn.linear_model import LinearRegression
- # from sklearn import datasets
- # from sklearn.model_selection import train_test_split
- # from sklearn.neighbors import KNeighborsClassifier
- # from sklearn import preprocessing
- # from sklearn.svm import SVC
- # #鸢尾花数据预测
- # # X,y = datasets.load_iris(return_X_y=True)
- # # x_train ,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
- # # knn=KNeighborsClassifier()
- # # knn.fit(x_train,y_train)
- # # print(knn.predict(x_test))
- # # print(y_test)
- # #knn.coef_ 斜率
- # #knn.intercept_ 截距 y=0.3x+6
- #
- # #datasets 自定义数据
- # # X,y = datasets.make_regression(n_samples=100,n_features=2,n_targets=2,noise=2)
- # # plt.scatter(X,y)
- # # plt.show()
- #
- # #数据标准化
- # # X,y=datasets.make_classification(n_samples=300,n_features=2,n_redundant=0,n_informative=2,random_state=22,n_clusters_per_class=1,scale=100)
- # # # sp=np.array(X).shape
- # # print(np.array(y).shape)
- # # # plt.scatter(X[:,0],X[:,1],c=y)
- # # # plt.show()
- # # X=preprocessing.scale(X) #因为数据差距比较大,此处将数据标准化,可提高预测准确率
- # # x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
- # # clf=SVC()
- # # clf.fit(x_train,y_train)
- # # print(clf.score(x_test,y_test)) #在测试集上评估准确率,相当于将 predict 的结果与 y_test 对比
- #
- #
- # #鸢尾花数据交叉验证
- # from sklearn.model_selection import cross_val_score #交叉验证可选择好用的model ,参数等等
- # X,y = datasets.load_iris(return_X_y=True)
- # x_train ,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
- # # knn=KNeighborsClassifier(n_neighbors=5)
- # # scores=cross_val_score(knn,X,y,cv=5,scoring='accuracy')
- # # print(scores.mean()) #此处是将数据分成5组,分别得到5组的准确率,最后取均值
- #
- # #此处判断k值在什么区间时准确率较高
- # # k_range=range(2,31)
- # # k_scores=[]
- # # for k in k_range:
- # # knn = KNeighborsClassifier(n_neighbors=k)
- # # #scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy') #for classification 选择准确率高的
- # # loss = -cross_val_score(knn, X, y, cv=10, scoring='mean_squared_error') #for regression 选择误差小的
- # # k_scores.append(scores.mean())
- # #
- # # plt.plot(k_range,k_scores)
- # # plt.show()
- #
- # import pickle
- # #保存并读取训练好的模型(model.fit() 之后)
- # clf=SVC()
- # iris=datasets.load_iris()
- # X,y=iris.data,iris.target
- # clf.fit(X,y)
- # #
- # # # pickle.dump()
- # # with open('./model_iris' ,'wb') as ff:
- # # pickle.dump(clf, ff)
- # #
- # # with open('./model_iris', 'rb') as ff:
- # # clf2=pickle.load(ff)
- # # print(clf2.predict([X[0:1]]))
- #
- # import joblib
- # joblib.dump(clf,'./model_iris')
- # clf3=joblib.load('./model_iris')
- # print(clf3.predict([X[0:1]]))
学习链接