• sklearn 机器学习基本用法


    1. # # 科学计算模块
    2. # import numpy as np
    3. # import pandas as pd
    4. # # 绘图模块
    5. # import matplotlib as mpl
    6. # import matplotlib.pyplot as plt
    7. # from sklearn.linear_model import LinearRegression
    8. # from sklearn import datasets
    9. # from sklearn.model_selection import train_test_split
    10. # from sklearn.neighbors import KNeighborsClassifier
    11. # from sklearn import preprocessing
    12. # from sklearn.svm import SVC
    13. # #鸢尾花数据预测
    14. # # X,y = datasets.load_iris(return_X_y=True)
    15. # # x_train ,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
    16. # # knn=KNeighborsClassifier()
    17. # # knn.fit(x_train,y_train)
    18. # # print(knn.predict(x_test))
    19. # # print(y_test)
    20. # #model.coef_ 斜率(仅线性模型如 LinearRegression 才有,KNN 没有该属性)
    21. # #model.intercept_ 截距,例如 y=0.3x+6 中斜率为0.3,截距为6
    22. #
    23. # #datasets 自定义数据
    24. # # X,y = datasets.make_regression(n_samples=100,n_features=2,n_targets=2,noise=2)
    25. # # plt.scatter(X,y)
    26. # # plt.show()
    27. #
    28. # #数据标准化
    29. # # X,y=datasets.make_classification(n_samples=300,n_features=2,n_redundant=0,n_informative=2,random_state=22,n_clusters_per_class=1,scale=100)
    30. # # # sp=np.array(X).shape
    31. # # print(np.array(y).shape)
    32. # # # plt.scatter(X[:,0],X[:,1],c=y)
    33. # # # plt.show()
    34. # # X=preprocessing.scale(X) #因为各特征的数值范围差距比较大,此处将数据标准化,可提高预测准确率
    35. # # x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
    36. # # clf=SVC()
    37. # # clf.fit(x_train,y_train)
    38. # # print(clf.score(x_test,y_test)) #在测试集上评估准确率,相当于比较 predict(x_test) 与 y_test 的一致程度
    39. #
    40. #
    41. # #鸢尾花数据交叉验证
    42. # from sklearn.model_selection import cross_val_score #交叉验证可选择好用的model ,参数等等
    43. # X,y = datasets.load_iris(return_X_y=True)
    44. # x_train ,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
    45. # # knn=KNeighborsClassifier(n_neighbors=5)
    46. # # scores=cross_val_score(knn,X,y,cv=5,scoring='accuracy')
    47. # # print(scores.mean()) #此处是将数据分成5组做交叉验证,得到5个准确率,最后取均值
    48. #
    49. # #此处判断k值在什么区间时准确率较高
    50. # # k_range=range(2,31)
    51. # # k_scores=[]
    52. # # for k in k_range:
    53. # # knn = KNeighborsClassifier(n_neighbors=k)
    54. # # #scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy') #for classification 选择准确率高的
    55. # # loss = -cross_val_score(knn, X, y, cv=10, scoring='mean_squared_error') #for regression 选择误差小的
    56. # # k_scores.append(scores.mean())
    57. # #
    58. # # plt.plot(k_range,k_scores)
    59. # # plt.show()
    60. #
    61. # import pickle
    62. # #保存读取model.fit() 数据
    63. # clf=SVC()
    64. # iris=datasets.load_iris()
    65. # X,y=iris.data,iris.target
    66. # clf.fit(X,y)
    67. # #
    68. # # # pickle.dump()
    69. # # with open('./model_iris' ,'wb') as ff:
    70. # # pickle.dump(clf, ff)
    71. # #
    72. # # with open('./model_iris', 'rb') as ff:
    73. # # clf2=pickle.load(ff)
    74. # # print(clf2.predict([X[0:1]]))
    75. #
    76. # import joblib
    77. # joblib.dump(clf,'./model_iris')
    78. # clf3=joblib.load('./model_iris')
    79. # print(clf3.predict([X[0:1]]))

    学习链接

    11 Save_哔哩哔哩_bilibili

  • 相关阅读:
    传统考勤太复杂怎么办?这个小技巧,我必须吹爆!
    频繁GC问题原因总结
    leetcode 2366. Minimum Replacements to Sort the Array(数组排序的最少替换数)
    Go语言学习笔记——错误处理
    解决Drag and drop is not supported导致无法将物理机上的文件拖入Ubuntu
    SQL不同类型分组排序
    JavaSE - 数组
    hdu 3549 Flow Problem(简单网络流Dinic)
    HTTP版本、状态码
    java读取配置文件工具类
  • 原文地址:https://blog.csdn.net/Steven_yang_1/article/details/133814300