• sklearn 机器学习基本用法


    1. # # 科学计算模块
    2. # import numpy as np
    3. # import pandas as pd
    4. # # 绘图模块
    5. # import matplotlib as mpl
    6. # import matplotlib.pyplot as plt
    7. # from sklearn.linear_model import LinearRegression
    8. # from sklearn import datasets
    9. # from sklearn.model_selection import train_test_split
    10. # from sklearn.neighbors import KNeighborsClassifier
    11. # from sklearn import preprocessing
    12. # from sklearn.svm import SVC
    13. # #鸢尾花数据预测
    14. # # X,y = datasets.load_iris(return_X_y=True)
    15. # # x_train ,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
    16. # # knn=KNeighborsClassifier()
    17. # # knn.fit(x_train,y_train)
    18. # # print(knn.predict(x_test))
    19. # # print(y_test)
    20. # #knn.coef_ 斜率
    21. # #knn.intercept_ 截距 y=0.3x+6
    22. #
    23. # #datasets 自定义数据
    24. # # X,y = datasets.make_regression(n_samples=100,n_features=2,n_targets=2,noise=2)
    25. # # plt.scatter(X,y)
    26. # # plt.show()
    27. #
    28. # #标准数字化
    29. # # X,y=datasets.make_classification(n_samples=300,n_features=2,n_redundant=0,n_informative=2,random_state=22,n_clusters_per_class=1,scale=100)
    30. # # # sp=np.array(X).shape
    31. # # print(np.array(y).shape)
    32. # # # plt.scatter(X[:,0],X[:,1],c=y)
    33. # # # plt.show()
    34. # # X=preprocessing.scale(X) #因为数据差距比较大,此处将数据标准化,可增加预测相似度
    35. # # x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
    36. # # clf=SVC()
    37. # # clf.fit(x_train,y_train)
    38. # # print(clf.score(x_test,y_test)) #评估预测集,与预测集的相似度,相当于predict 与 y_test 对比度
    39. #
    40. #
    41. # #鸢尾花数据交叉验证
    42. # from sklearn.model_selection import cross_val_score #交叉验证可选择好用的model ,参数等等
    43. # X,y = datasets.load_iris(return_X_y=True)
    44. # x_train ,x_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
    45. # # knn=KNeighborsClassifier(n_neighbors=5)
    46. # # scores=cross_val_score(knn,X,y,cv=5,scoring='accuracy')
    47. # # print(scores.mean()) #此处是将数据分成5组取出5组的概率,最后取均值
    48. #
    49. # #此处判断k值在什么区间这个概率较大
    50. # # k_range=range(2,31)
    51. # # k_scores=[]
    52. # # for k in k_range:
    53. # # knn = KNeighborsClassifier(n_neighbors=k)
    54. # # #scores = cross_val_score(knn, X, y, cv=10, scoring='accuracy') #for classification 选择概率大的
    55. # # loss = -cross_val_score(knn, X, y, cv=10, scoring='neg_mean_squared_error') #for regression 选择误差小的
    56. # # k_scores.append(loss.mean())
    57. # #
    58. # # plt.plot(k_range,k_scores)
    59. # # plt.show()
    60. #
    61. # import pickle
    62. # #保存读取model.fit() 数据
    63. # clf=SVC()
    64. # iris=datasets.load_iris()
    65. # X,y=iris.data,iris.target
    66. # clf.fit(X,y)
    67. # #
    68. # # # pickle.dump()
    69. # # with open('./model_iris' ,'wb') as ff:
    70. # # pickle.dump(clf, ff)
    71. # #
    72. # # with open('./model_iris', 'rb') as ff:
    73. # # clf2=pickle.load(ff)
    74. # # print(clf2.predict(X[0:1]))
    75. #
    76. # import joblib
    77. # joblib.dump(clf,'./model_iris')
    78. # clf3=joblib.load('./model_iris')
    79. # print(clf3.predict(X[0:1]))

    学习链接

    11 Save_哔哩哔哩_bilibili

  • 相关阅读:
    Linkstech多核并行仿真丨光伏发电系统模型及IEEE 39 bus模型多核并行实测
    基于springboot实现医患档案管理系统项目【项目源码】计算机毕业设计
    Jenkins-jenkins凭证管理与代码拉取
    JAVA计算机毕业设计自由教学平台Mybatis+源码+数据库+lw文档+系统+调试部署
    MSDC 4.3 接口规范(2)
    Task08|文本数据|joyfulpandas
    Docker的安装与基础命令
    2023.09.09青少年软件编程(Python)等级考试试卷(一级)
    在基于ABP框架的前端项目Vue&Element项目中采用电子签章处理文件和打印处理
    【ESP32S3】VSCode 开发环境搭建
  • 原文地址:https://blog.csdn.net/Steven_yang_1/article/details/133814300