人工智能 sklearn(一)
一.鸢尾花数据集(iris)
Iris数据集是常用的分类实验数据集,由Fisher, 1936收集整理。Iris也称鸢尾花卉数据集,是一类多重变量分析的数据集。数据集包含150个数据样本,分为3类,每类50个数据,每个数据包含4个属性。可通过花萼长度,花萼宽度,花瓣长度,花瓣宽度4个属性预测鸢尾花卉属于(Setosa,Versicolour,Virginica)三个种类中的哪一类。
二.逻辑回归求解鸢尾花问题
import numpy as np #科学计算
import matplotlib.pyplot as plt #画图
import seaborn as sns #画图
import pandas as pd #数据分析
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Load the iris dataset. as_frame=True makes the returned Bunch expose pandas
# objects (iris.frame, iris["data"], iris["target"]).
iris = datasets.load_iris(as_frame=True)

# Quick overview of the dataset.
print(iris.keys())
print(iris["feature_names"])
print(iris["target_names"])
print(iris["target"])
print(iris.frame.head())
print(iris.frame.tail())
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
iris.frame.info()
# 2-D scatter plot of petal length vs. petal width, one marker style per class.
# The font is configured first so it is already in effect when the Chinese
# title text is created.
plt.rcParams["font.sans-serif"] = ["SimHei"]

# (target id, matplotlib format string, legend label)
species_styles = [
    (0, "rs", "setosa"),       # red squares
    (1, "bx", "versicolour"),  # blue crosses
    (2, "go", "virginica"),    # green circles
]
for target_id, fmt, species in species_styles:
    # Rows of the full frame that belong to the current class.
    subset = iris.frame.loc[iris["target"] == target_id]
    plt.plot(
        subset["petal length (cm)"],
        subset["petal width (cm)"],
        fmt,
        label=species,
    )

plt.xlabel("petal length (cm)")
plt.ylabel("petal width (cm)")
plt.title("预测分类边界")
# Anchor the legend just outside the right edge of the axes.
plt.legend(title="species", bbox_to_anchor=(1.05, 1), loc="upper left")
plt.show()
# Pairwise plot matrix over every column of the frame, coloured by the
# "target" column.
pairplot_options = {
    "data": iris.frame,
    "hue": "target",
    "palette": "coolwarm",
}
sns.pairplot(**pairplot_options)
plt.show()
# Build the training and test sets: 25% of the samples are held out for
# testing, and the fixed random_state keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
    random_state=180,
)
# Train the first logistic-regression model (baseline: no hyperparameters tuned).
def basic_logistic_regression(X_train, Y_train, X_test, Y_test):
    """Fit a default LogisticRegression and print its parameters and metrics.

    Args:
        X_train: Feature matrix used to fit the model.
        Y_train: Class labels for ``X_train``.
        X_test: Feature matrix used for evaluation.
        Y_test: Class labels for ``X_test``.

    Returns:
        None. The coefficients, intercepts, mean squared error and
        ``model.score`` for both splits are printed to stdout.
    """
    model = LogisticRegression()
    model.fit(X_train, Y_train)

    Y_train_pre = model.predict(X_train)
    Y_test_pre = model.predict(X_test)

    # MSE is computed directly on the predicted vs. true class labels.
    MSE_train = mean_squared_error(Y_train, Y_train_pre)
    MSE_test = mean_squared_error(Y_test, Y_test_pre)

    print(f"权重:{model.coef_}")
    print(f"截距:{model.intercept_}")
    print(f"训练集均误差:{MSE_train}")
    # This line reports the test split, so it is labelled as the test set.
    print(f"测试集均误差:{MSE_test}")
    print(f"score_train:{model.score(X_train, Y_train)}")
    print(f"score_test:{model.score(X_test, Y_test)}")
# Run the baseline model on the split created above.
basic_logistic_regression(
    X_train=X_train,
    Y_train=Y_train,
    X_test=X_test,
    Y_test=Y_test,
)