AdaBoost(Adaptive Boosting的简称)是一种集成学习方法,它的核心思想在于将多个弱学习器组合起来,形成一个强学习器。通过这种方式,AdaBoost能够显著提高分类性能。下面详细介绍AdaBoost的主要概念和工作原理:
AdaBoost被广泛应用于各种分类问题,包括二分类和多分类问题,如人脸识别、客户流失预测、文本分类等领域。
# coding=utf-8
# AdaBoostClassifier.py
# Demo: compare a single shallow decision tree against an AdaBoost
# ensemble built on top of it, using the UCI glass-identification data.
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

# Load the data. Column 0 of glass.data is the sample Id, so it is used
# as the index; the file has no header row.
filename = "./glass.data"
glass_data = pd.read_csv(filename, index_col=0, header=None)
# Pull plain NumPy arrays out of the DataFrame (.values):
# every column but the last is a feature, the last column is the label.
X, y = glass_data.iloc[:, :-1].values, glass_data.iloc[:, -1].values

# Split into training and test sets; stratify=y keeps the per-class
# proportions, random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=True, stratify=y, random_state=1)

# Weak base learner: a depth-2 decision tree.
base_clf = DecisionTreeClassifier(max_depth=2, random_state=0)
# AdaBoost ensemble of up to 1000 boosted copies of the base learner.
# NOTE: the keyword `base_estimator` was deprecated in scikit-learn 1.2
# and removed in 1.4 — the current name is `estimator`.
ada_clf = AdaBoostClassifier(estimator=base_clf,
                             random_state=0, n_estimators=1000)
-
# Fit and evaluate both classifiers: report train/test accuracy, then
# inspect the predicted labels and class probabilities for the first
# two test samples, and recover labels from the probability matrix.
for clf in (base_clf, ada_clf):
    clf.fit(X_train, y_train)
    name = clf.__class__.__name__
    print(name, "训练集准确率:", clf.score(X_train, y_train), sep="")
    print(name, "测试集准确率:", clf.score(X_test, y_test), sep="")
    print(name,
          "对测试集前2个样本预测的分类标签:\n",
          clf.predict(X_test[:2]), sep="")
    probs = clf.predict_proba(X_test[:2])
    print(name,
          "对测试集前2个样本预测的分类概率:\n",
          probs, sep="")
    print("分类器中的标签排列:", clf.classes_)
    # Map each row's argmax column back to its class label.
    print("根据预测概率推算预测标签:", end="")
    for label in clf.classes_[probs.argmax(axis=1)]:
        print(label, end=" ")
    print()

print("测试集前2个样本的真实标签:", y_test[:2], sep="")