• lightgbm使用multiclass训练二分类模型


    使用 multiclass 目标训练二分类目标变量时,LGBMClassifier.predict_proba 输出结果的维度与样本数量不一致,导致把结果赋值给 DataFrame 的列时报错。可以通过修改下面代码中的 num_class 来验证:

    # -*- coding: utf-8 -*-
    """
    
    @Time  : 2022/7/29 14:48
    @Author: Breeze
    @File  : 多分类测试输出.py
    """
    import sys
    
    from lightgbm import LGBMClassifier
    import numpy as np
    import os
    # Demo: train an LGBMClassifier with the 'multiclass' objective on random
    # data and print the shape of the array returned by predict_proba.
    print(sys.version)

    # Hyper-parameters.
    n_estimators = 100
    lr = 0.01
    max_depth = 3
    l1 = 0.1
    l2 = 0.1
    subsample_for_bin = 32
    min_child_sample = 32
    num_class = 3  # the article suggests varying this to inspect the output shape

    wd_namelist_model = LGBMClassifier(
        # Typical objectives: 'regression' for LGBMRegressor, 'binary' or
        # 'multiclass' for LGBMClassifier, 'lambdarank' for LGBMRanker.
        objective='multiclass',
        num_class=num_class,
        n_estimators=n_estimators,
        learning_rate=lr,
        num_leaves=2 ** max_depth - 1,  # keep within (0, 2^max_depth - 1]
        colsample_bytree=0.6,
        subsample=0.6,
        max_depth=max_depth,
        reg_alpha=l1,
        reg_lambda=l2,
        subsample_for_bin=subsample_for_bin,
        subsample_freq=32,
        min_split_gain=0.01,
        min_child_weight=0.01,
        # The LightGBM parameter is spelled `min_child_samples`; the previous
        # `min_child_sample` keyword was not a recognised parameter name.
        min_child_samples=min_child_sample,
        # `silent` is deprecated and has no effect once `verbose` is given,
        # so only `verbose` is passed.
        verbose=1,
        random_state=2022,
    )

    # Random data: 50000 training rows and 5000 validation rows with 100
    # features each; labels are integers drawn uniformly from [0, num_class).
    train_data = np.random.rand(50000, 100)
    train_label = np.random.randint(num_class, size=50000)
    val_data = np.random.rand(5000, 100)
    val_label = np.random.randint(num_class, size=5000)

    # NOTE: the `verbose` and `early_stopping_rounds` arguments of fit() were
    # deprecated in LightGBM 3.3 and removed in 4.0. On newer versions pass
    # callbacks=[lightgbm.early_stopping(50), lightgbm.log_evaluation(10)]
    # instead.
    wd_namelist_model.fit(
        train_data,
        train_label,
        eval_set=[(train_data, train_label), (val_data, val_label)],
        eval_metric=['auc_mu'],  # alternatives: multi_logloss, binary_logloss, cross_entropy
        verbose=10,
        early_stopping_rounds=50,
    )

    # Per-class probabilities for the validation rows; print the array shape
    # so it can be compared with the number of validation samples.
    val_pred = wd_namelist_model.predict_proba(val_data)

    print(val_pred.shape)

    如果想用多分类(multiclass)目标来训练二分类的目标变量,可以通过如下方式获取每个样本的概率值:

    # Take row 0 and the first len(val_data) columns of the predict_proba output.
    # NOTE(review): this indexing only yields one value per sample when the
    # returned array has at least len(val_data) columns, i.e. it is not in the
    # usual (n_samples, n_classes) layout — confirm with print(val_pred.shape)
    # before relying on it.
    wd_namelist_model.predict_proba(val_data)[0,:len(val_data)]

  • 相关阅读:
    ARP协议-介于数据链路层和网络层之间的协议
    小皮面板为什么还是打不开?
    云原生Kubernetes:K8S集群使用带凭证的harbor仓库
    nginx配置https访问 生成ssl自签名证书,浏览器直接访问
    蓝桥杯练习题——dp
    windos安装Mysql8.0,及解决重新登录异常问题 ERROR 1045 (28000)
    禁止瘟疫清零计划 Project Zero与VR一起启动
    将风险前置
    2022Android开发面试(备战金九银十题库+小技巧)
    java的继承特性和方法重写
  • 原文地址:https://blog.csdn.net/mtj66/article/details/126059400