• [Signal Processing] Implementation of Typical Autism Prediction Methods Based on EEG Signals


    Theory

    Autism is shaped mainly by a combination of genetic and environmental factors. Because autism is a spectrum disorder, every autistic person has a unique set of strengths and challenges. The ways in which autistic people learn, think, and solve problems can range from highly skilled to severely challenging. Research shows that high-quality early intervention can improve learning, communication, and social skills, as well as underlying brain development; however, the diagnostic process can take years. This project implements early detection of autism (typical vs. atypical) to provide a timely warning signal for early screening and intervention.

    Tools

    Autism EEG dataset

    Method Implementation

    Data loading
    import numpy as np
    import pandas as pd
    from sklearn.metrics import (roc_auc_score, roc_curve, accuracy_score,
                                 confusion_matrix, ConfusionMatrixDisplay)
    from sklearn.model_selection import (train_test_split, cross_val_score, StratifiedKFold,
                                         GridSearchCV, RandomizedSearchCV)
    from sklearn.feature_selection import SelectKBest, mutual_info_classif, f_classif, RFE, RFECV
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler, RobustScaler, LabelEncoder
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.linear_model import LogisticRegression
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.svm import SVC
    from sklearn.neural_network import MLPClassifier
    from sklearn.ensemble import (RandomForestClassifier, GradientBoostingClassifier,
                                  StackingClassifier, VotingClassifier)
    from sklearn.inspection import permutation_importance
    from xgboost import XGBClassifier
    from category_encoders import MEstimateEncoder
    from category_encoders.target_encoder import TargetEncoder
    from imblearn.over_sampling import RandomOverSampler, SMOTE

    # Load the screening data (paths as given in the original post)
    train = pd.read_csv('/Autism_Prediction/train.csv')
    test = pd.read_csv('/Autism_Prediction/test.csv')
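    The cross-validation code below references train_set and y, which the excerpt never defines. A minimal preparation sketch, assuming Kaggle-style screening columns named ID and Class/ASD (both column names are assumptions, not shown in the original post):
    # Sketch (assumed column names): split the loaded dataframe into features and target.
    # 'ID' is assumed to be the identifier column and 'Class/ASD' the binary label.
    y = train['Class/ASD'].values                         # 1 = ASD, 0 = non-ASD (assumed coding)
    train_set = train.drop(columns=['ID', 'Class/ASD'])   # feature columns used by the fold loop below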
    K-fold cross-validation data split
    np.random.seed(1)  # the selectors involve some randomness; fixing the seed keeps runs reproducible
    kf = StratifiedKFold(n_splits=2, random_state=None, shuffle=False)  # random_state is None because shuffle is False
    score = []
    for train_index, val_index in kf.split(train_set, y):
        # indices for the train and validation sets (.copy() so the edits below don't modify a view)
        X_train, X_val = train_set.iloc[train_index, :].copy(), train_set.iloc[val_index, :].copy()
        y_train, y_val = y[train_index], y[val_index]

        # ******************************* CLEANING ***********************************
        # for the training set
        X_train.ethnicity = X_train.ethnicity.str.replace('others', 'Others', regex=False)
        X_train.ethnicity = X_train.ethnicity.str.replace('?', 'Others', regex=False)
        X_train.relation = X_train.relation.str.replace('?', 'Others', regex=False)
        X_train.relation = X_train.relation.str.replace('Health care professional', 'Others', regex=False)
        # for the validation set
        X_val.ethnicity = X_val.ethnicity.str.replace('others', 'Others', regex=False)
        X_val.ethnicity = X_val.ethnicity.str.replace('?', 'Others', regex=False)
        X_val.relation = X_val.relation.str.replace('?', 'Others', regex=False)
        X_val.relation = X_val.relation.str.replace('Health care professional', 'Others', regex=False)

        # ******************************* ENCODING ***********************************
        # for encoding, use the training values; do not recompute them on the validation/test set!
        le = LabelEncoder()
        for col in ['jaundice', 'austim']:
            X_train[col] = le.fit_transform(X_train[col])   # fit on the training set
            X_val[col] = le.transform(X_val[col])           # apply to the validation set

        # ********************* Encoding the relation column *********************
        # build an encoding map from the training set, then apply it to the validation and test sets
        rel = X_train.relation.value_counts()
        rel = dict(zip(rel.index, range(len(rel))))
        X_train.relation = X_train.relation.map(rel)
        # validation set: categories missing from the map get a dedicated "unseen" code
        X_val.relation = X_val.relation.map(rel)
        X_val.loc[X_val.relation.isna(), 'relation'] = len(rel)

        # ********************* Encoding the ethnicity column *********************
        eth = X_train.ethnicity.value_counts()
        eth = dict(zip(eth.index, range(len(eth))))
        X_train.ethnicity = X_train.ethnicity.map(eth)
        X_val.ethnicity = X_val.ethnicity.map(eth)
        X_val.loc[X_val.ethnicity.isna(), 'ethnicity'] = len(eth)

        # ********************* Encoding the country of residence ****************
        cont = X_train.contry_of_res.value_counts()
        cont = dict(zip(cont.index, range(len(cont))))
        X_train.contry_of_res = X_train.contry_of_res.map(cont)
        X_val.contry_of_res = X_val.contry_of_res.map(cont)
        X_val.loc[X_val.contry_of_res.isna(), 'contry_of_res'] = len(cont)

        # ******************************* Age grouping *******************************
        # age_grouper(X_train)
        # age_grouper(X_val)

        # ******************************* Standardization ****************************
        ss = StandardScaler()
        rs = RobustScaler()
        X_train[['result', 'age']] = rs.fit_transform(X_train[['result', 'age']])
        X_val[['result', 'age']] = rs.transform(X_val[['result', 'age']])
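    The relation, ethnicity, and contry_of_res columns above are all encoded with the same pattern: categories are ranked by training-set frequency, and categories unseen during training receive a sentinel code on the validation side. A hedged sketch that factors this pattern into one helper (the name frequency_encode is ours, not part of the original code):
    def frequency_encode(train_col, val_col):
        # Rank categories by training-set frequency; unseen validation categories
        # receive a sentinel code equal to the number of known categories.
        mapping = {cat: rank for rank, cat in enumerate(train_col.value_counts().index)}
        return train_col.map(mapping), val_col.map(mapping).fillna(len(mapping))

    # Equivalent to the per-column blocks above, e.g.:
    # X_train['relation'], X_val['relation'] = frequency_encode(X_train['relation'], X_val['relation'])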
     Classification with different models
    model_list = ['KNearestNeighbours', 'DecisionTree', 'LGBM', 'XGBRF', 'CatBoostClassifier', 'RandomForest', 'Logistic Regression', 'SVC']
    K-nearest neighbors model
    # K-nearest neighbors classifier
    kn_clf = KNeighborsClassifier(n_neighbors=6)
    kn_clf.fit(X_train, y_train)
    y_pred = pd.DataFrame(kn_clf.predict_proba(X_val))[1].values  # probability of the positive class
    score.append(roc_auc_score(y_val, y_pred))
    np.array(score)
    cm = confusion_matrix(y_val, kn_clf.predict(X_val))
    cmd = ConfusionMatrixDisplay(cm)
    cmd.plot();
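    roc_curve is imported above but never used in the excerpt; a small sketch of how the validation ROC curve could be drawn from the same predicted probabilities (the matplotlib import is an addition, not from the original post):
    import matplotlib.pyplot as plt

    # Sketch: validation ROC curve from the probabilities computed above.
    fpr, tpr, _ = roc_curve(y_val, y_pred)
    plt.plot(fpr, tpr, label=f'KNN (AUC = {roc_auc_score(y_val, y_pred):.3f})')
    plt.plot([0, 1], [0, 1], linestyle='--', color='grey')  # chance level
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.legend()
    plt.show()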

     

     Decision tree model
    # Decision tree classifier
    dt_clf = DecisionTreeClassifier(max_leaf_nodes=10, random_state=0, criterion='entropy')
    dt_clf.fit(X_train, y_train)
    y_pred = pd.DataFrame(dt_clf.predict_proba(X_val))[1].values  # probability of the positive class
    score.append(roc_auc_score(y_val, y_pred))
    np.array(score)
    cm = confusion_matrix(y_val, dt_clf.predict(X_val))
    cmd = ConfusionMatrixDisplay(cm)
    cmd.plot();

     LightGBM model
    # LightGBM classifier
    import lightgbm
    lgb_clf = lightgbm.LGBMClassifier(max_depth=2, random_state=4)
    lgb_clf.fit(X_train, y_train)
    y_pred = pd.DataFrame(lgb_clf.predict_proba(X_val))[1].values  # probability of the positive class
    score.append(roc_auc_score(y_val, y_pred))
    np.array(score)
    cm = confusion_matrix(y_val, lgb_clf.predict(X_val))
    cmd = ConfusionMatrixDisplay(cm)
    cmd.plot();
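    model_list also names RandomForest, Logistic Regression, and SVC, which the excerpt does not show. A hedged sketch that evaluates them with the same fit / predict_proba / ROC-AUC pattern (the hyperparameters here are illustrative choices, not taken from the original post), followed by the mean AUC collected so far:
    # Sketch: remaining models from model_list, evaluated with the same pattern.
    for clf in [RandomForestClassifier(n_estimators=200, random_state=0),
                LogisticRegression(max_iter=1000),
                SVC(probability=True, random_state=0)]:
        clf.fit(X_train, y_train)
        y_pred = clf.predict_proba(X_val)[:, 1]      # probability of the positive class
        score.append(roc_auc_score(y_val, y_pred))

    print(np.array(score))         # individual AUC values collected so far
    print(np.array(score).mean())  # mean AUC across the evaluated models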

     

     

    Code Availability

    For related questions and project development, feel free to reach out and exchange ideas.

  • Original article: https://blog.csdn.net/YINTENAXIONGNAIER/article/details/138019556