自闭症主要受到遗传和环境因素的共同影响。由于自闭症是一种谱系障碍，每个自闭症患者都有各自独特的优势和挑战。自闭症患者在学习、思考和解决问题方面的能力差异很大：有的表现出很高的技能，有的则面临严峻的挑战。研究表明，高质量的早期干预可以改善学习、沟通和社交技能，以及其背后的大脑发育。然而，诊断过程可能需要数年时间。本项目主要实现自闭症的早期检测（正常 vs. 非正常的二分类），为早期筛查和干预提供及时的预警。
自闭症脑电数据集
- from sklearn.metrics import roc_auc_score
- from sklearn.model_selection import train_test_split
- from xgboost import XGBClassifier
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.feature_selection import SelectKBest
- from sklearn.feature_selection import mutual_info_classif,f_classif
- from sklearn.pipeline import Pipeline
- from sklearn.model_selection import cross_val_score,StratifiedKFold
- from sklearn.feature_selection import RFE
- from sklearn.feature_selection import RFECV
- from sklearn.neural_network import MLPClassifier
- from category_encoders.target_encoder import TargetEncoder
- from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
- from sklearn.tree import DecisionTreeClassifier
- from sklearn.linear_model import LogisticRegression
- from sklearn.preprocessing import StandardScaler,RobustScaler
- from category_encoders import MEstimateEncoder
- from sklearn.preprocessing import LabelEncoder
- from imblearn.over_sampling import RandomOverSampler
- from sklearn.inspection import permutation_importance
- from imblearn.over_sampling import SMOTE
- from sklearn.svm import SVC
- from sklearn.ensemble import GradientBoostingClassifier
- from sklearn.ensemble import RandomForestClassifier
- from sklearn.tree import DecisionTreeClassifier
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.linear_model import LogisticRegression
- from sklearn.metrics import confusion_matrix, accuracy_score
- from sklearn.model_selection import train_test_split
- from sklearn.naive_bayes import GaussianNB
- from sklearn.ensemble import StackingClassifier,VotingClassifier
- from sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix, ConfusionMatrixDisplay
-
# pandas / numpy are used throughout but are not imported in the visible
# import list, so bring them into scope here.
import numpy as np
import pandas as pd


def _frequency_rank_map(column):
    """Return ``{category: rank}`` for *column*, following ``value_counts()`` order.

    ``value_counts()`` lists the most frequent category first, so that
    category receives rank 0, the next one rank 1, and so on.
    """
    counts = column.value_counts()
    return dict(zip(counts.index, range(len(counts))))


train = pd.read_csv('/Autism_Prediction/train.csv')
test = pd.read_csv('/Autism_Prediction/test.csv')

# Fixed seed: there is some randomness in how the selectors work, and without
# it every run gives different results.
np.random.seed(1)

# Cross-validation splitter; random_state stays None because shuffle is False.
kf = StratifiedKFold(n_splits=2, random_state=None, shuffle=False)
score = []

# NOTE(review): `train_set` (feature frame) and `y` (labels) are not defined
# in the visible code; they must be derived from `train` before this loop.
for train_index, val_index in kf.split(train_set, y):

    # Take explicit copies: the frames are modified in place below, and
    # assigning into an `iloc` slice triggers SettingWithCopyWarning (and is
    # not reliable under pandas copy-on-write).
    X_train = train_set.iloc[train_index, :].copy()
    X_val = train_set.iloc[val_index, :].copy()
    # assumes `y` accepts positional integer-array indexing (NumPy array, or a
    # Series with a default RangeIndex) -- TODO confirm.
    y_train, y_val = y[train_index], y[val_index]

    # ------------------------------ cleaning ------------------------------
    # Same literal replacements, in the same order, on both splits.
    for frame in (X_train, X_val):
        frame['ethnicity'] = (
            frame['ethnicity']
            .str.replace('others', 'Others', regex=False)
            .str.replace('?', 'Others', regex=False)
        )
        frame['relation'] = (
            frame['relation']
            .str.replace('?', 'Others', regex=False)
            .str.replace('Health care professional', 'Others', regex=False)
        )

    # ------------------------------ encoding ------------------------------
    # Encoders are always fitted on the training split only and then applied
    # to the validation split; never re-fit them on held-out data.
    le = LabelEncoder()
    for col in ['jaundice', 'austim']:
        X_train[col] = le.fit_transform(X_train[col])
        X_val[col] = le.transform(X_val[col])

    # Frequency-rank maps built from the training split. The names `rel`,
    # `eth` and `cont` are kept so later code can still apply them.
    # Categories missing from a map become NaN after `.map()` and are filled
    # with `len(map)`, i.e. one extra code. `fillna` replaces the original
    # chained assignment `X_val.col[X_val.col.isna()] = ...`, which writes to
    # a temporary object under pandas copy-on-write.
    rel = _frequency_rank_map(X_train['relation'])
    X_train['relation'] = X_train['relation'].map(rel)
    X_val['relation'] = X_val['relation'].map(rel).fillna(len(rel))

    eth = _frequency_rank_map(X_train['ethnicity'])
    X_train['ethnicity'] = X_train['ethnicity'].map(eth)
    X_val['ethnicity'] = X_val['ethnicity'].map(eth).fillna(len(eth))

    cont = _frequency_rank_map(X_train['contry_of_res'])
    X_train['contry_of_res'] = X_train['contry_of_res'].map(cont)
    X_val['contry_of_res'] = X_val['contry_of_res'].map(cont).fillna(len(cont))

    # Age grouping via age_grouper(X_train) / age_grouper(X_val) is disabled.

    # --------------------------- standardization ---------------------------
    ss = StandardScaler()  # not used in the visible code; kept for later code
    rs = RobustScaler()
    # Fit the scaler on the training split, reuse its statistics on validation.
    X_train[['result', 'age']] = rs.fit_transform(X_train[['result', 'age']])
    X_val[['result', 'age']] = rs.transform(X_val[['result', 'age']])
-
# Names of the classifiers, presumably used as labels when comparing models.
# NOTE(review): only the first three (KNN, decision tree, LightGBM) are fitted
# in the visible code -- confirm the remaining ones are trained elsewhere.
model_list = ['KNearestNeighbours', 'DecisionTree', 'LGBM','XGBRF','CatBoostClassifier','RandomForest','Logistic Regression', 'SVC' ]
# K-nearest-neighbours classifier (6 neighbours): fit on the training split,
# record the validation ROC AUC and plot the validation confusion matrix.
# NOTE(review): X_train / X_val / y_train / y_val and `score` come from
# earlier code; confirm whether this evaluation is meant to run once per CV
# fold rather than only on the last fold's split.
kn_clf = KNeighborsClassifier(n_neighbors=6)
kn_clf.fit(X_train, y_train)

# Second column of predict_proba = probability of kn_clf.classes_[1]; taken
# directly from the array instead of round-tripping through a DataFrame.
y_pred = kn_clf.predict_proba(X_val)[:, 1]
score.append(roc_auc_score(y_val, y_pred))

cm = confusion_matrix(y_val, kn_clf.predict(X_val))
cmd = ConfusionMatrixDisplay(cm)
cmd.plot()
# Decision tree (entropy criterion, at most 10 leaves, fixed seed): fit on the
# training split, record the validation ROC AUC and plot the validation
# confusion matrix.
# NOTE(review): X_train / X_val / y_train / y_val and `score` come from
# earlier code; confirm whether this evaluation is meant to run once per CV
# fold rather than only on the last fold's split.
dt_clf = DecisionTreeClassifier(max_leaf_nodes=10, random_state=0, criterion='entropy')
dt_clf.fit(X_train, y_train)

# Second column of predict_proba = probability of dt_clf.classes_[1]; taken
# directly from the array instead of round-tripping through a DataFrame.
y_pred = dt_clf.predict_proba(X_val)[:, 1]
score.append(roc_auc_score(y_val, y_pred))

cm = confusion_matrix(y_val, dt_clf.predict(X_val))
cmd = ConfusionMatrixDisplay(cm)
cmd.plot()
# LightGBM classifier (max_depth=2, fixed seed): fit on the training split,
# record the validation ROC AUC and plot the validation confusion matrix.
# NOTE(review): X_train / X_val / y_train / y_val and `score` come from
# earlier code; confirm whether this evaluation is meant to run once per CV
# fold rather than only on the last fold's split.
import lightgbm

lgb_clf = lightgbm.LGBMClassifier(max_depth=2, random_state=4)
lgb_clf.fit(X_train, y_train)

# Second column of predict_proba = probability of lgb_clf.classes_[1]; taken
# directly from the array instead of round-tripping through a DataFrame.
y_pred = lgb_clf.predict_proba(X_val)[:, 1]
score.append(roc_auc_score(y_val, y_pred))

cm = confusion_matrix(y_val, lgb_clf.predict(X_val))
cmd = ConfusionMatrixDisplay(cm)
cmd.plot()
如有相关问题或项目开发需求，欢迎交流沟通。