import pandas as pd
import os
import matplotlib.pyplot as plt
from hbn.constants import Defaults
# Load model data for one training run: resolve the run directory, read the
# saved PNG named "test-roc_auc_score-..." and the per-classifier
# feature-importance CSV.
MODEL_DIR = os.path.join(Defaults.MODEL_DIR, '2023-03-05_00-08-51-51S/model_410307956')
model_auc = plt.imread(os.path.join(MODEL_DIR, "out-localspec-20230305T162237.383579/test-roc_auc_score-20230305T162330.839932.png"))
df_feat = pd.read_csv(os.path.join(MODEL_DIR, 'classifier-feature_importance.csv'))

# Show the image that was read from disk.
plt.imshow(model_auc)
<matplotlib.image.AxesImage at 0x7f093d5c7790>
# Split the feature-importance table by the value of its 'clf' column so each
# classifier can be inspected on its own.
df_decision_feat = df_feat[df_feat['clf'] == "DecisionTreeClassifier"]
df_logistic_feat = df_feat[df_feat['clf'] == "LogisticRegressionCV"]
Top Features by Feature Importance for Decision Tree Classifier
# Ten decision-tree rows with the largest 'feature_probabilities_rank_order'.
df_decision_feat.nlargest(10, 'feature_probabilities_rank_order')
| | feature_names_rank_order | feature_probabilities_rank_order | feature_names_common | feature_probabilities_common | feature_sum | feature_names_sum | participants | model | clf | target | features | assessment | domains | measures | abbrevs |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | numeric__SDQ,SDQ_27_x | 1.0 | numeric__SDQ,SDQ_27_x | 1.0 | 1.053863 | numeric__SDQ,SDQ_27_x | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
3930 | numeric__ARI_P,ARI_P_03_x | 1.0 | numeric__ARI_P,ARI_P_03_x | 1.0 | 0.000000 | numeric__ARI_P,ARI_P_03_x | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
0 | numeric__SympChck,CSC_35P_y | 0.9 | numeric__SympChck,CSC_35P_y | 0.9 | 5.893814 | numeric__SympChck,CSC_35P_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
1271 | numeric__CBCL,CBCL_17_x | 0.7 | numeric__CBCL,CBCL_17_x | 0.7 | 0.000000 | numeric__CBCL,CBCL_27_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
1272 | numeric__CBCL,CBCL_16_x | 0.7 | numeric__CBCL,CBCL_16_x | 0.7 | 0.000000 | numeric__CBCL,CBCL_26_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
1273 | numeric__CBCL,CBCL_15_x | 0.7 | numeric__CBCL,CBCL_15_x | 0.7 | 0.000000 | numeric__CBCL,CBCL_25_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
1274 | numeric__CBCL,CBCL_14_x | 0.7 | numeric__CBCL,CBCL_14_x | 0.7 | 0.000000 | numeric__CBCL,CBCL_24_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
1275 | numeric__CBCL,CBCL_13_x | 0.7 | numeric__CBCL,CBCL_13_x | 0.7 | 0.000000 | numeric__CBCL,CBCL_23_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
1276 | numeric__CBCL,CBCL_12_x | 0.7 | numeric__CBCL,CBCL_12_x | 0.7 | 0.000000 | numeric__CBCL,CBCL_22_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
1277 | numeric__CBCL,CBCL_113C_x | 0.7 | numeric__CBCL,CBCL_113C_x | 0.7 | 0.000000 | numeric__CBCL,CBCL_21_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | DecisionTreeClassifier | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
# Horizontal bar chart of the ten decision-tree rows with the largest
# 'feature_probabilities_rank_order': feature name on the categorical axis,
# probability as bar length.
df_decision_feat.nlargest(10, 'feature_probabilities_rank_order').plot.barh(
    x='feature_names_rank_order',
    y='feature_probabilities_rank_order',
)
<AxesSubplot:ylabel='feature_names_rank_order'>
Top Features by Feature Importance for Logistic Regression CV
# Ten logistic-regression rows with the largest 'feature_probabilities_rank_order'.
df_logistic_feat.nlargest(10, 'feature_probabilities_rank_order')
| | feature_names_rank_order | feature_probabilities_rank_order | feature_names_common | feature_probabilities_common | feature_sum | feature_names_sum | participants | model | clf | target | features | assessment | domains | measures | abbrevs |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3931 | numeric__ESWAN,MDD_8A_x | 0.4 | numeric__ESWAN,MDD_8A_x | 0.4 | 7.978700 | numeric__PreInt_TxHx,Past_DX | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
3932 | numeric__PreInt_TxHx,Past_DX | 0.4 | numeric__PreInt_TxHx,Past_DX | 0.4 | 7.283470 | numeric__ESWAN,MDD_8A_x | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
3934 | numeric__PreInt_TxHx,psych_meds_cur | 0.4 | numeric__PreInt_TxHx,psych_meds_cur | 0.4 | 4.912422 | numeric__SDQ,SDQ_15_x | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
7860 | numeric__PreInt_Demos_Home,living_12_y | 0.4 | numeric__PreInt_Demos_Home,living_12_y | 0.4 | -8.063588 | numeric__SympChck,CSC_55fC_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
3937 | numeric__SCQ,SCQ_31_y | 0.3 | numeric__SCQ,SCQ_31_y | 0.3 | 3.852732 | numeric__SDQ,SDQ_26_x | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
4718 | numeric__SRS_Pre,SRS_P_63_y | 0.3 | numeric__SRS_Pre,SRS_P_63_y | 0.3 | 0.020622 | numeric__CBCL,CBCL_Ext_T_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
5174 | numeric__CBCL,CBCL_12_y | 0.3 | numeric__CBCL,CBCL_12_y | 0.3 | 0.000000 | numeric__SCARED_P,SCARED_P_25_x | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
5175 | numeric__CBCL,CBCL_113C_y | 0.3 | numeric__CBCL,CBCL_113C_y | 0.3 | 0.000000 | numeric__SRS,SRS_12_x | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
5458 | numeric__CBCL,CBCL_56G_x | 0.3 | numeric__CBCL,CBCL_56G_x | 0.3 | 0.000000 | numeric__Barratt,Barratt_P1_Occ_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
6007 | numeric__PreInt_Demos_Fam,P1_Weight | 0.3 | numeric__PreInt_Demos_Fam,P1_Weight | 0.3 | 0.000000 | numeric__NLES_P,NLES_P_13b_y | NDARAA075AMK-NDARAB653ZXP-NDARAL828WXM-NDARAM2... | 20230305T162237.383579 | LogisticRegressionCV | DX_01_Cat_new_binarize | Parent Measures-all | Parent Measures | all | all | all |
# Horizontal bar chart of the ten logistic-regression rows with the largest
# 'feature_probabilities_rank_order': feature name on the categorical axis,
# probability as bar length.
df_logistic_feat.nlargest(10, 'feature_probabilities_rank_order').plot.barh(
    x='feature_names_rank_order',
    y='feature_probabilities_rank_order',
)
<AxesSubplot:ylabel='feature_names_rank_order'>