import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from hbn.constants import Defaults
from hbn.visualization import visualize
import warnings

# Silence all Python warnings for the rest of the notebook session.
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2
%matplotlib inline
import plotly.io as pio

# Render plotly figures inline in the notebook.
pio.renderers.default = 'notebook'
# Alternative renderer, kept for reference:
#pio.renderers.default = 'iframe'
#import ipywidgets as widgets # interactive display
#%config InlineBackend.figure_format = 'svg' # other available formats are: 'retina', 'png', 'jpeg', 'pdf'
Models are trained to classify diagnoses across 5 major disorders: ADHD, ASD, Anxiety, Depression, and Reading Impairment
# check models
# List every model run whose path matches the all-features glob.
from hbn.models.predictive_modeling import check_models

check_models(filter='*all_feature_models/*')
2023-04-06_11-33-21-21S-57: ['No Diagnosis Given', 'Generalized Anxiety Disorder', 'Social Anxiety (Social Phobia)', 'Specific Phobia', 'Separation Anxiety', 'Other Specified Anxiety Disorder', 'Selective Mutism', 'Panic Disorder', 'Unspecified Anxiety Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-22_17-43-23-23S-41: ['No Diagnosis Given', 'Major Depressive Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Disruptive Mood Dysregulation Disorder', 'Other Specified Depressive Disorder', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-16_15-03-36-30: ['No Diagnosis Given', 'ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-19_15-30-25-25S-53: ['ADHD-Combined Type', 'ADHD-Inattentive Type', 'No Diagnosis Given', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-22_17-43-23-23S-13: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-21_17-42-28-28S-61: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-19_15-45-21-21S-72: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-17_16-20-51-94: ['No Diagnosis Given', 'ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-22_17-43-14-14S-30: ['No Diagnosis Given', 'Other Specified Anxiety Disorder', 'Generalized Anxiety Disorder', 'Separation Anxiety', 'Social Anxiety (Social Phobia)', 'Specific Phobia', 'Unspecified Anxiety Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']: ['male']
2023-04-05_17-24-04-04S-11: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-19_15-31-26-26S-7: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-22_17-43-05-05S-87: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-19_15-30-01-01S-5: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-04-05_17-22-04-04S-21: ['No Diagnosis Given', 'Generalized Anxiety Disorder', 'Other Specified Anxiety Disorder', 'Social Anxiety (Social Phobia)', 'Separation Anxiety', 'Specific Phobia', 'Selective Mutism', 'Unspecified Anxiety Disorder', 'Panic Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-04-05_17-24-04-04S-8: ['ADHD-Combined Type', 'ADHD-Inattentive Type', 'No Diagnosis Given', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['male']
2023-04-06_11-33-19-19S-98: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)']: ['DX_01_Cat_new_binarize']: ['male']
2023-04-05_08-38-54-54S-26: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['female']
2023-04-05_08-38-54-54S-42: ['No Diagnosis Given', 'Major Depressive Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Disruptive Mood Dysregulation Disorder', 'Other Specified Depressive Disorder', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']: ['female']
2023-04-01_18-18-41-72: ['No Diagnosis Given', 'ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-04-05_17-24-04-04S-31: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-22_16-13-13-13S-76: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['male']
2023-04-06_11-33-22-22S-60: ['No Diagnosis Given', 'ADHD-Inattentive Type', 'ADHD-Combined Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-19_15-30-41-41S-99: ['No Diagnosis Given', 'ADHD-Inattentive Type', 'ADHD-Combined Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['female']
2023-04-05_17-26-05-05S-96: ['No Diagnosis Given', 'Other Specified Anxiety Disorder', 'Generalized Anxiety Disorder', 'Separation Anxiety', 'Social Anxiety (Social Phobia)', 'Specific Phobia', 'Unspecified Anxiety Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']: ['male']
2023-04-05_17-24-04-04S-97: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-22_17-43-19-19S-66: ['No Diagnosis Given', 'Generalized Anxiety Disorder', 'Social Anxiety (Social Phobia)', 'Specific Phobia', 'Separation Anxiety', 'Other Specified Anxiety Disorder', 'Selective Mutism', 'Panic Disorder', 'Unspecified Anxiety Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']: ['female']
2023-04-03_10-31-05-05S-20: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-04-05_08-38-55-55S-54: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-04-05_08-38-53-53S-82: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-22_17-43-10-10S-35: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['female']
# # 2023 models
# models_dict = {
# 'Anxiety': 'all_feature_models/2023-03-09_14-35-10-10S-77',
# 'Depression': 'all_feature_models/2023-03-09_14-35-15-15S-12',
# 'ADHD': 'all_feature_models/2023-03-09_14-31-02-61',
# 'ASD': 'all_feature_models/2023-03-09_14-35-06-06S-61',
# 'Learning-Disorder-Reading-Impairment': 'all_feature_models/2023-03-09_14-35-19-19S-5',
# 'ASD-females': 'all_feature_models/2023-03-09_14-35-37-37S-20',
# 'ASD-males': 'all_feature_models/2023-03-09_14-35-31-31S-25',
# 'ADHD-females': 'all_feature_models/2023-03-09_14-35-27-27S-21',
# 'ADHD-males': 'all_feature_models/2023-03-09_14-35-23-23S-59'
# }
# Map each participant-group label to its model run directory. The loader
# loop joins these paths onto Defaults.MODEL_DIR.
# Every run ID printed by check_models above is used by exactly one key.
models_dict = {
    'ADHD-incl-comorbidities': 'all_feature_models/2023-03-16_15-03-36-30',  # incl comorbidities
    'ADHD': 'all_feature_models/2023-03-17_16-20-51-94',
    'ADHD-females': 'all_feature_models/2023-03-19_15-30-41-41S-99',
    'ADHD-males': 'all_feature_models/2023-03-19_15-30-25-25S-53',
    'ADHD-fullmodel': 'all_feature_models/2023-04-01_18-18-41-72',
    'ADHD-fullmodel-males': 'all_feature_models/2023-04-05_17-24-04-04S-8',
    'ADHD-fullmodel-females': 'all_feature_models/2023-04-06_11-33-22-22S-60',
    'ASD': 'all_feature_models/2023-03-21_17-42-28-28S-61',
    'ASD-females': 'all_feature_models/2023-03-19_15-31-26-26S-7',
    'ASD-males': 'all_feature_models/2023-03-22_16-13-13-13S-76',
    'ASD-fullmodel': 'all_feature_models/2023-04-03_10-31-05-05S-20',
    'ASD-fullmodel-females': 'all_feature_models/2023-04-05_08-38-54-54S-26',
    'ASD-fullmodel-males': 'all_feature_models/2023-04-05_08-38-53-53S-82',
    'Depression': 'all_feature_models/2023-03-19_15-30-01-01S-5',
    'Depression-females': 'all_feature_models/2023-03-22_17-43-23-23S-41',
    'Depression-males': 'all_feature_models/2023-03-22_17-43-23-23S-13',
    'Depression-fullmodel': 'all_feature_models/2023-04-05_08-38-55-55S-54',
    'Depression-fullmodel-females': 'all_feature_models/2023-04-05_08-38-54-54S-42',
    'Depression-fullmodel-males': 'all_feature_models/2023-04-06_11-33-19-19S-98',
    'Anxiety-females': 'all_feature_models/2023-03-22_17-43-19-19S-66',
    'Anxiety-males': 'all_feature_models/2023-03-22_17-43-14-14S-30',
    'Anxiety-fullmodel': 'all_feature_models/2023-04-05_17-22-04-04S-21',
    'Anxiety-fullmodel-males': 'all_feature_models/2023-04-05_17-26-05-05S-96',
    'Anxiety-fullmodel-females': 'all_feature_models/2023-04-06_11-33-21-21S-57',
    'Reading Impairment': 'all_feature_models/2023-03-19_15-45-21-21S-72',
    'Reading Impairment-females': 'all_feature_models/2023-03-22_17-43-10-10S-35',
    # Was 2023-03-19_15-31-26-26S-7, which is the ASD female-only run (already
    # used by 'ASD-females'). The male-only Reading Impairment run in the
    # check_models listing is 2023-03-22_17-43-05-05S-87.
    'Reading Impairment-males': 'all_feature_models/2023-03-22_17-43-05-05S-87',
    'Reading Impairment-fullmodel': 'all_feature_models/2023-04-05_17-24-04-04S-97',
    'Reading Impairment-fullmodel-females': 'all_feature_models/2023-04-05_17-24-04-04S-11',
    'Reading Impairment-fullmodel-males': 'all_feature_models/2023-04-05_17-24-04-04S-31',
}
# loop over models
# Read each model's performance table (and its feature importances, when
# available), tag the rows with the participant-group label, and stack them.
# Frames are collected in lists and concatenated once after the loop rather
# than re-concatenating the growing frame on every iteration.
classify_frames = []
feature_frames = []
for key, value in models_dict.items():
    MODEL_DIR = os.path.join(Defaults.MODEL_DIR, value)

    df_classify = pd.read_csv(os.path.join(MODEL_DIR, 'classifier-all-phenotypic-models-performance.csv'))
    # NOTE(review): this swaps the two labels ('model-data' -> 'null',
    # 'model-null' -> 'data'); any other value becomes NaN. Presumably the
    # labels are inverted in the CSV -- confirm against the code that writes it.
    df_classify['data'] = df_classify['data'].map({'model-data': 'null', 'model-null': 'data'})
    df_classify['participant_group'] = key
    classify_frames.append(df_classify)

    # Feature importances are best-effort: skip a model whose file is missing
    # or unreadable, but let unrelated errors surface.
    try:
        df_feature = pd.read_csv(os.path.join(MODEL_DIR, 'classifier-feature_importance.csv'))
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        continue
    df_feature['participant_group'] = key
    feature_frames.append(df_feature)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_all = pd.concat(classify_frames) if classify_frames else pd.DataFrame()
df_features = pd.concat(feature_frames) if feature_frames else pd.DataFrame()
# set plotting style
# Calls the project's visualize.plotting_style() helper (hbn.visualization).
visualize.plotting_style()
- Each figure describes a model (Decision Tree Classifier) trained on either all assessments OR one assessment (e.g., Parent, Child, Teacher) across multiple domains (e.g., Language Tasks, Cognitive Testing)
- Models (x axis) show ROC AUC results
Exceptions:
- Demographics (race and ethnicity, sex, age) are included in these models; diagnosis subtype and comorbidities are NOT INCLUDED
- Only numeric variables are included (no categorical variables)
- SMOTE (minor class upsampling) is applied to models
Full feature models (Child + Teacher + Parent)
#df = df_all[df_all['assessment']=='all']
# Keep only the combined-sex full-feature models for the five diagnoses.
to_keep = ['ADHD-fullmodel', 'Depression-fullmodel', 'Anxiety-fullmodel', 'Reading Impairment-fullmodel', 'ASD-fullmodel']

# .copy() so the relabelling below writes to an independent frame rather
# than to a slice of df_all.
df1 = df_all[(df_all['participant_group'].isin(to_keep)) &
             (df_all['target']=='DX_01_Cat_new_binarize')].copy()
# Strip the suffix for cleaner axis labels; regex=False treats it literally.
df1['participant_group'] = df1['participant_group'].str.replace("-fullmodel", "", regex=False)

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
Full feature models (Child + Teacher + Parent) - Males/Females
# Keep the sex-specific full-feature models; the pattern is a regex alternation.
df1 = df_all[df_all['participant_group'].str.contains('-fullmodel-females|-fullmodel-males') &
             (df_all['target']=='DX_01_Cat_new_binarize')].copy()
# Strip the suffix for cleaner axis labels; regex=False treats it literally.
df1['participant_group'] = df1['participant_group'].str.replace("-fullmodel", "", regex=False)

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
ADHD - difference between including comorbidities in a basic demographic model versus excluding them from the model
Demographic variables are race, ethnicity, and age
# Rows whose 'measures' value is 'Demographics', across all participant groups.
df = df_all[df_all['measures']=='Demographics']

# Restrict to the binarized primary-diagnosis target.
df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
All features - ADHD - Parent vs. Child vs. Teacher
# ADHD (combined sexes): compare models by informant / feature set.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group'].isin(['ADHD'])].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')].copy()
# Relabel the assessment-abbreviation pairs for display; pairs not listed
# here become NaN.
df1['new_col'] = df1['new_col'].map({'Child Measures-all': 'Child',
                                     'Parent Measures-Basic_Demos': 'Demographics',
                                     'Parent Measures-all': 'Parent',
                                     'Teacher Measures-all': 'Teacher',
                                     'all-all': 'Full Model'})

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
All features - ADHD (females) - Parent vs. Child vs. Teacher
# ADHD (females): one bar group per assessment-abbreviation pair.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group']=='ADHD-females'].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
All features - ADHD (males) - Parent vs. Child vs. Teacher
# ADHD (males): one bar group per assessment-abbreviation pair.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group']=='ADHD-males'].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
All features - ASD - Parent vs. Child vs. Teacher
# ASD (combined sexes): one bar group per assessment-abbreviation pair.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group']=='ASD'].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
All features - ASD (females) - Parent vs. Child vs. Teacher
# ASD (females): one bar group per assessment-abbreviation pair.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group']=='ASD-females'].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
All features - ASD (males) - Parent vs. Child vs. Teacher
# ASD (males): one bar group per assessment-abbreviation pair.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group']=='ASD-males'].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
All features - Depression - Parent vs. Child vs. Teacher
# Depression (combined sexes): one bar group per assessment-abbreviation pair.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group']=='Depression'].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
All features - Reading Impairment - Parent vs. Child vs. Teacher
# Reading Impairment (combined sexes): one bar group per assessment-abbreviation pair.
# .copy() so the new column is written to an independent frame.
df = df_all[df_all['participant_group']=='Reading Impairment'].copy()
df['new_col'] = df['assessment'] + '-' + df['abbrevs']

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='new_col', y='roc_auc_score', title='')
Parent features - compare across diagnoses
# Parent-measure models using all parent features, compared across groups.
df = df_all[(df_all['assessment']=='Parent Measures') & (df_all['abbrevs']=='all')]

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
Child features - compare across diagnoses
# Child-measure models using all child features, compared across groups.
df = df_all[(df_all['assessment']=='Child Measures') & (df_all['abbrevs']=='all')]

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
Teacher features - compare across diagnoses
# Teacher-measure models using all teacher features, compared across groups.
df = df_all[(df_all['assessment']=='Teacher Measures') & (df_all['abbrevs']=='all')]

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
Basic Demos features - compare across diagnoses
# Parent-measure rows that are NOT the all-features model.
# NOTE(review): presumably only 'Basic_Demos' remains after this filter -- confirm.
df = df_all[(df_all['assessment']=='Parent Measures') & (df_all['abbrevs']!='all')]

df1 = df[(df['target']=='DX_01_Cat_new_binarize')]

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
Models are trained to classify diagnoses across 5 major disorders: ADHD, ASD, Anxiety, Depression, and Reading Impairment
- Each figure describes a model (Decision Tree Classifier) trained on individual measures (e.g., CELF, EEG etc) across multiple domains (e.g., Language Tasks, Cognitive Testing)
- Models (x axis) show ROC AUC results
Exceptions:
- Demographics (race and ethnicity, sex, age) are included in these models; diagnosis subtype and comorbidities are NOT INCLUDED
- Only numeric variables are included (no categorical variables)
- SMOTE (minor class upsampling) is applied to models
# check models
# List every model run whose path matches the SMOTE glob.
from hbn.models.predictive_modeling import check_models

check_models(filter='*all_demos_SMOTE/*')
#check_models(filter='*all_demos_CORRECT/*')
2023-03-26_19-05-08-29: ['No Diagnosis Given', 'ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-26_19-07-42-42S-9: ['ADHD-Combined Type', 'ADHD-Inattentive Type', 'No Diagnosis Given', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-26_19-09-52-52S-21: ['No Diagnosis Given', 'Other Specified Anxiety Disorder', 'Generalized Anxiety Disorder', 'Separation Anxiety', 'Social Anxiety (Social Phobia)', 'Specific Phobia', 'Unspecified Anxiety Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-26_19-09-17-17S-24: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-26_19-09-49-49S-93: ['No Diagnosis Given', 'Generalized Anxiety Disorder', 'Social Anxiety (Social Phobia)', 'Specific Phobia', 'Separation Anxiety', 'Other Specified Anxiety Disorder', 'Selective Mutism', 'Panic Disorder', 'Unspecified Anxiety Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-26_19-10-54-54S-20: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-26_19-09-21-21S-45: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-26_19-10-13-13S-35: ['No Diagnosis Given', 'Major Depressive Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Disruptive Mood Dysregulation Disorder', 'Other Specified Depressive Disorder', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-26_19-08-18-18S-6: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['male']
2023-03-26_19-07-27-27S-72: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-26_19-06-38-38S-26: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-26_19-06-18-18S-70: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['female', 'male']
2023-03-26_19-08-53-53S-20: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']: ['female']
2023-03-26_19-07-47-47S-5: ['No Diagnosis Given', 'ADHD-Inattentive Type', 'ADHD-Combined Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']: ['female']
Compare model with/without SMOTE (upsampling minority class)
SMOTE always does better
# Two ADHD runs to compare: one from the 'all_demos_CORRECT' folder (labelled
# noSMOTE) and one from the 'all_demos_SMOTE' folder.
models_dict = {
    'ADHD-noSMOTE': 'all_demos_CORRECT/2023-03-07_18-43-26-43',
    'ADHD': 'all_demos_SMOTE/2023-03-26_19-05-08-29',
}
# loop over models
# Read each model's performance table, tag it with its label, and stack the
# tables once after the loop instead of re-concatenating on every iteration.
classify_frames = []
for key, value in models_dict.items():
    MODEL_DIR = os.path.join(Defaults.MODEL_DIR, value)

    df_classify = pd.read_csv(os.path.join(MODEL_DIR, 'classifier-all-phenotypic-models-performance.csv'))
    # NOTE(review): this swaps the two labels; presumably they are inverted
    # in the CSV -- confirm against the code that writes it.
    df_classify['data'] = df_classify['data'].map({'model-data': 'null', 'model-null': 'data'})
    df_classify['participant_group'] = key
    classify_frames.append(df_classify)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_all = pd.concat(classify_frames) if classify_frames else pd.DataFrame()

# Compare the two runs on the Parent Measures models only.
df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['assessment']=='Parent Measures')]

visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='')
# Map each participant-group label to its model run directory. The loader
# loop joins these paths onto Defaults.MODEL_DIR.
# Each run ID printed by check_models above is used by exactly one key.
models_dict = {
    'ADHD': 'all_demos_SMOTE/2023-03-26_19-05-08-29',
    'ASD': 'all_demos_SMOTE/2023-03-26_19-06-18-18S-70',
    'Depression': 'all_demos_SMOTE/2023-03-26_19-06-38-38S-26',
    'Reading Impairment': 'all_demos_SMOTE/2023-03-26_19-07-27-27S-72',
    'ASD-females': 'all_demos_SMOTE/2023-03-26_19-08-53-53S-20',
    'ASD-males': 'all_demos_SMOTE/2023-03-26_19-08-18-18S-6',
    'ADHD-females': 'all_demos_SMOTE/2023-03-26_19-07-47-47S-5',
    'ADHD-males': 'all_demos_SMOTE/2023-03-26_19-07-42-42S-9',
    'Depression-females': 'all_demos_SMOTE/2023-03-26_19-10-13-13S-35',
    'Depression-males': 'all_demos_SMOTE/2023-03-26_19-10-54-54S-20',
    'Anxiety-females': 'all_demos_SMOTE/2023-03-26_19-09-49-49S-93',
    'Anxiety-males': 'all_demos_SMOTE/2023-03-26_19-09-52-52S-21',
    'Reading Impairment-females': 'all_demos_SMOTE/2023-03-26_19-09-21-21S-45',
    'Reading Impairment-males': 'all_demos_SMOTE/2023-03-26_19-09-17-17S-24',
}
# loop over models
# Read each model's performance table, tag it with its participant-group
# label, and stack the tables once after the loop instead of re-concatenating
# on every iteration.
classify_frames = []
for key, value in models_dict.items():
    MODEL_DIR = os.path.join(Defaults.MODEL_DIR, value)

    df_classify = pd.read_csv(os.path.join(MODEL_DIR, 'classifier-all-phenotypic-models-performance.csv'))
    # NOTE(review): this swaps the two labels; presumably they are inverted
    # in the CSV -- confirm against the code that writes it.
    df_classify['data'] = df_classify['data'].map({'model-data': 'null', 'model-null': 'data'})
    df_classify['participant_group'] = key
    classify_frames.append(df_classify)

# pd.concat raises on an empty list, so fall back to an empty frame.
df_all = pd.concat(classify_frames) if classify_frames else pd.DataFrame()
Child Measures - Language Tasks
# Show the distinct assessment categories present in the loaded results.
df_all['assessment'].unique()
array(['Child Measures', 'Clinical Measures', 'Parent Measures',
'Teacher Measures'], dtype=object)
domain = 'Language_Tasks'

# Measures (abbreviations) in this domain under the child assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']=='Child Measures')]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']=='Child Measures') &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Cognitive Testing
domain = 'Cognitive_Testing'

# Restrict the measure list to the child assessment, matching the filter used
# in the loop. Previously any assessment's measures in this domain were
# listed, which could produce an empty figure for a non-child measure.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']=='Child Measures')]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']=='Child Measures') &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Interview_of_Emotional_and_Psychological_Function
domain = 'Interview_of_Emotional_and_Psychological_Function'
assessment = 'Child Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Interview_of_Emotional_and_Psychological_Function
# NOTE(review): this cell repeats the preceding 'Child Measures -
# Interview_of_Emotional_and_Psychological_Function' cell verbatim.
domain = 'Interview_of_Emotional_and_Psychological_Function'
assessment = 'Child Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Medical_Status_Measures
domain = 'Medical_Status_Measures'
assessment = 'Child Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Physical_Fitness_and_Status
domain = 'Physical_Fitness_and_Status'
assessment = 'Child Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Questionnaire_Measures_of_Emotional_and_Cognitive_Status
domain = 'Questionnaire_Measures_of_Emotional_and_Cognitive_Status'
assessment = 'Child Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Questionnaire_Measures_of_Family_Structure_Stress_and_Trauma
domain = 'Questionnaire_Measures_of_Family_Structure_Stress_and_Trauma'
assessment = 'Child Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Physiology/Motor/Neurology
assessment = 'Child Measures'

# Several small domains are shown together in this section.
cols = ['Neurologic_Function', 'Physiologic_Function', 'Vision', 'Motor_Skills']

# Measures (abbreviations) belonging to any of those domains.
abbrevs = df_all[(df_all['domains'].isin(cols)) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment filter does not depend on the measure, so apply it
# once outside the loop. As in the original cell, rows are not re-filtered by
# domain here.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Questionnaire_Measures_of_Substance_Use_&_Addiction
domain = 'Questionnaire_Measures_of_Substance_Use_&_Addiction'
assessment = 'Child Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Parent Measures - Demographic_Questionnaire_Measures
domain = 'Demographic_Questionnaire_Measures'
assessment = 'Parent Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Parent Measures - Interview_of_Emotional_and_Psychological_Function
domain = 'Interview_of_Emotional_and_Psychological_Function'
assessment = 'Parent Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Parent Measures - Questionnaire_Measures_of_Family_structure_Stress_and_Trauma
# NOTE(review): 'Family_structure' is lower-case here but 'Family_Structure'
# in the Child Measures cell; kept as-is -- confirm it matches the data.
domain = 'Questionnaire_Measures_of_Family_structure_Stress_and_Trauma'
assessment = 'Parent Measures'

# Measures (abbreviations) in this domain under this assessment.
abbrevs = df_all[(df_all['domains']==domain) & (df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment/domain filter does not depend on the measure, so
# apply it once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment) &
              (df_all['domains']==domain)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
assessment = 'Teacher Measures'

# Every measure (abbreviation) recorded under the teacher assessment.
abbrevs = df_all[(df_all['assessment']==assessment)]['abbrevs'].unique()

# The target/assessment filter does not depend on the measure, so apply it
# once outside the loop.
base = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
              (df_all['assessment']==assessment)]

# One figure per measure, comparing participant groups.
for abbrev in abbrevs:
    df1 = base[base['abbrevs']==abbrev]

    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Children’s global assessment scale
assessment = 'Clinical Measures'

# This section is about CGAS only. The original looped over every clinical
# abbreviation while filtering on the fixed value 'CGAS', so the same CGAS
# rows were re-plotted once per abbreviation with the wrong title. Plot once.
df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['assessment']==assessment) &
             (df_all['abbrevs']=='CGAS')]

# Skip the figure when there are no CGAS rows, as the original loop did when
# the assessment had no measures.
if not df1.empty:
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title='CGAS')
All measures - ADHD
# ADHD (combined sexes): one bar group per measure.
df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['participant_group']=='ADHD')]

visualize.predictive_modeling_group(df=df1, x='abbrevs', y='roc_auc_score', title='All measures - ADHD')
# Restrict the ADHD comparison to a hand-picked subset of measures.
to_keep = ['CBCL', 'C3SR', 'SWAN', 'SympChck', 'SDQ']

df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['participant_group']=='ADHD') &
             (df_all['abbrevs'].isin(to_keep))]

visualize.predictive_modeling_group(df=df1, x='abbrevs', y='roc_auc_score', title='All measures - ADHD')
# Restrict the ADHD comparison to the CBCL, TRF and YSR measures.
to_keep = ['CBCL', 'TRF', 'YSR']

df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['participant_group']=='ADHD') &
             (df_all['abbrevs'].isin(to_keep))]

visualize.predictive_modeling_group(df=df1, x='abbrevs', y='roc_auc_score', title='All measures - ADHD')
Which measures predict females better than males (and vice versa)?
These graphs show which measures classify females better than males, and vice versa. The overall takeaway is that more measures classify males better than females (91% of measures classify males better).
# filter dataframe and groupby abbrevs and participant group (ADHD-females and ADHD-males)
# Only the real-data rows ('data' == 'data') are kept. numeric_only=True is
# required because the frame also holds string columns, which pandas >= 2.0
# refuses to average.
df_grouped = df_all[(df_all['participant_group'].isin(['ADHD-females', 'ADHD-males'])) &
                    (df_all['target']=='DX_01_Cat_new_binarize') &
                    (df_all['data']=='data')].groupby(['abbrevs', 'participant_group']).mean(numeric_only=True).reset_index()

# get scores for males and females
# Each column is filled only on the matching sex's rows and is NaN elsewhere.
df_grouped['male_scores'] = df_grouped.loc[df_grouped['participant_group']=='ADHD-males', 'roc_auc_score']
df_grouped['female_scores'] = df_grouped.loc[df_grouped['participant_group']=='ADHD-females', 'roc_auc_score']

# groupby abbreviations
# mean() skips NaN, so each measure gets one row holding both sex-specific
# scores. 'roc_auc_score' becomes the average of the two sexes.
df_new = df_grouped.groupby('abbrevs').mean(numeric_only=True)

# calculate difference score between male and female scores
df_new['diff_score'] = df_new['male_scores'] - df_new['female_scores']

# decide which scores are male or female
# Measures with a zero or undefined difference (e.g. one sex missing) keep
# NaN in 'males' and are excluded from both plots below.
df_new.loc[df_new['diff_score']>0, 'males'] = True
df_new.loc[df_new['diff_score']<0, 'males'] = False

df_new = df_new.reset_index()
# Measures where the female model scores higher than the male model.
df1 = df_new[(df_new['males']==False)]

sns.barplot(data=df1, x='abbrevs', y='roc_auc_score')
plt.xticks(rotation=45, ha='right');
plt.ylim([.4,1])
# Dashed reference line at ROC AUC = 0.5.
plt.axhline(y=.5, color = 'k', linestyle = '--')
plt.title('measures (females > males)');
# Measures where the male model scores higher than the female model.
df1 = df_new[(df_new['males']==True)]

sns.barplot(data=df1, x='abbrevs', y='roc_auc_score')
plt.xticks(fontsize=8, rotation=45, ha='right');
plt.ylim([.4,1])
# Dashed reference line at ROC AUC = 0.5.
plt.axhline(y=.5, color = 'k', linestyle = '--')
plt.title('measures (males > females)');
All measures - Depression
# Depression (combined sexes): one bar group per measure.
df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['participant_group']=='Depression')]

visualize.predictive_modeling_group(df=df1, x='abbrevs', y='roc_auc_score', title='All measures - Depression')
ASD
# ASD (combined sexes): one bar group per measure.
df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['participant_group']=='ASD')]

visualize.predictive_modeling_group(df=df1, x='abbrevs', y='roc_auc_score', title='All measures - ASD')
Reading Impairment
# Reading Impairment (combined sexes): one bar group per measure.
df1 = df_all[(df_all['target']=='DX_01_Cat_new_binarize') &
             (df_all['participant_group']=='Reading Impairment')]

visualize.predictive_modeling_group(df=df1, x='abbrevs', y='roc_auc_score', title='All measures - Reading Impairment')