import os
import pandas as pd
import seaborn as sns
from hbn.constants import Defaults
from hbn.visualization import visualize
import warnings

# Silence every warning for the rest of the session so library warnings do not
# clutter the notebook output.
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2
%matplotlib inline
import plotly.io as pio

# Select the default plotly renderer for this session; the 'iframe' renderer is
# kept below as a commented-out alternative.
pio.renderers.default = 'notebook'
#pio.renderers.default = 'iframe'
#import ipywidgets as widgets # interactive display
#%config InlineBackend.figure_format = 'svg' # other available formats are: 'retina', 'png', 'jpeg', 'pdf'
Classifier Models across Multiple Measures
# check models
from hbn.models.predictive_modeling import check_models

# Report the model runs matching '2023'. Judging by the recorded output, each
# line gives a run timestamp, its participant groups and its target columns,
# and runs without a second-level performance csv are flagged.
# NOTE(review): exact matching semantics of `filter` live in check_models -- confirm there.
check_models(filter='2023')
2023-01-25_23:03:07.675954: ['No Diagnosis Given']: ['Sex_binarize']
2023-01-25_22:10:47.981934: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']
2023-01-26_16:52:55.576236: ['No Diagnosis Given', 'Generalized Anxiety Disorder', 'Other Specified Anxiety Disorder', 'Social Anxiety (Social Phobia)', 'Separation Anxiety', 'Specific Phobia', 'Selective Mutism', 'Unspecified Anxiety Disorder', 'Panic Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']
classifier-all-phenotypic-models-performance.csv does not exist for /Users/maedbhking/Documents/healthy_brain_network/data/interim/models/2023-01-25_22:09:57.784889, run `run_second_level.sh`
2023-01-26_16:52:17.468889: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize']
classifier-all-phenotypic-models-performance.csv does not exist for /Users/maedbhking/Documents/healthy_brain_network/data/interim/models/2023-01-25_22:10:46.494914, run `run_second_level.sh`
2023-01-26_16:53:09.184825: ['No Diagnosis Given', 'Other Specified Depressive Disorder', 'Major Depressive Disorder', 'Disruptive Mood Dysregulation Disorder', 'Persistent Depressive Disorder (Dysthymia)', 'Depressive Disorder Due to Another Medical Condition']: ['DX_01_Cat_new_binarize']
2023-01-26_16:49:52.804211: ['ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['Sex_binarize']
classifier-all-phenotypic-models-performance.csv does not exist for /Users/maedbhking/Documents/healthy_brain_network/data/interim/models/2023-01-25_22:10:49.655034, run `run_second_level.sh`
2023-01-25_18:53:31.426535: ['No Diagnosis Given', 'ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']
2023-01-26_16:51:30.868341: ['No Diagnosis Given', 'ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['DX_01_Cat_new_binarize']
2023-01-25_22:10:51.315781: ['No Diagnosis Given', 'Generalized Anxiety Disorder', 'Other Specified Anxiety Disorder', 'Social Anxiety (Social Phobia)', 'Separation Anxiety', 'Specific Phobia', 'Selective Mutism', 'Unspecified Anxiety Disorder', 'Panic Disorder', 'Agoraphobia']: ['DX_01_Cat_new_binarize']
2023-01-26_16:53:33.811355: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']
2023-01-25_22:57:22.441936: ['ADHD-Combined Type', 'ADHD-Inattentive Type', 'Other Specified Attention-Deficit/Hyperactivity Disorder', 'ADHD-Hyperactive/Impulsive Type', 'Unspecified Attention-Deficit/Hyperactivity Disorder']: ['Sex_binarize']
2023-01-26_16:51:53.683959: ['No Diagnosis Given']: ['Sex_binarize']
2023-01-25_22:11:05.460227: ['No Diagnosis Given', 'Autism Spectrum Disorder']: ['DX_01_Cat_new_binarize', 'DX_01_binarize', 'Sex_binarize']
2023-01-25_22:10:47.985589: ['No Diagnosis Given', 'Specific Learning Disorder with Impairment in Reading']: ['DX_01_Cat_new_binarize']
2023-01-26_16:51:36.783459: ['ADHD-Combined Type', 'ADHD-Inattentive Type']: ['DX_01_binarize']
2023-01-25_23:01:30.632159: ['ADHD-Combined Type', 'ADHD-Inattentive Type']: ['DX_01_binarize']
# load models
# 2022 models
#models_dict = {'Anxiety': '2022-11-24_15:08:25.974034',
# 'Depression': '2022-11-24_22:47:54.716119',
# 'ADHD': '2022-11-24_14:38:36.039822',
# 'ASD': '2022-11-25_20:00:07.194478',
# 'Learning-Disorder-Reading-Impairment': '2022-11-26_09:57:41.996469',
# 'Neurodevelopmental Disorders': '2022-11-26_16:56:15.015065'
# }
# 2023 models
# Maps a participant-group label (used later as the `participant_group` column)
# to the timestamped model directory name that holds its results.
models_dict = {
    'Anxiety': '2023-01-25_22:10:51.315781',
    'Depression': '2023-01-25_22:10:47.981934',
    'ADHD': '2023-01-25_18:53:31.426535',
    'ASD': '2023-01-25_22:11:05.460227',
    'Learning-Disorder-Reading-Impairment': '2023-01-25_22:10:47.985589',
}
# loop over models
# Load each model's second-level performance table, tag it with its participant
# group, and stack everything into one long dataframe (`df_all`).
frames = []
for key, value in models_dict.items():
    # directory of this model run: <Defaults.MODEL_DIR>/<timestamp>
    MODEL_DIR = os.path.join(Defaults.MODEL_DIR, value)

    df_classify = pd.read_csv(os.path.join(MODEL_DIR, 'classifier-all-phenotypic-models-performance.csv'))
    # NOTE(review): this mapping swaps the labels ('model-data' -> 'null',
    # 'model-null' -> 'data'); it is kept exactly as found -- confirm that the
    # labels really are inverted upstream. Any other value in 'data' becomes NaN.
    df_classify['data'] = df_classify['data'].map({'model-data': 'null', 'model-null': 'data'})
    df_classify['participant_group'] = key
    frames.append(df_classify)

# Concatenate once instead of re-copying the accumulated frame on every
# iteration; fall back to an empty frame when there are no models to load.
df_all = pd.concat(frames) if frames else pd.DataFrame()
# set plotting style
# Called once, before any of the figure cells below.
# NOTE(review): presumably sets shared figure defaults inside hbn.visualization -- confirm there.
visualize.plotting_style()
- Each figure describes a model (Decision Tree Classifier) trained on one measure (e.g., CELF) from a particular domain (e.g., Language Tasks)
- Each participant group on the x axis is a separate model; the y axis shows its ROC AUC score for classifying that diagnosis (e.g., Anxiety, ADHD) vs. no diagnosis
Child Measures - Language Tasks
domain = 'Language_Tasks'

# measures (abbrevs) belonging to this domain within the Child Measures assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == 'Child Measures')]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == 'Child Measures')
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Cognitive Testing
domain = 'Cognitive_Testing'

# Measures (abbrevs) belonging to this domain within the Child Measures
# assessment. The assessment filter matches the one applied to `df1` below, so
# measures that only exist under another assessment are not iterated (they
# would only ever select zero rows).
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == 'Child Measures')]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == 'Child Measures')
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Interview_of_Emotional_and_Psychological_Function
domain = 'Interview_of_Emotional_and_Psychological_Function'
assessment = 'Child Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Interview_of_Emotional_and_Psychological_Function
# NOTE(review): this cell uses the same domain and assessment as the cell
# directly before it, so it draws the same figures a second time.
domain = 'Interview_of_Emotional_and_Psychological_Function'
assessment = 'Child Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Medical_Status_Measures
domain = 'Medical_Status_Measures'
assessment = 'Child Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Physical_Fitness_and_Status
domain = 'Physical_Fitness_and_Status'
assessment = 'Child Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Questionnaire_Measures_of_Emotional_and_Cognitive_Status
domain = 'Questionnaire_Measures_of_Emotional_and_Cognitive_Status'
assessment = 'Child Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Questionnaire_Measures_of_Family_Structure_Stress_and_Trauma
domain = 'Questionnaire_Measures_of_Family_Structure_Stress_and_Trauma'
assessment = 'Child Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Physiology/Motor/Neurology
assessment = 'Child Measures'

# this section pools several small domains into one group of figures
cols = ['Neurologic_Function', 'Physiologic_Function', 'Vision', 'Motor_Skills']

# measures (abbrevs) belonging to any of the pooled domains within this assessment
abbrevs = df_all[(df_all['domains'].isin(cols)) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# Loop-invariant part of the row filter, built once rather than per measure.
# The domain restriction mirrors the one used to pick `abbrevs`, so a measure
# that also appears under another domain does not contribute extra rows.
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'].isin(cols))
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Child Measures - Questionnaire_Measures_of_Substance_Use_&_Addiction
domain = 'Questionnaire_Measures_of_Substance_Use_&_Addiction'
assessment = 'Child Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Parent Measures - Demographic_Questionnaire_Measures
domain = 'Demographic_Questionnaire_Measures'
assessment = 'Parent Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Parent Measures - Interview_of_Emotional_and_Psychological_Function
domain = 'Interview_of_Emotional_and_Psychological_Function'
assessment = 'Parent Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
Parent Measures - Questionnaire_Measures_of_Family_structure_Stress_and_Trauma
# NOTE(review): 'Family_structure' is lower-case here but 'Family_Structure' in
# the Child Measures cell; the string is kept as found -- confirm it matches the
# values stored in the 'domains' column for Parent Measures.
domain = 'Questionnaire_Measures_of_Family_structure_Stress_and_Trauma'
assessment = 'Parent Measures'

# measures (abbrevs) belonging to this domain within this assessment
abbrevs = df_all[(df_all['domains'] == domain) & (df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
    & (df_all['domains'] == domain)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)
# Teacher Measures: every measure of the assessment, with no domain restriction
assessment = 'Teacher Measures'

# measures (abbrevs) recorded under this assessment
abbrevs = df_all[(df_all['assessment'] == assessment)]['abbrevs'].unique()

# loop-invariant part of the row filter, built once rather than per measure
in_scope = (
    (df_all['target'] == 'DX_01_Cat_new_binarize')
    & (df_all['assessment'] == assessment)
)

# one figure per measure: roc_auc_score (y) for each participant_group (x)
for abbrev in abbrevs:
    df1 = df_all[in_scope & (df_all['abbrevs'] == abbrev)]
    visualize.predictive_modeling_group(df=df1, x='participant_group', y='roc_auc_score', title=abbrev)