import osimport seaborn as snsimport plotly.express as pximport pandas as pdimport matplotlib.pyplot as pltimport numpy as npimport plotly.io as piofrom matplotlib.gridspec import GridSpecimport matplotlib.patches as mpatchesfrom tabulate import tabulatefrom hbn.constants import Defaultsfrom hbn.visualization import visualize as vis%load_ext autoreload%autoreload 2import warningswarnings.filterwarnings('ignore')
The autoreload extension is already loaded. To reload it, use:
%reload_ext autoreload
## GET DATA
from hbn.data import make_dataset

# HARDCODE PARTICIPANTS + FEATURE_SPEC
# Participant identifiers for the ADHD subtypes plus the "no diagnosis" group.
participants = make_dataset.get_participants(
    split='all',
    disorders=[
        'ADHD-Combined_Type',
        'ADHD-Inattentive_Type',
        'ADHD-Hyperactive_Impulsive_Type',
        'Other_Specified_Attention-Deficit_Hyperactivity_Disorder',
        'No_Diagnosis_Given',
    ],
)

# One-column frame of the selected identifiers, used below as a merge filter.
df_part = pd.DataFrame(participants, columns=['Identifiers'])

# Summary of clinical diagnosis + other demographics for the full dataset.
df_all = make_dataset.make_summary(save=False)
df_all['Age'] = df_all['Age'].round()
#df_all = make_dataset._add_race_ethnicity(dataframe=df_all)

# Same summary with the CGAS score added.
df_CGAS = make_dataset.add_CGAS_Score(df_all)

# Restrict both frames to the selected participants (ADHD + No Diagnosis);
# the default inner merge drops every identifier not in `df_part`.
df = pd.merge(df_all, df_part, on='Identifiers')
df_CGAS = pd.merge(df_CGAS, df_part, on='Identifiers')
## summary stats
# Prints headline numbers for the full dataset, then a nine-point summary for
# each diagnosis group. Reads `df_all`, `df` and `df_CGAS` built in the
# "GET DATA" cell.
sample_size = len(df_all['Identifiers'].unique())
print(f'total sample size of healthy brain network dataset is {sample_size}\n')

num_ADHD = len(df[df['DX_01_Cat_new'] == 'ADHD'])
num_no_diagnosis = len(df[df['DX_01_Cat_new'] == 'No Diagnosis Given'])
print(f'there are {num_ADHD} participants with ADHD and {num_no_diagnosis} with No Diagnosis\n')

# print out summaries
for diagnosis in ['ADHD', 'No Diagnosis Given']:
    print(f'PARTICIPANTS WITH {diagnosis}')
    tmp = df[df['DX_01_Cat_new'] == diagnosis]

    # 1. sex split; .get() keeps the summary running if one sex is absent
    sex_counts = tmp['Sex'].value_counts()
    print(f"1. there are {sex_counts.get('male', 0)} males and {sex_counts.get('female', 0)} females\n")

    # 2. age split on the rounded age (rows with missing age fall in neither bin)
    ages = tmp['Age'].round()
    ages_6_10 = int((ages <= 10).sum())
    ages_11_21 = int((ages > 10).sum())
    print(f'2. there are {ages_6_10} children ages 6-10 and {ages_11_21} children ages 11-21\n')

    # 3. study sites, ordered by number of participants (value_counts order)
    num_sites = len(tmp['Site'].unique())
    sites = tmp['Site'].value_counts()
    site_numbers = sites.index.tolist()
    site_count = sites.tolist()
    print(f'3. there are {num_sites} study sites and following # of participants collected across the following sites are {site_count}: {site_numbers}\n')

    # 4. enrollment years, ordered by number of participants.
    # The previous `.astype(str).str.strip('.0')` removed every leading and
    # trailing '.'/'0' character, so 2020.0 was printed as 202. Converting
    # through float keeps the full year. value_counts() already drops missing
    # values, so the count is taken from the same list that is printed.
    years = tmp['Enroll_Year'].value_counts().index.astype(float).astype(int).tolist()
    num_years = len(years)
    print(f'4. data were collected across {num_years} years: {years}\n')

    # 5. diagnostic subtypes
    subtypes = tmp['DX_01'].unique()
    num_subtypes = len(subtypes)
    print(f'5. there are {num_subtypes} unique subtypes: {subtypes}\n')

    # 6. share of participants with comorbidities.
    # NOTE(review): this skips the single most frequent value of
    # `comorbidities` and sums the rest, i.e. it assumes the most frequent
    # value is "no comorbidity" - confirm against the column's coding.
    comorbid = round((tmp['comorbidities'].value_counts() / len(tmp)) * 100)
    num_comorbid = comorbid.iloc[1:].sum()
    print(f'6. approximately {num_comorbid}% have disorder combordities\n')

    # 7. most prevalent subtype
    disorder_cat = round((tmp['DX_01'].value_counts() / len(tmp)) * 100).head(1)
    print(f'7. most prevalent subtype is {disorder_cat.index[0]} - {disorder_cat.values[0]}% of sample\n')

    # 8. most prevalent diagnostic category
    disorder = round((tmp['DX_01_Cat_new'].value_counts() / len(tmp)) * 100).head(1)
    print(f'8. most prevalent category of diagnosis is {disorder.index[0]} - {disorder.values[0]}% of sample\n')

    # 9. mean CGAS score per sex.
    # The female/male lookups were previously swapped, so each sex was
    # reported with the other's mean.
    tmp_CGAS = df_CGAS[df_CGAS['DX_01_Cat_new'] == diagnosis]
    cgas_by_sex = tmp_CGAS.groupby(['Sex']).agg({'CGAS_Score': 'mean'})
    f_cgas = cgas_by_sex.loc['female'].values[0]
    m_cgas = cgas_by_sex.loc['male'].values[0]
    print(f'9. females have an average general functioning score (CGAS) of {round(f_cgas)}% and males {round(m_cgas)}%\n')
total sample size of healthy brain network dataset is 4767
there are 1905 participants with ADHD and 373 with No Diagnosis
PARTICIPANTS WITH ADHD
1. there are 1370 males and 535 females
2. there are 1235 children ages 6-10 and 670 children ages 11-21
3. there are 5 study sites and following # of participants collected across the following sites are [842, 537, 471, 42, 13]: [1, 4, 3, 5, 2]
4. data were collected across 8 years: [2018, 2019, 2017, 202, 2016, 2021, 2015, 2022]
5. there are 4 unique subtypes: ['ADHD-Combined Type' 'ADHD-Inattentive Type'
'Other Specified Attention-Deficit/Hyperactivity Disorder'
'ADHD-Hyperactive/Impulsive Type']
6. approximately 69.0% have disorder combordities
7. most prevalent subtype is ADHD-Combined Type - 45.0% of sample
8. most prevalent category of diagnosis is ADHD - 100.0% of sample
9. females have an average general functioning score (CGAS) of 64% and males 65%
PARTICIPANTS WITH No Diagnosis Given
1. there are 198 males and 175 females
2. there are 229 children ages 6-10 and 144 children ages 11-21
3. there are 5 study sites and following # of participants collected across the following sites are [223, 75, 68, 4, 3]: [1, 3, 4, 2, 5]
4. data were collected across 8 years: [2017, 2016, 2019, 2018, 202, 2021, 2015, 2022]
5. there are 1 unique subtypes: ['No Diagnosis Given']
6. approximately 0.0% have disorder combordities
7. most prevalent subtype is No Diagnosis Given - 100.0% of sample
8. most prevalent category of diagnosis is No Diagnosis Given - 100.0% of sample
9. females have an average general functioning score (CGAS) of 75% and males 78%
+-------------------+----------------------------------------------------------+--------+---------------------+
| Enrollment Year | Diagnosis | Sex | # of Participants |
+===================+==========================================================+========+=====================+
| 2015 | ADHD-Combined Type | female | 4 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | ADHD-Combined Type | male | 15 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | ADHD-Hyperactive/Impulsive Type | male | 1 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | ADHD-Inattentive Type | female | 5 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | ADHD-Inattentive Type | male | 6 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | No Diagnosis Given | female | 12 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | No Diagnosis Given | male | 9 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | Other Specified Attention-Deficit/Hyperactivity Disorder | female | 1 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2015 | Other Specified Attention-Deficit/Hyperactivity Disorder | male | 3 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | ADHD-Combined Type | female | 25 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | ADHD-Combined Type | male | 78 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | ADHD-Hyperactive/Impulsive Type | female | 3 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | ADHD-Hyperactive/Impulsive Type | male | 10 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | ADHD-Inattentive Type | female | 16 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | ADHD-Inattentive Type | male | 38 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | No Diagnosis Given | female | 49 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | No Diagnosis Given | male | 43 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | Other Specified Attention-Deficit/Hyperactivity Disorder | female | 6 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2016 | Other Specified Attention-Deficit/Hyperactivity Disorder | male | 11 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | ADHD-Combined Type | female | 35 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | ADHD-Combined Type | male | 141 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | ADHD-Hyperactive/Impulsive Type | female | 7 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | ADHD-Hyperactive/Impulsive Type | male | 20 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | ADHD-Inattentive Type | female | 48 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | ADHD-Inattentive Type | male | 99 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | No Diagnosis Given | female | 37 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | No Diagnosis Given | male | 57 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | Other Specified Attention-Deficit/Hyperactivity Disorder | female | 9 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2017 | Other Specified Attention-Deficit/Hyperactivity Disorder | male | 13 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | ADHD-Combined Type | female | 47 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | ADHD-Combined Type | male | 153 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | ADHD-Hyperactive/Impulsive Type | female | 8 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | ADHD-Hyperactive/Impulsive Type | male | 10 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | ADHD-Inattentive Type | female | 68 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | ADHD-Inattentive Type | male | 145 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | No Diagnosis Given | female | 29 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | No Diagnosis Given | male | 25 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | Other Specified Attention-Deficit/Hyperactivity Disorder | female | 10 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2018 | Other Specified Attention-Deficit/Hyperactivity Disorder | male | 17 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | ADHD-Combined Type | female | 43 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | ADHD-Combined Type | male | 139 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | ADHD-Hyperactive/Impulsive Type | female | 9 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | ADHD-Hyperactive/Impulsive Type | male | 26 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | ADHD-Inattentive Type | female | 54 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | ADHD-Inattentive Type | male | 122 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | No Diagnosis Given | female | 25 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | No Diagnosis Given | male | 30 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | Other Specified Attention-Deficit/Hyperactivity Disorder | female | 12 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2019 | Other Specified Attention-Deficit/Hyperactivity Disorder | male | 15 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | ADHD-Combined Type | female | 23 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | ADHD-Combined Type | male | 87 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | ADHD-Hyperactive/Impulsive Type | female | 3 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | ADHD-Hyperactive/Impulsive Type | male | 12 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | ADHD-Inattentive Type | female | 41 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | ADHD-Inattentive Type | male | 73 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | No Diagnosis Given | female | 15 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | No Diagnosis Given | male | 17 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | Other Specified Attention-Deficit/Hyperactivity Disorder | female | 2 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2020 | Other Specified Attention-Deficit/Hyperactivity Disorder | male | 7 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | ADHD-Combined Type | female | 18 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | ADHD-Combined Type | male | 52 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | ADHD-Hyperactive/Impulsive Type | female | 3 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | ADHD-Hyperactive/Impulsive Type | male | 12 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | ADHD-Inattentive Type | female | 29 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | ADHD-Inattentive Type | male | 51 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | No Diagnosis Given | female | 6 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | No Diagnosis Given | male | 16 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | Other Specified Attention-Deficit/Hyperactivity Disorder | female | 3 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2021 | Other Specified Attention-Deficit/Hyperactivity Disorder | male | 6 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2022 | ADHD-Combined Type | male | 5 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2022 | ADHD-Hyperactive/Impulsive Type | female | 1 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2022 | ADHD-Hyperactive/Impulsive Type | male | 2 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2022 | ADHD-Inattentive Type | female | 2 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2022 | ADHD-Inattentive Type | male | 1 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2022 | No Diagnosis Given | female | 2 |
+-------------------+----------------------------------------------------------+--------+---------------------+
| 2022 | No Diagnosis Given | male | 1 |
+-------------------+----------------------------------------------------------+--------+---------------------+
Results from Predictive Models
In this section, we’re building models on different sets of features (Child, Parent, and Teacher measures, plus basic demographics such as race, ethnicity, age, and sex) and determining how well these models classify whether or not someone has a diagnosis (versus no diagnosis).
We are building models separately for males and females to identify whether certain features (or all features) classify males better than females (or vice versa)
We have built models on the following diagnoses: ADHD, ASD, Depression, Anxiety, Reading Impairment (our primary interest is ADHD but we can interpret results for ADHD model relative to the other diagnoses)
Predictive Modeling - ADHD for Child, Parent, Teacher, Demographics
A result of 50% is chance level (model is not distinguishing between either group) – chance is indicated in the figures with a dashed black line. 100% is a perfect classification.
Interpreting the figure:
* A) Parent and Child measures are well above chance (80% and 85%, respectively), while Demographics and Teacher measures are close to chance. In distinguishing whether participants have ADHD or no diagnosis, the parent model does best, followed closely by the child model.
* B) When we split by sex, we can see that, for each model, males are better classified than females.
Interpreting the figure:
* A) Models classify depression quite well (around 80%), with the demographics and teacher models slightly lower. Females are better classified than males for both demographics and teacher measures.
* B) Anxiety shows a similar pattern of results to depression, except that scores are significantly lower.
* C) Demographic and teacher models are close to chance for reading impairment (with no significant male/female differences). For parent and child measures, the classifier predicts fairly well (about 80%), and there is a dissociation between males and females across these measures: males are far better classified than females by the parent model, and vice versa for the child model.
* D) For the ASD group, there are no classification differences between males and females; the parent model significantly outperforms the child, teacher, and demographics models, at 90% accuracy.
ADHD is the only diagnosis where males are consistently better classified than females across all assessments
# FUNCTIONS
def get_data(data_type='raw', diagnoses=['ADHD', 'No Diagnosis'], measures=['CBCL', 'YSR', 'TRF']):
    """Get data for all assessments, filtered by diagnosis and measure.

    For each measure, the participant demographics are merged with the
    columns whose name matches that measure, and the per-measure frames are
    stacked row-wise.

    Args:
        data_type (str): 'raw' or 'preprocessed'. Default is 'raw'.
        diagnoses (list of str or None): diagnoses to keep; each entry is
            matched as a regex alternative against the 'Diagnosis' column.
            `None` disables the diagnosis filter.
        measures (list of str): measure names; each is matched as a regex
            against the data column names.

    Returns:
        pd.DataFrame: one block of rows per measure, concatenated with the
        original row indices kept. Empty if `measures` is empty.
    """
    # NOTE: the list defaults are kept for interface compatibility; they are
    # only read here, never mutated.
    from hbn.models import item_analysis

    assessments = ['Parent', 'Child', 'Teacher']

    # Load once: the inputs do not depend on the measure, so reloading inside
    # the loop only repeated the same work (assumes load_data is
    # deterministic for fixed arguments).
    df_data, _, df_diagnosis = item_analysis.load_data(
        assessments=assessments, data_type=data_type
    )

    # Filter participants on diagnosis; na=False drops rows with a missing
    # diagnosis instead of failing on a NaN mask.
    if diagnoses is not None:
        keep = df_diagnosis['Diagnosis'].str.contains('|'.join(diagnoses), na=False)
        df_diagnosis = df_diagnosis[keep]

    demographics = df_diagnosis[
        ['Identifiers', 'Age', 'Sex', 'Diagnosis', 'Race', 'Ethnicity']
    ]

    frames = []
    for measure in measures:
        # Keep the merge key plus every column belonging to this measure.
        cols_to_keep = df_data.columns[
            df_data.columns.str.contains(f'{measure}|Identifiers')
        ]
        df_data_dx = demographics.merge(df_data[cols_to_keep], on=['Identifiers'])

        # Normalise suffixed column names: '_x' -> '_raw', '_y' dropped.
        # NOTE(review): this replaces the substrings anywhere in the name,
        # not only as a suffix - confirm no measure column contains them.
        df_data_dx.columns = (
            df_data_dx.columns.str.replace('_x', '_raw').str.replace('_y', '')
        )
        df_data_dx['Age'] = df_data_dx['Age'].round()
        frames.append(df_data_dx)

    # pd.concat raises on an empty list, so keep the empty-frame result the
    # original produced when no measures are requested.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)


def hue_regplot(data, x, y, hue, palette=None, **kwargs):
    """Draw one seaborn regression plot per level of `hue`.

    Args:
        data (pd.DataFrame): data containing the `x`, `y` and `hue` columns.
        x (str): column plotted on the x axis.
        y (str): column plotted on the y axis.
        hue (str): column whose unique values define the groups.
        palette (dict or None): mapping from hue level to colour. When
            `None`, colours are taken from matplotlib's 'tab10' colormap in
            order of first appearance.
        **kwargs: forwarded unchanged to `sns.regplot`.

    Returns:
        list: the return value of `sns.regplot` for each hue level.
    """
    levels = data[hue].unique()

    if palette is None:
        # `matplotlib.cm.get_cmap` was removed in Matplotlib 3.9;
        # `pyplot.get_cmap` is available in old and new releases. Imported
        # here so a caller-supplied palette never needs the colormap at all.
        import matplotlib.pyplot as plt

        default_colors = plt.get_cmap('tab10')
        # Cycle through the colormap so that levels beyond its length do not
        # all collapse onto the last colour.
        palette = {
            k: default_colors(i % default_colors.N) for i, k in enumerate(levels)
        }

    return [
        sns.regplot(
            x=x,
            y=y,
            data=data[data[hue] == key],
            color=palette[key],
            label=key,
            **kwargs
        )
        for key in levels
    ]
# Load the raw Parent, Child and Teacher assessment data, restricted to the
# ADHD / No Diagnosis groups and to the CBCL, YSR and TRF measures.
df = get_data(
    data_type='raw',
    diagnoses=['ADHD', 'No Diagnosis'],
    measures=['CBCL', 'YSR', 'TRF'],
)