import os
import seaborn as sns
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore')Basic Demographics
## FUNCTIONS
## GET DATA from parent intake interview
def get_data(preprocess=False, target_col='DX_01'):
    """Join parent intake-interview features with a diagnosis target column.

    Args:
        preprocess: when True, the features are passed through
            ``build_features.preprocess`` with a mean ``SimpleImputer`` spec
            for the "numeric" entry, ignoring the 'Identifiers' column.
        target_col: column requested from ``build_features.get_targets``;
            it is placed first in the returned frame.

    Returns:
        DataFrame built by merging features, targets and the selected
        participants on 'Identifiers' (pandas' default merge), with
        ``target_col`` first followed by every column whose name does not
        contain ``target_col``.
    """
    import pandas as pd
    from hbn.features import build_features
    from hbn.data import make_dataset

    # Features from the parent intake interview.
    intake_features = build_features.get_features(
        assessment='Parent Measures',
        domains=['Interview of Emotional and Psychological Function'],
        measures=['Intake Interview'],
        min_num_participants=2000,
        incl_data_type=None,
    )

    if preprocess:
        # Spec format is [module, class, kwargs]; how it is consumed is up to
        # build_features.preprocess.
        imputer_spec = {
            "numeric": [
                ["sklearn.impute", "SimpleImputer", {"strategy": "mean"}],
            ],
        }
        intake_features = build_features.preprocess(
            dataframe=intake_features,
            clf_info=imputer_spec,
            cols_to_ignore=['Identifiers'],
        )

    # Target column from the clinical diagnosis table.
    target_spec = {
        "assessment": "Clinical Measures",
        "domain": None,
        "measure": "Clinical Diagnosis Demographics",
        "target_column": target_col,
        "transform": None,
        "outname": target_col,
    }
    targets = build_features.get_targets(target_info=target_spec)

    # NOTE(review): these labels use underscores ('No_Diagnosis_Given') while
    # the notebook's own DX_01 filter uses 'No Diagnosis Given' -- confirm
    # which spelling get_participants expects.
    participants = make_dataset.get_participants(
        split='all',
        disorders=[
            'ADHD-Combined Type',
            'ADHD-Inattentive Type',
            'ADHD-Hyperactive_Impulsive_Type',
            'No_Diagnosis_Given',
        ],
    )

    merged = intake_features.merge(targets, on='Identifiers')
    merged = merged.merge(participants, on='Identifiers')

    # Substring test: any column whose name contains target_col is left out.
    other_cols = [name for name in merged.columns if target_col not in name]
    return pd.concat([merged[[target_col]], merged[other_cols]], axis=1)
## RUN THIS CELL ##
from hbn.constants import Defaults
from hbn.data import make_dataset

# make_dataset.make_train_test_splits(out_dir=Defaults.MODEL_SPEC_DIR)

# Clinical-diagnosis summary, then passed through the race/ethnicity helper.
dx = make_dataset.make_summary(save=False)
dx = make_dataset._add_race_ethnicity(dataframe=dx)

# Keep only rows whose primary diagnosis is an ADHD subtype or no diagnosis.
adhd_only = [
    'ADHD-Combined Type',
    'ADHD-Hyperactive/Impulsive Type',
    'ADHD-Inattentive Type',
    'No Diagnosis Given',
]
is_adhd_or_none = dx['DX_01'].isin(adhd_only)
dx = dx[is_adhd_or_none]

# Intake-interview data, with sex, age bracket and race category attached
# from the clinical summary via 'Identifiers'.
demo_cols = ['Sex', 'Age_bracket', 'PreInt_Demos_Fam,Child_Race_cat', 'Identifiers']
df_intake = get_data()
df_intake = df_intake.merge(dx[demo_cols], on='Identifiers')
# The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
reading /Users/maedbhking/Documents/healthy_brain_network/data/raw/phenotype/Parent_Measures/Interview_of_Emotional_and_Psychological_Function/Intake_Interview.csv into dataframe
reading /Users/maedbhking/Documents/healthy_brain_network/data/raw/phenotype/Clinical_Measures/Clinical_Diagnosis_Demographics.csv into dataframe
What is the race breakdown of children with adhd?
# Participant count for every diagnosis x race-category pair.
grouped = dx.groupby(['DX_01', 'PreInt_Demos_Fam,Child_Race_cat'])
demographics = grouped.agg({'Identifiers': 'count'}).reset_index()
fig = px.bar(
    demographics,
    x="DX_01",
    y="Identifiers",
    color="PreInt_Demos_Fam,Child_Race_cat",
)
fig.show()What is the sex breakdown of children with adhd?
largest M/F ratios are combined and hyperactive, but not inattentive
# Participant count for every diagnosis x sex pair.
grouped = dx.groupby(['DX_01', 'Sex'])
demographics = grouped.agg({'Identifiers': 'count'}).reset_index()
fig = px.bar(
    demographics,
    x="DX_01",
    y="Identifiers",
    color="Sex",
)
fig.show()What is the age breakdown of children with adhd?
# Participant count for every diagnosis x age-bracket pair.
grouped = dx.groupby(['DX_01', 'Age_bracket'])
demographics = grouped.agg({'Identifiers': 'count'}).reset_index()
fig = px.bar(
    demographics,
    x="DX_01",
    y="Identifiers",
    color="Age_bracket",
)
fig.show()How many comorbidities do children with adhd have?
Girls have more comorbidities on average than boys (except for impulsive type)
# Mean of the 'comorbidities' column for every diagnosis x sex pair.
grouped = dx.groupby(['DX_01', 'Sex'])
demographics = grouped.agg({'comorbidities': 'mean'}).reset_index()
fig = px.bar(
    demographics,
    x="DX_01",
    y="comorbidities",
    color='Sex',
)
fig.show()How many comorbidities do children with adhd have?
Children over 10 have more comorbidities on average than children under 10
# Mean of the 'comorbidities' column for every diagnosis x age-bracket pair.
grouped = dx.groupby(['DX_01', 'Age_bracket'])
demographics = grouped.agg({'comorbidities': 'mean'}).reset_index()
fig = px.bar(
    demographics,
    x="DX_01",
    y="comorbidities",
    color='Age_bracket',
)
fig.show()get data from parent intake interview
What % of children with adhd have parents with adhd?
from hbn.data import make_dataset
### INTAKE INTERVIEW ###
# Per diagnosis: column-wise sums of the ADHD / autism family-history fields
# plus the number of participants. Result is indexed by DX_01.
family_history_cols = [
    'PreInt_FamHx,m_adhd',
    'PreInt_FamHx,f_adhd',
    'PreInt_FamHx,s_adhd',
    'PreInt_FamHx,f_autism',
    'PreInt_FamHx,m_autism',
    'PreInt_FamHx,s_autism',
]
agg_spec = dict.fromkeys(family_history_cols, 'sum')
agg_spec['Identifiers'] = 'count'
tmp = df_intake.groupby('DX_01').agg(agg_spec)
tmp| PreInt_FamHx,m_adhd | PreInt_FamHx,f_adhd | PreInt_FamHx,s_adhd | PreInt_FamHx,f_autism | PreInt_FamHx,m_autism | PreInt_FamHx,s_autism | Identifiers | |
|---|---|---|---|---|---|---|---|
| DX_01 | |||||||
| ADHD-Combined Type | 3.0 | 6.0 | 4.0 | 0.0 | 0.0 | 3.0 | 753 |
| ADHD-Hyperactive/Impulsive Type | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 106 |
| ADHD-Inattentive Type | 0.0 | 1.0 | 2.0 | 0.0 | 0.0 | 2.0 | 680 |
| No Diagnosis Given | 1.0 | 4.0 | 4.0 | 0.0 | 0.0 | 1.0 | 332 |
df_intake.columns.str.split(',').str.get(0).unique()Index(['DX_01', 'Identifiers', 'PreInt_Demos_Fam', 'PreInt_Demos_Home',
'PreInt_DevHx', 'PreInt_EduHx', 'PreInt_FamHx', 'PreInt_FamHx_RDC',
'PreInt_Lang', 'PreInt_TxHx', 'Sex', 'Age_bracket'],
dtype='object')
Previous diagnoses
Many children with adhd have been previously diagnosed with a psych/learning disorder
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
for color in colors:
    # Summed Past_DX values and participant count per diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg(
        {'PreInt_TxHx,Past_DX': 'sum', 'Identifiers': 'count'}
    ).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    tmp['percent'] = tmp['PreInt_TxHx,Past_DX'].div(tmp['Identifiers'])
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()
# about 25% of children with adhd are currently taking psych medication
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
for color in colors:
    # Summed current / past medication values and participant count per
    # diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg(
        {
            'PreInt_TxHx,psych_meds_cur': 'sum',
            'PreInt_TxHx,psych_meds_past': 'sum',
            'Identifiers': 'count',
        }
    ).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    group_size = tmp['Identifiers']
    tmp['percent_curr'] = tmp['PreInt_TxHx,psych_meds_cur'].div(group_size)
    tmp['percent_past'] = tmp['PreInt_TxHx,psych_meds_past'].div(group_size)
    # Only the current-medication share is plotted; percent_past stays in tmp.
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent_curr',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()
# few children had immunization reactions
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'immunization_reaction'
column = f'PreInt_TxHx,{var}'
for color in colors:
    # Summed responses and participant count per diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({column: 'sum', 'Identifiers': 'count'}).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    tmp['percent'] = tmp[column].div(tmp['Identifiers'])
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()
# 10-20% of children have had food allergies
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'food_allergy'
column = f'PreInt_TxHx,{var}'
for color in colors:
    # Summed responses and participant count per diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({column: 'sum', 'Identifiers': 'count'}).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    tmp['percent'] = tmp[column].div(tmp['Identifiers'])
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()
# Most children have attended an average of 2 schools
colors = ['Sex', 'Age_bracket']
var = 'number_schools'
column = f'PreInt_EduHx,{var}'
for color in colors:
    # Mean of the number_schools column and participant count per
    # diagnosis x subgroup.
    # No 'percent' column is derived here: a group mean divided by the group
    # size has no interpretation, and the plot uses the mean directly.
    tmp = (
        df_intake.groupby(['DX_01', color])
        .agg({column: 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    fig = px.bar(
        tmp,
        x="DX_01",
        y=column,
        color=color,
        orientation='v',
        barmode="group",
    )
    # Restrict the visible y-range to 1-4.
    fig.update_yaxes(range=[1, 4])
    fig.show()
# 50-60% of children have an individualized education plan
more children over10 with hyperactive/impulsive have an IEP but more children under10 with inattentive have an IEP
colors = ['Sex', 'Age_bracket']
var = 'IEP'
column = f'PreInt_EduHx,{var}'
for color in colors:
    # Summed responses and participant count per diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({column: 'sum', 'Identifiers': 'count'}).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    tmp['percent'] = tmp[column].div(tmp['Identifiers'])
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    # Restrict the visible y-range to 0.1-0.7.
    fig.update_yaxes(range=[.1, .7])
    fig.show()
# learning disability?
few children with adhd diagnosed with a learning disability
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'learning_disability'
column = f'PreInt_EduHx,{var}'
for color in colors:
    # Summed responses and participant count per diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({column: 'sum', 'Identifiers': 'count'}).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    tmp['percent'] = tmp[column].div(tmp['Identifiers'])
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()
# neuropsych testing? pretty low numbers …
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'NeuroPsych'
column = f'PreInt_EduHx,{var}'
for color in colors:
    # Summed responses and participant count per diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({column: 'sum', 'Identifiers': 'count'}).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    tmp['percent'] = tmp[column].div(tmp['Identifiers'])
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()
# Recent grades (1-excellent, 5-failing)
colors = ['Sex', 'Age_bracket']
var = 'recent_grades'
column = f'PreInt_EduHx,{var}'
for color in colors:
    # Mean of the recent_grades column and participant count per
    # diagnosis x subgroup.
    # No 'percent' column is derived here: a group mean divided by the group
    # size has no interpretation, and the plot uses the mean directly.
    tmp = (
        df_intake.groupby(['DX_01', color])
        .agg({column: 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    fig = px.bar(
        tmp,
        x="DX_01",
        y=column,
        color=color,
        orientation='v',
        barmode="group",
    )
    # Restrict the visible y-range to 1-3.
    fig.update_yaxes(range=[1, 3])
    fig.show()
# number of friends
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'number_friends'
column = f'PreInt_EduHx,{var}'
for color in colors:
    # Mean of the number_friends column and participant count per
    # diagnosis x subgroup.
    # No 'percent' column is derived here: a group mean divided by the group
    # size has no interpretation, and the plot uses the mean directly.
    tmp = (
        df_intake.groupby(['DX_01', color])
        .agg({column: 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    fig = px.bar(
        tmp,
        x="DX_01",
        y=column,
        color=color,
        orientation='v',
        barmode="group",
    )
    # Restrict the visible y-range to 1-4.
    fig.update_yaxes(range=[1, 4])
    fig.show()
# outside school tutoring
40% of children with inattentive type adhd have outside tutoring
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'tutor'
column = f'PreInt_EduHx,{var}'
for color in colors:
    # Summed responses and participant count per diagnosis x subgroup.
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({column: 'sum', 'Identifiers': 'count'}).reset_index()
    # NOTE(review): the denominator is the group's participant count, so
    # missing answers (if any) count as 0 -- confirm that is intended.
    tmp['percent'] = tmp[column].div(tmp['Identifiers'])
    fig = px.bar(
        tmp,
        x="DX_01",
        y='percent',
        color=color,
        orientation='v',
        barmode="group",
    )
    fig.show()
# start of puberty
girls with adhd are starting puberty a lot earlier than boys - this tracks with children without a diagnosis. exception is boys with hyperactive adhd
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'puberty_age'
column = f'PreInt_DevHx,{var}'
for color in colors:
    # Mean of the puberty_age column and participant count per
    # diagnosis x subgroup.
    # No 'percent' column is derived here: a group mean divided by the group
    # size has no interpretation, and the plot uses the mean directly.
    tmp = (
        df_intake.groupby(['DX_01', color])
        .agg({column: 'mean', 'Identifiers': 'count'})
        .reset_index()
    )
    fig = px.bar(
        tmp,
        x="DX_01",
        y=column,
        color=color,
        orientation='v',
        barmode="group",
    )
    # Restrict the visible y-range to 8-12.
    fig.update_yaxes(range=[8, 12])
    fig.show()
# girls with hyperactive/impulsive adhd are starting menstruation earlier than other subtypes
var = 'menstruation_age'
column = f'PreInt_DevHx,{var}'
# Mean of the menstruation_age column and participant count per diagnosis.
grouped = df_intake.groupby(['DX_01'])
tmp = grouped.agg({column: 'mean', 'Identifiers': 'count'}).reset_index()
fig = px.bar(
    tmp,
    x="DX_01",
    y=column,
    orientation='v',
    barmode="group",
)
# Restrict the visible y-range to 10-12.
fig.update_yaxes(range=[10, 12])
fig.show()