import os
import seaborn as sns
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
%load_ext autoreload
%autoreload 2
import warnings
# Silence all warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')
Basic Demographics
## FUNCTIONS
## GET DATA from parent intake interview
def get_data(preprocess=False, target_col='DX_01'):
    """Build a dataframe of parent intake-interview features plus a target.

    Args:
        preprocess: when True, pass the features through
            ``build_features.preprocess`` using a mean ``SimpleImputer``
            spec for numeric columns, ignoring the 'Identifiers' column.
        target_col: name of the target column requested from the
            'Clinical Diagnosis Demographics' measure.

    Returns:
        A dataframe whose first column is ``target_col``, followed by every
        merged column whose name does not contain ``target_col``.
    """
    import pandas as pd
    from hbn.features import build_features
    from hbn.data import make_dataset

    # get features
    features = build_features.get_features(
        assessment='Parent Measures',
        domains=['Interview of Emotional and Psychological Function'],
        measures=['Intake Interview'],
        min_num_participants=2000,
        incl_data_type=None,
    )

    if preprocess:
        # preprocess features
        clf_info = {
            "numeric": [
                ["sklearn.impute", "SimpleImputer", {"strategy": "mean"}],
            ]
        }
        features = build_features.preprocess(
            dataframe=features,
            clf_info=clf_info,
            cols_to_ignore=['Identifiers'],
        )

    # get targets
    targets = build_features.get_targets(target_info={
        "assessment": "Clinical Measures",
        "domain": None,
        "measure": "Clinical Diagnosis Demographics",
        "target_column": target_col,
        "transform": None,
        "outname": target_col,
    })

    # get participant ids
    participants = make_dataset.get_participants(
        split='all',
        disorders=['ADHD-Combined Type',
                   'ADHD-Inattentive Type',
                   'ADHD-Hyperactive_Impulsive_Type',
                   'No_Diagnosis_Given'],
    )

    # both merges use pandas' default join type on 'Identifiers'
    features_target = features.merge(
        targets, on='Identifiers').merge(
        participants, on='Identifiers')

    # get feature names (substring test: any column containing target_col is excluded)
    feature_names = [col for col in features_target.columns if target_col not in col]

    # target column first, then the features
    df_concat = pd.concat([features_target[[target_col]], features_target[feature_names]], axis=1)

    return df_concat
## RUN THIS CELL ##
from hbn.constants import Defaults
from hbn.data import make_dataset

# make_dataset.make_train_test_splits(out_dir=Defaults.MODEL_SPEC_DIR)

# get summary of clinical diagnosis + other demographics
dx = make_dataset.make_summary(save=False)
dx = make_dataset._add_race_ethnicity(dataframe=dx)

# filter for adhd (the three ADHD subtypes plus the no-diagnosis group)
adhd_only = ['ADHD-Combined Type', 'ADHD-Hyperactive/Impulsive Type', 'ADHD-Inattentive Type', 'No Diagnosis Given']
dx = dx[dx['DX_01'].isin(adhd_only)]

# get data from intake interview and merge with clinical summary
df_intake = get_data()
df_intake = df_intake.merge(
    dx[['Sex', 'Age_bracket', 'PreInt_Demos_Fam,Child_Race_cat', 'Identifiers']],
    on='Identifiers',
)
The Zen of Python, by Tim Peters
Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
reading /Users/maedbhking/Documents/healthy_brain_network/data/raw/phenotype/Parent_Measures/Interview_of_Emotional_and_Psychological_Function/Intake_Interview.csv into dataframe
reading /Users/maedbhking/Documents/healthy_brain_network/data/raw/phenotype/Clinical_Measures/Clinical_Diagnosis_Demographics.csv into dataframe
What is the race breakdown of children with adhd?
# Count participants per diagnosis and race category, then plot as stacked bars.
demographics = dx.groupby(['DX_01', 'PreInt_Demos_Fam,Child_Race_cat']).agg({
    'Identifiers': 'count',
}).reset_index()

fig = px.bar(demographics, x="DX_01", y="Identifiers", color="PreInt_Demos_Fam,Child_Race_cat")
fig.show()
What is the sex breakdown of children with adhd?
largest M/F ratios are combined and hyperactive, but not inattentive
# Count participants per diagnosis and sex, then plot.
demographics = dx.groupby(['DX_01', 'Sex']).agg({
    'Identifiers': 'count',
}).reset_index()

fig = px.bar(demographics, x="DX_01", y="Identifiers", color="Sex")
fig.show()
What is the age breakdown of children with adhd?
# Count participants per diagnosis and age bracket, then plot.
demographics = dx.groupby(['DX_01', 'Age_bracket']).agg({
    'Identifiers': 'count',
}).reset_index()

fig = px.bar(demographics, x="DX_01", y="Identifiers", color="Age_bracket")
fig.show()
How many comorbidities do children with adhd have?
Girls have more comorbidities on average than boys (except for impulsive type)
# Mean of the 'comorbidities' column per diagnosis and sex.
demographics = dx.groupby(['DX_01', 'Sex']).agg({
    'comorbidities': 'mean',
}).reset_index()

fig = px.bar(demographics, x="DX_01", y="comorbidities", color='Sex')
fig.show()
How many comorbidities do children with adhd have?
Children over 10 have more comorbidities on average than children under 10
# Mean of the 'comorbidities' column per diagnosis and age bracket.
demographics = dx.groupby(['DX_01', 'Age_bracket']).agg({
    'comorbidities': 'mean',
}).reset_index()

fig = px.bar(demographics, x="DX_01", y="comorbidities", color='Age_bracket')
fig.show()
get data from parent intake interview
What % of children with adhd have parents with adhd?
from hbn.data import make_dataset

### INTAKE INTERVIEW ###
# Sum the family-history ADHD/autism columns per diagnosis group and count
# the participants in each group.
tmp = df_intake.groupby('DX_01').agg({
    'PreInt_FamHx,m_adhd': 'sum',
    'PreInt_FamHx,f_adhd': 'sum',
    'PreInt_FamHx,s_adhd': 'sum',
    'PreInt_FamHx,f_autism': 'sum',
    'PreInt_FamHx,m_autism': 'sum',
    'PreInt_FamHx,s_autism': 'sum',
    'Identifiers': 'count',
})
tmp
PreInt_FamHx,m_adhd | PreInt_FamHx,f_adhd | PreInt_FamHx,s_adhd | PreInt_FamHx,f_autism | PreInt_FamHx,m_autism | PreInt_FamHx,s_autism | Identifiers | |
---|---|---|---|---|---|---|---|
DX_01 | |||||||
ADHD-Combined Type | 3.0 | 6.0 | 4.0 | 0.0 | 0.0 | 3.0 | 753 |
ADHD-Hyperactive/Impulsive Type | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 106 |
ADHD-Inattentive Type | 0.0 | 1.0 | 2.0 | 0.0 | 0.0 | 2.0 | 680 |
No Diagnosis Given | 1.0 | 4.0 | 4.0 | 0.0 | 0.0 | 1.0 | 332 |
# Unique column-name prefixes (the text before the first comma in each name).
df_intake.columns.str.split(',').str.get(0).unique()
Index(['DX_01', 'Identifiers', 'PreInt_Demos_Fam', 'PreInt_Demos_Home',
'PreInt_DevHx', 'PreInt_EduHx', 'PreInt_FamHx', 'PreInt_FamHx_RDC',
'PreInt_Lang', 'PreInt_TxHx', 'Sex', 'Age_bracket'],
dtype='object')
Previous diagnoses
Many children with adhd have been previously diagnosed with a psych/learning disorder
# Plot, once per grouping variable, the summed 'Past_DX' column divided by
# the number of participants in each diagnosis group.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        'PreInt_TxHx,Past_DX': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    # presumably Past_DX is a 0/1 indicator, making this a proportion - confirm
    tmp['percent'] = tmp['PreInt_TxHx,Past_DX'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()
about 25% of children with adhd are currently taking psych medication
# Current and past psych-medication sums divided by group size; only the
# current-medication ratio is plotted.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        'PreInt_TxHx,psych_meds_cur': 'sum',
        'PreInt_TxHx,psych_meds_past': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    tmp['percent_curr'] = tmp['PreInt_TxHx,psych_meds_cur'] / tmp['Identifiers']
    # computed for reference; not shown in the figure below
    tmp['percent_past'] = tmp['PreInt_TxHx,psych_meds_past'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent_curr', color=color, orientation='v', barmode="group")
    fig.show()
few children had immunization reactions
# Summed 'immunization_reaction' column divided by group size, per grouping variable.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'immunization_reaction'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_TxHx,{var}': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    tmp['percent'] = tmp[f'PreInt_TxHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()
10-20% of children have had food allergies
# Summed 'food_allergy' column divided by group size, per grouping variable.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'food_allergy'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_TxHx,{var}': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    tmp['percent'] = tmp[f'PreInt_TxHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()
Most children have attended an average of 2 schools
# Mean of the 'number_schools' column per diagnosis group, per grouping variable.
colors = ['Sex', 'Age_bracket']
var = 'number_schools'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_EduHx,{var}': 'mean',
        'Identifiers': 'count',
    }).reset_index()
    # NOTE(review): 'percent' here is mean / count and is never plotted;
    # it looks like a leftover from the proportion cells - confirm before relying on it.
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y=f'PreInt_EduHx,{var}', color=color, orientation='v', barmode="group")
    fig.update_yaxes(range=[1, 4])
    fig.show()
50-60% of children have an individualized education plan
more children over10 with hyperactive/impulsive have an IEP but more children under10 with inattentive have an IEP
# Summed 'IEP' column divided by group size, per grouping variable.
colors = ['Sex', 'Age_bracket']
var = 'IEP'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_EduHx,{var}': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    # fixed y-range so the figures for each grouping variable are comparable
    fig.update_yaxes(range=[.1, .7])
    fig.show()
learning disability?
few children with adhd diagnosed with a learning disability
# Summed 'learning_disability' column divided by group size, per grouping variable.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'learning_disability'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_EduHx,{var}': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()
neuropsych testing? pretty low numbers …
# Summed 'NeuroPsych' column divided by group size, per grouping variable.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'NeuroPsych'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_EduHx,{var}': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()
Recent grades (1-excellent, 5-failing)
# Mean of the 'recent_grades' column per diagnosis group, per grouping variable.
colors = ['Sex', 'Age_bracket']
var = 'recent_grades'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_EduHx,{var}': 'mean',
        'Identifiers': 'count',
    }).reset_index()
    # NOTE(review): 'percent' here is mean / count and is never plotted;
    # it looks like a leftover from the proportion cells - confirm before relying on it.
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y=f'PreInt_EduHx,{var}', color=color, orientation='v', barmode="group")
    fig.update_yaxes(range=[1, 3])
    fig.show()
number of friends
# Mean of the 'number_friends' column per diagnosis group, per grouping variable.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'number_friends'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_EduHx,{var}': 'mean',
        'Identifiers': 'count',
    }).reset_index()
    # NOTE(review): 'percent' here is mean / count and is never plotted;
    # it looks like a leftover from the proportion cells - confirm before relying on it.
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y=f'PreInt_EduHx,{var}', color=color, orientation='v', barmode="group")
    fig.update_yaxes(range=[1, 4])
    fig.show()
outside school tutoring
40% of children with inattentive type adhd have outside tutoring
# Summed 'tutor' column divided by group size, per grouping variable.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'tutor'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_EduHx,{var}': 'sum',
        'Identifiers': 'count',
    }).reset_index()
    tmp['percent'] = tmp[f'PreInt_EduHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()
start of puberty
girls with adhd are starting puberty a lot earlier than boys - this tracks with children without a diagnosis. exception is boys with hyperactive adhd
# Mean of the 'puberty_age' column per diagnosis group, per grouping variable.
colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'puberty_age'

for color in colors:
    tmp = df_intake.groupby(['DX_01', color]).agg({
        f'PreInt_DevHx,{var}': 'mean',
        'Identifiers': 'count',
    }).reset_index()
    # NOTE(review): 'percent' here is mean / count and is never plotted;
    # it looks like a leftover from the proportion cells - confirm before relying on it.
    tmp['percent'] = tmp[f'PreInt_DevHx,{var}'] / tmp['Identifiers']

    fig = px.bar(tmp, x="DX_01", y=f'PreInt_DevHx,{var}', color=color, orientation='v', barmode="group")
    fig.update_yaxes(range=[8, 12])
    fig.show()
girls with hyperactive/impulsive adhd are starting menstruation earlier than other subtypes
# Mean of the 'menstruation_age' column per diagnosis group (no secondary grouping).
var = 'menstruation_age'

tmp = df_intake.groupby(['DX_01']).agg({
    f'PreInt_DevHx,{var}': 'mean',
    'Identifiers': 'count',
}).reset_index()

fig = px.bar(tmp, x="DX_01", y=f'PreInt_DevHx,{var}', orientation='v', barmode="group")
fig.update_yaxes(range=[10, 12])
fig.show()