Basic Demographics

import os
import seaborn as sns
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np

%load_ext autoreload
%autoreload 2

import warnings
# Silence every warning category for the rest of the session so library
# warnings do not clutter the notebook output. Remove this line when
# debugging, since it also hides deprecation and runtime warnings.
warnings.filterwarnings('ignore')

## FUNCTIONS

## GET DATA from parent intake interview

def get_data(preprocess=False, target_col='DX_01'):
    """Build one table of parent intake-interview features plus a target column.

    Features come from ``build_features.get_features`` (Parent Measures /
    Intake Interview), the target from ``build_features.get_targets``
    (Clinical Diagnosis Demographics) and the participant list from
    ``make_dataset.get_participants``. The three tables are merged on
    ``'Identifiers'`` using the default ``merge`` join.

    Args:
        preprocess: when True, the features are passed through
            ``build_features.preprocess`` with a spec requesting
            ``sklearn.impute.SimpleImputer(strategy="mean")`` for numeric
            columns; ``'Identifiers'`` is excluded from preprocessing.
        target_col: name of the target column to request and to place first
            in the returned table.

    Returns:
        A DataFrame whose first column is ``target_col``, followed by every
        merged column whose name does not contain ``target_col``.
    """
    import pandas as pd
    from hbn.features import build_features
    from hbn.data import make_dataset

    # intake-interview features
    features = build_features.get_features(
        assessment='Parent Measures',
        domains=['Interview of Emotional and Psychological Function'],
        measures=['Intake Interview'],
        min_num_participants=2000,
        incl_data_type=None,
    )

    if preprocess:
        # [module, class, kwargs] triple describing the imputer to apply
        mean_imputer = ["sklearn.impute", "SimpleImputer", {"strategy": "mean"}]
        features = build_features.preprocess(
            dataframe=features,
            clf_info={"numeric": [mean_imputer]},
            cols_to_ignore=['Identifiers'],
        )

    # diagnosis target
    target_spec = {
        "assessment": "Clinical Measures",
        "domain": None,
        "measure": "Clinical Diagnosis Demographics",
        "target_column": target_col,
        "transform": None,
        "outname": target_col,
    }
    targets = build_features.get_targets(target_info=target_spec)

    # participant ids restricted to the ADHD subtypes and undiagnosed children
    # NOTE(review): these labels mix spaces and underscores -- confirm they
    # match what make_dataset.get_participants expects.
    disorder_labels = [
        'ADHD-Combined Type',
        'ADHD-Inattentive Type',
        'ADHD-Hyperactive_Impulsive_Type',
        'No_Diagnosis_Given',
    ]
    participants = make_dataset.get_participants(split='all', disorders=disorder_labels)

    merged = features.merge(targets, on='Identifiers')
    merged = merged.merge(participants, on='Identifiers')

    # target column first, then every column whose name does not contain it
    target_part = merged[[target_col]]
    remaining_cols = [name for name in merged.columns if target_col not in name]

    return pd.concat([target_part, merged[remaining_cols]], axis=1)

## RUN THIS CELL ## 
from hbn.constants import Defaults
from hbn.data import make_dataset

# make_dataset.make_train_test_splits(out_dir=Defaults.MODEL_SPEC_DIR)

# clinical-diagnosis summary, extended with race/ethnicity columns
dx = make_dataset._add_race_ethnicity(
    dataframe=make_dataset.make_summary(save=False)
)

# keep only the ADHD subtypes and children with no diagnosis
adhd_only = ['ADHD-Combined Type', 'ADHD-Hyperactive/Impulsive Type', 'ADHD-Inattentive Type', 'No Diagnosis Given']
keep_rows = dx['DX_01'].isin(adhd_only)
dx = dx[keep_rows]

# intake-interview data joined with the demographic columns of the summary
demographic_cols = ['Sex', 'Age_bracket', 'PreInt_Demos_Fam,Child_Race_cat', 'Identifiers']
df_intake = get_data().merge(dx[demographic_cols], on='Identifiers')

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!
reading /Users/maedbhking/Documents/healthy_brain_network/data/raw/phenotype/Parent_Measures/Interview_of_Emotional_and_Psychological_Function/Intake_Interview.csv into dataframe
reading /Users/maedbhking/Documents/healthy_brain_network/data/raw/phenotype/Clinical_Measures/Clinical_Diagnosis_Demographics.csv into dataframe

What is the race breakdown of children with adhd?

# number of participants in each diagnosis x race-category group
group_keys = ['DX_01', 'PreInt_Demos_Fam,Child_Race_cat']
demographics = dx.groupby(group_keys)['Identifiers'].count().reset_index()

fig = px.bar(data_frame=demographics, x="DX_01", y="Identifiers", color="PreInt_Demos_Fam,Child_Race_cat")
fig.show()

What is the sex breakdown of children with adhd?

largest M/F ratios are combined and hyperactive, but not inattentive

# number of participants in each diagnosis x sex group
group_keys = ['DX_01', 'Sex']
demographics = dx.groupby(group_keys)['Identifiers'].count().reset_index()

fig = px.bar(data_frame=demographics, x="DX_01", y="Identifiers", color="Sex")
fig.show()

What is the age breakdown of children with adhd?

# number of participants in each diagnosis x age-bracket group
group_keys = ['DX_01', 'Age_bracket']
demographics = dx.groupby(group_keys)['Identifiers'].count().reset_index()

fig = px.bar(data_frame=demographics, x="DX_01", y="Identifiers", color="Age_bracket")
fig.show()

How many comorbidities do children with adhd have?

Girls have more comorbidities on average than boys (except for the hyperactive/impulsive type)

# mean of the 'comorbidities' column per diagnosis x sex group
group_keys = ['DX_01', 'Sex']
demographics = dx.groupby(group_keys)['comorbidities'].mean().reset_index()

fig = px.bar(data_frame=demographics, x="DX_01", y="comorbidities", color='Sex')
fig.show()

How many comorbidities do children with adhd have?

Children over 10 have more comorbidities on average than children under 10

# mean of the 'comorbidities' column per diagnosis x age-bracket group
group_keys = ['DX_01', 'Age_bracket']
demographics = dx.groupby(group_keys)['comorbidities'].mean().reset_index()

fig = px.bar(data_frame=demographics, x="DX_01", y="comorbidities", color='Age_bracket')
fig.show()

get data from parent intake interview

What % of children with adhd have parents with adhd?

from hbn.data import make_dataset

### INTAKE INTERVIEW ###

# family-history columns to total per diagnosis (column order is preserved
# in the resulting table)
family_history_cols = [
    'PreInt_FamHx,m_adhd',
    'PreInt_FamHx,f_adhd',
    'PreInt_FamHx,s_adhd',
    'PreInt_FamHx,f_autism',
    'PreInt_FamHx,m_autism',
    'PreInt_FamHx,s_autism',
]
agg_spec = dict.fromkeys(family_history_cols, 'sum')
agg_spec['Identifiers'] = 'count'  # group size, listed last

tmp = df_intake.groupby('DX_01').agg(agg_spec)
tmp

	PreInt_FamHx,m_adhd	PreInt_FamHx,f_adhd	PreInt_FamHx,s_adhd	PreInt_FamHx,f_autism	PreInt_FamHx,m_autism	PreInt_FamHx,s_autism	Identifiers
DX_01
ADHD-Combined Type	3.0	6.0	4.0	0.0	0.0	3.0	753
ADHD-Hyperactive/Impulsive Type	0.0	0.0	0.0	0.0	0.0	1.0	106
ADHD-Inattentive Type	0.0	1.0	2.0	0.0	0.0	2.0	680
No Diagnosis Given	1.0	4.0	4.0	0.0	0.0	1.0	332

# distinct column-name prefixes: the text before the first comma of each name
df_intake.columns.str.split(',').str[0].unique()

Index(['DX_01', 'Identifiers', 'PreInt_Demos_Fam', 'PreInt_Demos_Home',
       'PreInt_DevHx', 'PreInt_EduHx', 'PreInt_FamHx', 'PreInt_FamHx_RDC',
       'PreInt_Lang', 'PreInt_TxHx', 'Sex', 'Age_bracket'],
      dtype='object')

Previous diagnoses

Many children with adhd have been previously diagnosed with a psych/learning disorder

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
value_col = 'PreInt_TxHx,Past_DX'

for color in colors:
    # per diagnosis x subgroup: column total and number of participants
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({value_col: 'sum', 'Identifiers': 'count'}).reset_index()
    # total / group size; a proportion if the column is a 0/1 flag
    # (assumed -- TODO confirm)
    tmp = tmp.assign(percent=tmp[value_col] / tmp['Identifiers'])

    fig = px.bar(data_frame=tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()

about 25% of children with adhd are currently taking psych medication

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
current_col = 'PreInt_TxHx,psych_meds_cur'
past_col = 'PreInt_TxHx,psych_meds_past'

for color in colors:
    # per diagnosis x subgroup: totals of both medication columns + group size
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({current_col: 'sum',
                       past_col: 'sum',
                       'Identifiers': 'count'}).reset_index()
    group_size = tmp['Identifiers']
    tmp = tmp.assign(percent_curr=tmp[current_col] / group_size,
                     percent_past=tmp[past_col] / group_size)

    # only the current-medication share is plotted; percent_past stays in tmp
    fig = px.bar(data_frame=tmp, x="DX_01", y='percent_curr', color=color, orientation='v', barmode="group")
    fig.show()

few children had immunization reactions

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'immunization_reaction'
value_col = f'PreInt_TxHx,{var}'

for color in colors:
    # per diagnosis x subgroup: column total and number of participants
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({value_col: 'sum', 'Identifiers': 'count'}).reset_index()
    # total / group size; a proportion if the column is a 0/1 flag
    # (assumed -- TODO confirm)
    tmp = tmp.assign(percent=tmp[value_col] / tmp['Identifiers'])

    fig = px.bar(data_frame=tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()

10-20% of children have had food allergies

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'food_allergy'
value_col = f'PreInt_TxHx,{var}'

for color in colors:
    # per diagnosis x subgroup: column total and number of participants
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({value_col: 'sum', 'Identifiers': 'count'}).reset_index()
    # total / group size; a proportion if the column is a 0/1 flag
    # (assumed -- TODO confirm)
    tmp = tmp.assign(percent=tmp[value_col] / tmp['Identifiers'])

    fig = px.bar(data_frame=tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()

On average, children have attended about 2 schools

colors = ['Sex', 'Age_bracket']
var = 'number_schools'
value_col = f'PreInt_EduHx,{var}'

for color in colors:
    # Group mean of the column; the 'Identifiers' count is kept so the size
    # of each group is visible alongside its mean.
    tmp = df_intake.groupby(['DX_01', color]).agg({value_col: 'mean',
                                                   'Identifiers': 'count'}
                                                  ).reset_index()
    # No 'percent' column here: the aggregated value is already a mean, so
    # dividing it by the group size would not be a proportion.

    fig = px.bar(tmp, x="DX_01", y=value_col, color=color, orientation='v', barmode="group")
    # fixed y-axis window (note: does not start at zero)
    fig.update_yaxes(range=[1,4])
    fig.show()

50-60% of children have an individualized education plan

more children over10 with hyperactive/impulsive have an IEP but more children under10 with inattentive have an IEP

colors = ['Sex', 'Age_bracket']
var = 'IEP'
value_col = f'PreInt_EduHx,{var}'

for color in colors:
    # per diagnosis x subgroup: column total and number of participants
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({value_col: 'sum', 'Identifiers': 'count'}).reset_index()
    # total / group size; a proportion if the column is a 0/1 flag
    # (assumed -- TODO confirm)
    tmp = tmp.assign(percent=tmp[value_col] / tmp['Identifiers'])

    fig = px.bar(data_frame=tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    # fixed y-axis window (note: does not start at zero)
    fig.update_yaxes(range=[.1, .7])
    fig.show()

learning disability?

few children with adhd have been diagnosed with a learning disability

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'learning_disability'
value_col = f'PreInt_EduHx,{var}'

for color in colors:
    # per diagnosis x subgroup: column total and number of participants
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({value_col: 'sum', 'Identifiers': 'count'}).reset_index()
    # total / group size; a proportion if the column is a 0/1 flag
    # (assumed -- TODO confirm)
    tmp = tmp.assign(percent=tmp[value_col] / tmp['Identifiers'])

    fig = px.bar(data_frame=tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()

neuropsych testing? pretty low numbers …

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'NeuroPsych'
value_col = f'PreInt_EduHx,{var}'

for color in colors:
    # per diagnosis x subgroup: column total and number of participants
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({value_col: 'sum', 'Identifiers': 'count'}).reset_index()
    # total / group size; a proportion if the column is a 0/1 flag
    # (assumed -- TODO confirm)
    tmp = tmp.assign(percent=tmp[value_col] / tmp['Identifiers'])

    fig = px.bar(data_frame=tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()

Recent grades (1-excellent, 5-failing)

colors = ['Sex','Age_bracket']
var = 'recent_grades'
value_col = f'PreInt_EduHx,{var}'

for color in colors:
    # Group mean of the column; the 'Identifiers' count is kept so the size
    # of each group is visible alongside its mean.
    tmp = df_intake.groupby(['DX_01', color]).agg({value_col: 'mean',
                                                   'Identifiers': 'count'}
                                                  ).reset_index()
    # No 'percent' column here: the aggregated value is already a mean, so
    # dividing it by the group size would not be a proportion.

    fig = px.bar(tmp, x="DX_01", y=value_col, color=color, orientation='v', barmode="group")
    # fixed y-axis window (note: does not start at zero)
    fig.update_yaxes(range=[1,3])
    fig.show()

number of friends

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'number_friends'
value_col = f'PreInt_EduHx,{var}'

for color in colors:
    # Group mean of the column; the 'Identifiers' count is kept so the size
    # of each group is visible alongside its mean.
    tmp = df_intake.groupby(['DX_01', color]).agg({value_col: 'mean',
                                                   'Identifiers': 'count'}
                                                  ).reset_index()
    # No 'percent' column here: the aggregated value is already a mean, so
    # dividing it by the group size would not be a proportion.

    fig = px.bar(tmp, x="DX_01", y=value_col, color=color, orientation='v', barmode="group")
    # fixed y-axis window (note: does not start at zero)
    fig.update_yaxes(range=[1,4])
    fig.show()

outside school tutoring

40% of children with inattentive type adhd have outside tutoring

colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat', 'Age_bracket']
var = 'tutor'
value_col = f'PreInt_EduHx,{var}'

for color in colors:
    # per diagnosis x subgroup: column total and number of participants
    grouped = df_intake.groupby(['DX_01', color])
    tmp = grouped.agg({value_col: 'sum', 'Identifiers': 'count'}).reset_index()
    # total / group size; a proportion if the column is a 0/1 flag
    # (assumed -- TODO confirm)
    tmp = tmp.assign(percent=tmp[value_col] / tmp['Identifiers'])

    fig = px.bar(data_frame=tmp, x="DX_01", y='percent', color=color, orientation='v', barmode="group")
    fig.show()

start of puberty

girls with adhd start puberty considerably earlier than boys, which mirrors the pattern seen in children without a diagnosis; the exception is boys with hyperactive adhd


colors = ['Sex', 'PreInt_Demos_Fam,Child_Race_cat']
var = 'puberty_age'
value_col = f'PreInt_DevHx,{var}'

for color in colors:
    # Group mean of the column; the 'Identifiers' count is kept so the size
    # of each group is visible alongside its mean.
    tmp = df_intake.groupby(['DX_01', color]).agg({value_col: 'mean',
                                                   'Identifiers': 'count'}
                                                  ).reset_index()
    # No 'percent' column here: the aggregated value is already a mean, so
    # dividing it by the group size would not be a proportion.

    fig = px.bar(tmp, x="DX_01", y=value_col, color=color, orientation='v', barmode="group")
    # fixed y-axis window (note: does not start at zero)
    fig.update_yaxes(range=[8,12])
    fig.show()

girls with hyperactive/impulsive adhd are starting menstruation earlier than other subtypes

var = 'menstruation_age'
value_col = f'PreInt_DevHx,{var}'

# mean of the column and number of participants for each diagnosis
per_diagnosis = df_intake.groupby(['DX_01']).agg({value_col: 'mean', 'Identifiers': 'count'})
tmp = per_diagnosis.reset_index()

fig = px.bar(data_frame=tmp, x="DX_01", y=value_col, orientation='v', barmode="group")
# fixed y-axis window (note: does not start at zero)
fig.update_yaxes(range=[10, 12])
fig.show()