Intergenerational transmission of genetic risk for hyperactivity and inattention. Vertical transmission or genetic nurture?

Author

Ivan Voronin

This is the code from the manuscript submitted to Journal Psychology and Psychiatry:

Intergenerational transmission of genetic risk for hyperactivity and inattention. Vertical transmission or genetic nurture?

Ivan Voronin, Isabelle Ouellet-Morin, Amélie Petitclerc, Geneviève Morneau-Vaillancourt, Mara Brendgen, Ginette Dione, Frank Vitaro, Michel Boivin

Libraries

Code
## Libraries
library(haven)
library(dplyr)
library(glue)
library(knitr)
library(kableExtra)
library(tidyr)
library(ggplot2)
library(extrafont)
library(grid)
library(gridExtra)
library(OpenMx)
# devtools::install_github('ivanvoronin/mlth.data.frame')
# devtools::install_github('ivanvoronin/TwinAnalysis')
library(mlth.data.frame)
library(TwinAnalysis)
library(stringr)
library(magrittr)
library(psych)
library(tibble)
Code
## White background for combined plots
white_bg <- function(gg) {
  grobTree(
    rectGrob(
      gp = gpar(
        fill = 'white', 
        lwd = 0)), 
    gg) %>%
    invisible
}

## Pull out the legend of a plot
g_legend <- function(p){
  tmp <- ggplot_gtable(ggplot_build(p))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  tmp$grobs[[leg]]
}

Data preparation

Code
## Reading the phenotype data
pheno <- read_spss(
  'data/phenotypic.sav'
)

pheno <- pheno %>%
  transmute(
    id = id,
    fam_id = nofamill,
    gender = factor(
      sexe,
      levels = 2:1,
      labels = c('Female', 'Male')
    ),
    zygosity = factor(
      zygotie,
      levels = 1:2,
      labels = c('MZ', 'DZ')
    ),
    
    # Inattention and hyperactivity
    IH_hyp_18to60m = rowMeans(
      data.frame(
        bbelthy, cbelthy, 
        dbelthy, ebelthy
      ),
      na.rm = TRUE
    ),
    IH_hyp_KtoG6 = rowMeans(
      data.frame(
        fbplhyp, gbplhyp,  
        ibplhyp, jbplhyp,
        kbplhyp
      ),
      na.rm = TRUE
    ),
    IH_ina_18to60m = rowMeans(
      data.frame(
        bbeltina, cbeltina, 
        dbeltina, ebeltina
      ),
      na.rm = TRUE
    ),
    IH_ina_KtoG6 = rowMeans(
      data.frame(
        fbplina, gbplina,  
        ibplina, jbplina,
        kbplina
      ),
      na.rm = TRUE
    )
  ) %>%
  select(
    id, fam_id, gender, zygosity,
    IH_hyp_18to60m,
    IH_ina_18to60m,
    IH_hyp_KtoG6,
    IH_ina_KtoG6
  )
Code
## Reading the PGS data
## EA-PGS
edu_prs <- read.table(
  'data/EA-PGS.profile', 
  header = TRUE
) %>%
  filter(
    grepl('EJNQ', FID)
  ) %>%
  filter(
    !(
      grepl('wrongFather', FID) | 
        grepl('Klinefelter', FID) | 
        grepl('dupl', IID)
    )
  ) %>%
  transmute(
    ID = as.numeric(gsub('I', '', IID)),
    Edu_PRS = SCORESUM,
    fam_id = as.numeric(gsub('EJNQ.*_', '', FID))
  )

## ADHD-PGS
adhd_prs <- read.table(
  'data/ADHD-PGS.profile',
  header = TRUE
) %>%
  filter(
    grepl('EJNQ', FID)
  ) %>%
  filter(
    !(
      grepl('wrongFather', FID) |
        grepl('Klinefelter', FID) |
        grepl('Klnefelter', FID) |
        grepl('dupl', IID)
    )
  ) %>%
  transmute(
    ID = as.numeric(gsub('I', '', IID)),
    ADHD_PRS = PRScs_ADHD_phi01,
    fam_id = as.numeric(gsub('EJNQ.*_', '', FID))
  )

## Reading the ID and zygosity variables  
ids <- read_spss(
  'data/zygosity.sav'
) %>%
  rename(
    zygosity = zygotie
  )

## Reading genetic principle components
pca <- read.table(
  'data/genetic_PC.eigenvec',
  col.names = c('fam_id', 'id', glue('PC{1:20}'))
) %>%
  filter(
    grepl('EJNQ', fam_id)
  ) %>%
  filter(
    !(
      grepl('wrongFather', fam_id) | 
        grepl('Klinefelter', fam_id) | 
        grepl('dupl', id)
    )
  ) %>%
  mutate(
    fam_id = as.numeric(gsub('EJNQ.*_', '', fam_id)),
    id = as.numeric(gsub('I', '', id))
  )

## Combining PGS data
all_prs <- ids %>%
  merge(
    edu_prs,
    by.x = 'id',
    by.y = 'ID',
    all = TRUE
  ) %>%
  merge(
    adhd_prs,
    by.x = 'id',
    by.y = 'ID',
    all = TRUE
  ) %>%
  mutate(
    fam_id = id %/% 100,
    fam_mem = sapply(
      id %% 10,
      switch,
      'mother', 'twin1', 
      'twin2', 'father')
  ) %>%
  select(
    id, fam_id, fam_mem, 
    zygosity, Edu_PRS, ADHD_PRS
  ) %>%
  filter(
    !is.na(Edu_PRS) | !is.na(ADHD_PRS)
  ) %>% 
  merge(
    pca,
    by = c('id', 'fam_id')
  )

## Adjusting PGS for the first ten principal components
all_prs$Edu_PRS_adj <- lm(
  Edu_PRS ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  data = all_prs, 
  na.action = na.omit
) %>%
  residuals

all_prs$ADHD_PRS_adj <- lm(
  ADHD_PRS ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  data = all_prs,
  na.action = na.exclude
) %>%
  residuals

## Selecting only necessary variables
all_prs <- all_prs %>%
  select(
    id, fam_id, fam_mem, 
    Edu_PRS = Edu_PRS_adj,
    ADHD_PRS = ADHD_PRS_adj
  ) %>%
  merge(
    unique(
      pheno[, c('fam_id', 'zygosity')]
    ),
    by = 'fam_id',
    all.x = TRUE,
    all.y = FALSE
  )

## Filling in the PGS of MZ twins
for (i in unique(all_prs$fam_id)) {
  row_i <- all_prs %>%
    filter(
      fam_id == i & 
        fam_mem %in% c('twin1', 'twin2')
    )
  
  if (nrow(row_i) == 1 && 
      !is.na(row_i$zygosity) && 
      row_i$zygosity == 'MZ') {
    snd_row <- row_i
    snd_row$id <- 
      row_i$fam_id * 100 + 
      ifelse(row_i$fam_mem == 'twin1', 3, 2)
    snd_row$fam_mem <-
      ifelse(row_i$fam_mem == 'twin1', 'twin2', 'twin1')

    all_prs <- rbind(all_prs, snd_row)
  }
}

## Selecting parental PGS
prs_parents <- all_prs %>%
  filter(
    fam_mem %in% c('mother', 'father')
  ) %>%
  pivot_wider(
    id_cols = fam_id,
    names_from = fam_mem,
    values_from = c(Edu_PRS, ADHD_PRS)
  )

## Selecting twins' PGS and re-merging parental and twins' PGS
prs_twins <- all_prs %>%
  filter(
    fam_mem %in% c('twin1', 'twin2')  
  ) %>%
  select(
    fam_id, id, fam_mem,
    Edu_PRS_tw = Edu_PRS,
    ADHD_PRS_tw = ADHD_PRS
  ) %>%
  merge(
    prs_parents,
    by = 'fam_id',
    all.x = TRUE,
    all.y = FALSE
  )
Code
## Merging hyperactivity and inattention scores and PGS
data <-
  merge(
    prs_twins,
    pheno,
    by = c('id', 'fam_id'),
    all.x = TRUE,
    all.y = FALSE
  ) %>%
  rename(
    Hyp1 = IH_hyp_18to60m,
    Hyp2 = IH_hyp_KtoG6,
    Ina1 = IH_ina_18to60m,
    Ina2 = IH_ina_KtoG6
  ) %>%
  filter(
    !is.na(zygosity)
  ) 

## Short and long variable names
var_names <- c(
  'Hyp1',
  'Ina1',
  'Hyp2',
  'Ina2'
)

long_var_names <- c(
  Hyp1 = 'Hyperactivity, early childhood',
  Ina1 = 'Inattention, early childhood',
  Hyp2 = 'Hyperactivity, primary school',
  Ina2 = 'Inattention, primary school'
)
Code
## Generate a synthetic dataset of data

Preliminary analysis

Helper functions

Code
## Function that makes a table with descriptive statistics and correlations
descr_and_cor <- function(d) {
  k <- ncol(d)
  rn <- names(d)
  N <- sapply(d, function(x) length(na.omit(x)))
  M <- sapply(d, mean, na.rm = TRUE)
  SD <- sapply(d, sd, na.rm = TRUE)
  Med <- sapply(d, median, na.rm = TRUE)
  Min <- sapply(d, min, na.rm = TRUE)
  Max <- sapply(d, max, na.rm = TRUE)
  
  Cors <- Hmisc::rcorr(as.matrix(d))
  cors <- Map(
    function(r, p) {
      sprintf(
        '%0.3f (%0.3f)',
        r,
        p
      )
    },
    Cors$r,
    Cors$P
  )
  
  cors <- 
    matrix(
      unlist(cors),
      nrow = k,
      dimnames = list(
        rn, 1:k
      )
    )
  
  cors[upper.tri(cors, diag = TRUE)] <- NA
  cors <- cors[, -k]
  
  data.frame(
    n = N,
    M = M,
    SD = SD,
    median = Med,
    range = paste0(
      round(Min, 2),
      '-',
      round(Max, 2)
    ),
    cors,
    row.names = paste0(1:k, '. ', rn),
    check.names = FALSE
  )
}
Code
## Functions that plot a hystogram with density

## Return adjusted density
my_dens <- function(x, D, bin_width, ...) {
  dens <- density(D, ...)
  y <- approx(dens, xout = x)$y
  y * bin_width * dens$n
}

## Return a histogram with density
my_hist <- function(d, v, title = v) {
  x <- na.omit(d[, v, drop = TRUE])
  xl <- min(x)
  xu <- max(x)
  nb <- 15
  
  hist_col <- '#8B9EA1'
  dens_col <- '#46515A'
  quant_col <- '#46515A'
  
  quant <- quantile(
    x, 
    c(0.25, 0.5, 0.75),
    na.rm = TRUE
  )

  ggplot(d, aes_string(x = v)) +
    geom_histogram(
      breaks = seq(xl, xu, len = nb + 1),
      color = NA,
      fill = hist_col,
      alpha = 0.6
    ) +
    geom_vline(
      xintercept = quant,
      linetype = 'dashed',
      size = 0.8,
      color = quant_col
    ) +
    annotate(
      geom = 'text',
      x = quant,
      y = 0,
      label = c('25%', '50%', '75%'),
      hjust = -0.2,
      vjust = -0.4,
      alpha = 0.6,
      size = 3.5
    ) +
    stat_function(
      geom = 'line',
      fun = my_dens,
      args = list(
        D = x,
        bin_width = (xu - xl) / nb
      ),
      color = dens_col,
      alpha = 1,
      size = 0.8
    ) +
    labs(
      title = title
    ) +
    theme_classic() +
    theme(
      axis.title.x = element_text(
        margin = margin(
          t = 10,
          b = 10
        ),
        size = 15
      )
    )
}
Code
cor_table <- function(d, x, y, ...) {
  tab <- list()
  for (j in y) {
    column <- data.frame()
    for (i in x) {
      Cor <- cor.test(
        d[, i],
        d[, j]      )
      column <- rbind(
        column,
        data.frame(
          r = sprintf('%0.3f', Cor$estimate),
          n = Cor$parameter + 2,
          SE = sprintf(
            '%0.3f',
            sqrt((1 - Cor$estimate^2)/Cor$parameter)
          ),
          `96% CI` = sprintf(
            '[%0.3f; %0.3f]',
            Cor$conf.int[1],
            Cor$conf.int[2]
          ),
          p = sprintf('%0.3f', Cor$p.value),
          check.names = FALSE
        )
      )
    }
    tab[[j]] <- column
  }
  
  as.mlth.data.frame(
    tab,
    row.names = x
  )
}

Descriptive analysis

Code
## Descriptive statistics of hyperactivity and inattention
data %>%
  select(
    all_of(var_names)
  ) %>%
  rename_at(
    all_of(var_names),
    ~ long_var_names[.]
  ) %>%
  descr_and_cor %>%
  kable2(
    register_output = TRUE,
    caption = 'Descriptive statistics for hyperactivity and inattention (both twins)',
    footnote = 'Pearson correlation with standard error is presented on the right',
    align = 'c',
    digits = 2
  ) %>%
  kable_styling
Descriptive statistics for hyperactivity and inattention (both twins)
n M SD median range 1 2 3
1. Hyperactivity, early childhood 724 0.83 0.40 0.80 0-2
2. Inattention, early childhood 724 0.63 0.35 0.67 0-2 0.703 (0.000)
3. Hyperactivity, primary school 722 0.45 0.45 0.32 0-2 0.309 (0.000) 0.263 (0.000)
4. Inattention, primary school 722 0.77 0.55 0.67 0-2 0.244 (0.000) 0.259 (0.000) 0.723 (0.000)
Note:
Pearson correlation with standard error is presented on the right
Code
## Sex differences

lapply(
  setNames(nm = var_names),
  function(v) {
    means <- tapply(
      data[, v],
      data$gender,
      function(x)
        sprintf(
          '%0.2f (%0.2f)',
          mean(x, na.rm = TRUE),
          sd(x, na.rm = TRUE)
        )
    )
    tt <- t.test(
      data[, v] ~ data$gender
    )
    lev <- car::leveneTest(
      data[, v] ~ data$gender
    )
    
    mlth.data.frame(
      `M (SD)` = as.list(means),
      `t-test` = list(
        t = tt$statistic,
        df = tt$parameter,
        p = tt$p.value
      ),
      `Levene's test` = list(
        F = lev[['F value']][1],
        df = paste(lev$Df, collapse = ','),
        p = lev[['Pr(>F)']][1]
      )
    )
  }
) %>%
  do.call('rbind', .) %>%
  `row.names<-`(long_var_names) %>%
  kable2(
    register_output = TRUE,
    caption = 'Sex differences in hyperactivity and inattention',
    align = 'c',
    digits = 3
  ) %>%
  kable_styling
Sex differences in hyperactivity and inattention
M (SD)
t-test
Levene's test
Female Male t df p F df p
Hyperactivity, early childhood 0.80 (0.40) 0.87 (0.39) -2.627 719.713 0.009 0.219 1,722 0.640
Inattention, early childhood 0.59 (0.34) 0.67 (0.36) -3.217 721.302 0.001 0.625 1,722 0.430
Hyperactivity, primary school 0.31 (0.36) 0.60 (0.49) -9.158 660.887 0.000 41.195 1,720 0.000
Inattention, primary school 0.62 (0.50) 0.92 (0.56) -7.515 708.589 0.000 10.041 1,720 0.002
Code
## Distribution of untransformed ADHD scores

Hs1 <- list()

for (i in var_names) {
  Hs1[[i]] <- data %>%
    my_hist(
      i,
      title = long_var_names[i]
    )
}

grid.arrange(
  grobs = Hs1,
  ncol = 2
) %>%
  white_bg