Intergenerational transmission of genetic risk for hyperactivity and inattention. Vertical transmission or genetic nurture?

Author

Ivan Voronin

This is the code from the manuscript submitted to Journal Psychology and Psychiatry:

Intergenerational transmission of genetic risk for hyperactivity and inattention. Vertical transmission or genetic nurture?

Ivan Voronin, Isabelle Ouellet-Morin, Amélie Petitclerc, Geneviève Morneau-Vaillancourt, Mara Brendgen, Ginette Dione, Frank Vitaro, Michel Boivin

Libraries

Code

## Libraries
library(haven)
library(dplyr)
library(glue)
library(knitr)
library(kableExtra)
library(tidyr)
library(ggplot2)
library(extrafont)
library(grid)
library(gridExtra)
library(OpenMx)
# devtools::install_github('ivanvoronin/mlth.data.frame')
# devtools::install_github('ivanvoronin/TwinAnalysis')
library(mlth.data.frame)
library(TwinAnalysis)
library(stringr)
library(magrittr)
library(psych)
library(tibble)

Code

## White background for combined plots
white_bg <- function(gg) {
  grobTree(
    rectGrob(
      gp = gpar(
        fill = 'white', 
        lwd = 0)), 
    gg) %>%
    invisible
}

## Pull out the legend of a plot
g_legend <- function(p){
  tmp <- ggplot_gtable(ggplot_build(p))
  leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
  tmp$grobs[[leg]]
}

Data preparation

Code

## Reading the phenotype data
pheno <- read_spss(
  'data/phenotypic.sav'
)

pheno <- pheno %>%
  transmute(
    id = id,
    fam_id = nofamill,
    gender = factor(
      sexe,
      levels = 2:1,
      labels = c('Female', 'Male')
    ),
    zygosity = factor(
      zygotie,
      levels = 1:2,
      labels = c('MZ', 'DZ')
    ),
    
    # Inattention and hyperactivity
    IH_hyp_18to60m = rowMeans(
      data.frame(
        bbelthy, cbelthy, 
        dbelthy, ebelthy
      ),
      na.rm = TRUE
    ),
    IH_hyp_KtoG6 = rowMeans(
      data.frame(
        fbplhyp, gbplhyp,  
        ibplhyp, jbplhyp,
        kbplhyp
      ),
      na.rm = TRUE
    ),
    IH_ina_18to60m = rowMeans(
      data.frame(
        bbeltina, cbeltina, 
        dbeltina, ebeltina
      ),
      na.rm = TRUE
    ),
    IH_ina_KtoG6 = rowMeans(
      data.frame(
        fbplina, gbplina,  
        ibplina, jbplina,
        kbplina
      ),
      na.rm = TRUE
    )
  ) %>%
  select(
    id, fam_id, gender, zygosity,
    IH_hyp_18to60m,
    IH_ina_18to60m,
    IH_hyp_KtoG6,
    IH_ina_KtoG6
  )

Code

## Reading the PGS data
## EA-PGS
edu_prs <- read.table(
  'data/EA-PGS.profile', 
  header = TRUE
) %>%
  filter(
    grepl('EJNQ', FID)
  ) %>%
  filter(
    !(
      grepl('wrongFather', FID) | 
        grepl('Klinefelter', FID) | 
        grepl('dupl', IID)
    )
  ) %>%
  transmute(
    ID = as.numeric(gsub('I', '', IID)),
    Edu_PRS = SCORESUM,
    fam_id = as.numeric(gsub('EJNQ.*_', '', FID))
  )

## ADHD-PGS
adhd_prs <- read.table(
  'data/ADHD-PGS.profile',
  header = TRUE
) %>%
  filter(
    grepl('EJNQ', FID)
  ) %>%
  filter(
    !(
      grepl('wrongFather', FID) |
        grepl('Klinefelter', FID) |
        grepl('Klnefelter', FID) |
        grepl('dupl', IID)
    )
  ) %>%
  transmute(
    ID = as.numeric(gsub('I', '', IID)),
    ADHD_PRS = PRScs_ADHD_phi01,
    fam_id = as.numeric(gsub('EJNQ.*_', '', FID))
  )

## Reading the ID and zygosity variables  
ids <- read_spss(
  'data/zygosity.sav'
) %>%
  rename(
    zygosity = zygotie
  )

## Reading genetic principle components
pca <- read.table(
  'data/genetic_PC.eigenvec',
  col.names = c('fam_id', 'id', glue('PC{1:20}'))
) %>%
  filter(
    grepl('EJNQ', fam_id)
  ) %>%
  filter(
    !(
      grepl('wrongFather', fam_id) | 
        grepl('Klinefelter', fam_id) | 
        grepl('dupl', id)
    )
  ) %>%
  mutate(
    fam_id = as.numeric(gsub('EJNQ.*_', '', fam_id)),
    id = as.numeric(gsub('I', '', id))
  )

## Combining PGS data
all_prs <- ids %>%
  merge(
    edu_prs,
    by.x = 'id',
    by.y = 'ID',
    all = TRUE
  ) %>%
  merge(
    adhd_prs,
    by.x = 'id',
    by.y = 'ID',
    all = TRUE
  ) %>%
  mutate(
    fam_id = id %/% 100,
    fam_mem = sapply(
      id %% 10,
      switch,
      'mother', 'twin1', 
      'twin2', 'father')
  ) %>%
  select(
    id, fam_id, fam_mem, 
    zygosity, Edu_PRS, ADHD_PRS
  ) %>%
  filter(
    !is.na(Edu_PRS) | !is.na(ADHD_PRS)
  ) %>% 
  merge(
    pca,
    by = c('id', 'fam_id')
  )

## Adjusting PGS for the first ten principal components
all_prs$Edu_PRS_adj <- lm(
  Edu_PRS ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  data = all_prs, 
  na.action = na.omit
) %>%
  residuals

all_prs$ADHD_PRS_adj <- lm(
  ADHD_PRS ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10,
  data = all_prs,
  na.action = na.exclude
) %>%
  residuals

## Selecting only necessary variables
all_prs <- all_prs %>%
  select(
    id, fam_id, fam_mem, 
    Edu_PRS = Edu_PRS_adj,
    ADHD_PRS = ADHD_PRS_adj
  ) %>%
  merge(
    unique(
      pheno[, c('fam_id', 'zygosity')]
    ),
    by = 'fam_id',
    all.x = TRUE,
    all.y = FALSE
  )

## Filling in the PGS of MZ twins
for (i in unique(all_prs$fam_id)) {
  row_i <- all_prs %>%
    filter(
      fam_id == i & 
        fam_mem %in% c('twin1', 'twin2')
    )
  
  if (nrow(row_i) == 1 && 
      !is.na(row_i$zygosity) && 
      row_i$zygosity == 'MZ') {
    snd_row <- row_i
    snd_row$id <- 
      row_i$fam_id * 100 + 
      ifelse(row_i$fam_mem == 'twin1', 3, 2)
    snd_row$fam_mem <-
      ifelse(row_i$fam_mem == 'twin1', 'twin2', 'twin1')

    all_prs <- rbind(all_prs, snd_row)
  }
}

## Selecting parental PGS
prs_parents <- all_prs %>%
  filter(
    fam_mem %in% c('mother', 'father')
  ) %>%
  pivot_wider(
    id_cols = fam_id,
    names_from = fam_mem,
    values_from = c(Edu_PRS, ADHD_PRS)
  )

## Selecting twins' PGS and re-merging parental and twins' PGS
prs_twins <- all_prs %>%
  filter(
    fam_mem %in% c('twin1', 'twin2')  
  ) %>%
  select(
    fam_id, id, fam_mem,
    Edu_PRS_tw = Edu_PRS,
    ADHD_PRS_tw = ADHD_PRS
  ) %>%
  merge(
    prs_parents,
    by = 'fam_id',
    all.x = TRUE,
    all.y = FALSE
  )

Code

## Merging hyperactivity and inattention scores and PGS
data <-
  merge(
    prs_twins,
    pheno,
    by = c('id', 'fam_id'),
    all.x = TRUE,
    all.y = FALSE
  ) %>%
  rename(
    Hyp1 = IH_hyp_18to60m,
    Hyp2 = IH_hyp_KtoG6,
    Ina1 = IH_ina_18to60m,
    Ina2 = IH_ina_KtoG6
  ) %>%
  filter(
    !is.na(zygosity)
  ) 

## Short and long variable names
var_names <- c(
  'Hyp1',
  'Ina1',
  'Hyp2',
  'Ina2'
)

long_var_names <- c(
  Hyp1 = 'Hyperactivity, early childhood',
  Ina1 = 'Inattention, early childhood',
  Hyp2 = 'Hyperactivity, primary school',
  Ina2 = 'Inattention, primary school'
)

Code

## Generate a synthetic dataset of data

Preliminary analysis

Helper functions

Code

## Function that makes a table with descriptive statistics and correlations
descr_and_cor <- function(d) {
  k <- ncol(d)
  rn <- names(d)
  N <- sapply(d, function(x) length(na.omit(x)))
  M <- sapply(d, mean, na.rm = TRUE)
  SD <- sapply(d, sd, na.rm = TRUE)
  Med <- sapply(d, median, na.rm = TRUE)
  Min <- sapply(d, min, na.rm = TRUE)
  Max <- sapply(d, max, na.rm = TRUE)
  
  Cors <- Hmisc::rcorr(as.matrix(d))
  cors <- Map(
    function(r, p) {
      sprintf(
        '%0.3f (%0.3f)',
        r,
        p
      )
    },
    Cors$r,
    Cors$P
  )
  
  cors <- 
    matrix(
      unlist(cors),
      nrow = k,
      dimnames = list(
        rn, 1:k
      )
    )
  
  cors[upper.tri(cors, diag = TRUE)] <- NA
  cors <- cors[, -k]
  
  data.frame(
    n = N,
    M = M,
    SD = SD,
    median = Med,
    range = paste0(
      round(Min, 2),
      '-',
      round(Max, 2)
    ),
    cors,
    row.names = paste0(1:k, '. ', rn),
    check.names = FALSE
  )
}

Code

## Functions that plot a hystogram with density

## Return adjusted density
my_dens <- function(x, D, bin_width, ...) {
  dens <- density(D, ...)
  y <- approx(dens, xout = x)$y
  y * bin_width * dens$n
}

## Return a histogram with density
my_hist <- function(d, v, title = v) {
  x <- na.omit(d[, v, drop = TRUE])
  xl <- min(x)
  xu <- max(x)
  nb <- 15
  
  hist_col <- '#8B9EA1'
  dens_col <- '#46515A'
  quant_col <- '#46515A'
  
  quant <- quantile(
    x, 
    c(0.25, 0.5, 0.75),
    na.rm = TRUE
  )

  ggplot(d, aes_string(x = v)) +
    geom_histogram(
      breaks = seq(xl, xu, len = nb + 1),
      color = NA,
      fill = hist_col,
      alpha = 0.6
    ) +
    geom_vline(
      xintercept = quant,
      linetype = 'dashed',
      size = 0.8,
      color = quant_col
    ) +
    annotate(
      geom = 'text',
      x = quant,
      y = 0,
      label = c('25%', '50%', '75%'),
      hjust = -0.2,
      vjust = -0.4,
      alpha = 0.6,
      size = 3.5
    ) +
    stat_function(
      geom = 'line',
      fun = my_dens,
      args = list(
        D = x,
        bin_width = (xu - xl) / nb
      ),
      color = dens_col,
      alpha = 1,
      size = 0.8
    ) +
    labs(
      title = title
    ) +
    theme_classic() +
    theme(
      axis.title.x = element_text(
        margin = margin(
          t = 10,
          b = 10
        ),
        size = 15
      )
    )
}

Code

cor_table <- function(d, x, y, ...) {
  tab <- list()
  for (j in y) {
    column <- data.frame()
    for (i in x) {
      Cor <- cor.test(
        d[, i],
        d[, j]      )
      column <- rbind(
        column,
        data.frame(
          r = sprintf('%0.3f', Cor$estimate),
          n = Cor$parameter + 2,
          SE = sprintf(
            '%0.3f',
            sqrt((1 - Cor$estimate^2)/Cor$parameter)
          ),
          `96% CI` = sprintf(
            '[%0.3f; %0.3f]',
            Cor$conf.int[1],
            Cor$conf.int[2]
          ),
          p = sprintf('%0.3f', Cor$p.value),
          check.names = FALSE
        )
      )
    }
    tab[[j]] <- column
  }
  
  as.mlth.data.frame(
    tab,
    row.names = x
  )
}

Descriptive analysis

Code

## Descriptive statistics of hyperactivity and inattention
data %>%
  select(
    all_of(var_names)
  ) %>%
  rename_at(
    all_of(var_names),
    ~ long_var_names[.]
  ) %>%
  descr_and_cor %>%
  kable2(
    register_output = TRUE,
    caption = 'Descriptive statistics for hyperactivity and inattention (both twins)',
    footnote = 'Pearson correlation with standard error is presented on the right',
    align = 'c',
    digits = 2
  ) %>%
  kable_styling

Descriptive statistics for hyperactivity and inattention (both twins)
	n	M	SD	median	range	1	2	3
1. Hyperactivity, early childhood	724	0.83	0.40	0.80	0-2
2. Inattention, early childhood	724	0.63	0.35	0.67	0-2	0.703 (0.000)
3. Hyperactivity, primary school	722	0.45	0.45	0.32	0-2	0.309 (0.000)	0.263 (0.000)
4. Inattention, primary school	722	0.77	0.55	0.67	0-2	0.244 (0.000)	0.259 (0.000)	0.723 (0.000)
Note:
Pearson correlation with standard error is presented on the right

Code

## Sex differences

lapply(
  setNames(nm = var_names),
  function(v) {
    means <- tapply(
      data[, v],
      data$gender,
      function(x)
        sprintf(
          '%0.2f (%0.2f)',
          mean(x, na.rm = TRUE),
          sd(x, na.rm = TRUE)
        )
    )
    tt <- t.test(
      data[, v] ~ data$gender
    )
    lev <- car::leveneTest(
      data[, v] ~ data$gender
    )
    
    mlth.data.frame(
      `M (SD)` = as.list(means),
      `t-test` = list(
        t = tt$statistic,
        df = tt$parameter,
        p = tt$p.value
      ),
      `Levene's test` = list(
        F = lev[['F value']][1],
        df = paste(lev$Df, collapse = ','),
        p = lev[['Pr(>F)']][1]
      )
    )
  }
) %>%
  do.call('rbind', .) %>%
  `row.names<-`(long_var_names) %>%
  kable2(
    register_output = TRUE,
    caption = 'Sex differences in hyperactivity and inattention',
    align = 'c',
    digits = 3
  ) %>%
  kable_styling

Sex differences in hyperactivity and inattention
	M (SD)		t-test			Levene's test
	Female	Male	t	df	p	F	df	p
Hyperactivity, early childhood	0.80 (0.40)	0.87 (0.39)	-2.627	719.713	0.009	0.219	1,722	0.640
Inattention, early childhood	0.59 (0.34)	0.67 (0.36)	-3.217	721.302	0.001	0.625	1,722	0.430
Hyperactivity, primary school	0.31 (0.36)	0.60 (0.49)	-9.158	660.887	0.000	41.195	1,720	0.000
Inattention, primary school	0.62 (0.50)	0.92 (0.56)	-7.515	708.589	0.000	10.041	1,720	0.002

Code

## Distribution of untransformed ADHD scores

Hs1 <- list()

for (i in var_names) {
  Hs1[[i]] <- data %>%
    my_hist(
      i,
      title = long_var_names[i]
    )
}

grid.arrange(
  grobs = Hs1,
  ncol = 2
) %>%
  white_bg

Distributions of untransformed ADHD measures

Code

## Sqrt-transformation of ADHD scores and adjustment for sex

for (i in var_names) {
  data[, i] <- 
    lm(
      sqrt(data[, i]) ~ data$gender, 
      na.action = na.exclude
    ) %>% resid
}

Code

## Distribution of adjusted and sqrt-transformed ADHD scores

Hs2 <- list()

for (i in var_names) {
  Hs2[[i]] <- data %>%
    my_hist(
      i,
      title = long_var_names[i]
    )
}

grid.arrange(
  grobs = Hs2,
  ncol = 2
) %>%
  white_bg

Distributions of square root transformed ADHD measures

Code

## Correlations between PGS of all family members

## Correlation between parental PGS
data %>%
  select(
    fam_id, 
    ADHD_PRS_mother, ADHD_PRS_father,
    Edu_PRS_mother, Edu_PRS_father
  ) %>%
  unique %>%
  with(
    list(
      ADHD_par = cor.test(ADHD_PRS_mother, ADHD_PRS_father),
      Edu_par = cor.test(Edu_PRS_mother, Edu_PRS_father)
    )
  ) %>%
  lapply(
    \(x) {
      c(
        r = x$estimate,
        t = x$statistic,
        df = x$parameter,
        p = x$p.value,
        lCI = x$conf.int[1],
        uCI = x$conf.int[2]
      ) %>%
        setNames(
          c('r', 't', 'df', 'p', 'lCI', 'uCI')
        )
    }
  ) %>%
  do.call('rbind', .) %>%
  `row.names<-`(c('ADHD-PGS', 'EA-PGS')) %>%
  kable2(
    register_output = TRUE,
    caption = 'Correlation across parental PGS',
    align = 'c',
    digits = 3,
    footnote = 'lCI, uCI = 95%CI'
  ) %>%
  kable_styling

Correlation across parental PGS
	r	t	df	p	lCI	uCI
ADHD-PGS	0.069	0.860	157	0.391	-0.088	0.222
EA-PGS	0.078	0.986	157	0.325	-0.078	0.231
Note:
lCI, uCI = 95%CI

Code

## Correlation between parental and twins' PGS
data %>%
  with({
    fisher_z_test <- 
      function(ct, r0 = 0) { # ct is output of cor.test
        r <- unname(ct$estimate)
        n <- unname(ct$parameter) + 2
        s <- 1/sqrt(n - 3)
        Z <- 0.5 * log((1 + r)/(1 - r)) / s
        Z0 <- 0.5 * log((1 + r0)/(1 - r0)) / s

        c(r = r, Z = Z, p = 2 * (1 - pnorm(abs(Z - Z0))))
      }
    list(
      ADHD_tw_m = cor.test(ADHD_PRS_mother, ADHD_PRS_tw) %>%
        fisher_z_test(0.5),
      ADHD_tw_f = cor.test(ADHD_PRS_father, ADHD_PRS_tw) %>%
        fisher_z_test(0.5),
      Edu_tw_m = cor.test(Edu_PRS_mother, Edu_PRS_tw) %>%
        fisher_z_test(0.5),
      Edu_tw_f = cor.test(Edu_PRS_father, Edu_PRS_tw) %>%
        fisher_z_test(0.5)
    )
  }) %>%
  do.call('rbind', .) %>%
  `row.names<-`(c(
    'ADHD-PGS: twin-mother',
    'ADHD-PGS: twin-father',
    'EA-PGS: twin-mother',
    'EA-PGS: twin-father'
  )) %>%
  kable2(
    register_output = TRUE,
    caption = "Correlation between parental and twins' PGS",
    align = 'c',
    digits = 3,
    footnote = 'Z, p = Fisher-test of the deviation from 0.5'
  ) %>%
  kable_styling

Correlation between parental and twins' PGS
	r	Z	p
ADHD-PGS: twin-mother	0.538	11.874	0.298
ADHD-PGS: twin-father	0.524	10.909	0.546
EA-PGS: twin-mother	0.554	12.311	0.140
EA-PGS: twin-father	0.585	12.580	0.023
Note:
Z, p = Fisher-test of the deviation from 0.5

Correlations between PGS and ADHD scores

Code

## Correlations between PGS and ADHD scores
## Table 2
list(
  ADHD_PRS = data %>%
    rename_at(
      vars(var_names),
      ~ long_var_names[.]
    ) %>%
    mutate(
      `PRS twin` = ADHD_PRS_tw,
      `PRS mother` = ADHD_PRS_mother,
      `PRS father` = ADHD_PRS_father
    ),
  EduYears_PRS = data %>%
    rename_at(
      vars(var_names),
      ~ long_var_names[.]
    ) %>%
    mutate(
      `PRS twin` = Edu_PRS_tw,
      `PRS mother` = Edu_PRS_mother,
      `PRS father` = Edu_PRS_father
    )
) %>%
  lapply(
    cor_table,
    long_var_names,
    c(
      'PRS twin',
      'PRS mother',
      'PRS father'
    )
  ) %>%
  kable2(
    register_output = TRUE,
    caption = 'Correlations between polygenic scores and the measures of hyperactivity and inattention',
    align_first = 'l',
    align = 'c'
  ) %>%
  kable_styling

Correlations between polygenic scores and the measures of hyperactivity and inattention
	PRS twin					PRS mother					PRS father
	r	n	SE	96% CI	p	r	n	SE	96% CI	p	r	n	SE	96% CI	p
ADHD_PRS
Hyperactivity, early childhood	0.025	724	0.037	[-0.048; 0.098]	0.494	0.058	386	0.051	[-0.042; 0.157]	0.256	0.010	349	0.054	[-0.096; 0.114]	0.860
Inattention, early childhood	-0.024	724	0.037	[-0.097; 0.049]	0.519	0.020	386	0.051	[-0.080; 0.120]	0.694	-0.042	349	0.054	[-0.146; 0.063]	0.436
Hyperactivity, primary school	0.095	722	0.037	[0.022; 0.167]	0.011	-0.022	391	0.051	[-0.121; 0.077]	0.660	0.031	352	0.053	[-0.074; 0.135]	0.565
Inattention, primary school	0.109	722	0.037	[0.036; 0.180]	0.003	0.038	391	0.051	[-0.061; 0.137]	0.449	0.017	352	0.053	[-0.088; 0.121]	0.754
EduYears_PRS
Hyperactivity, early childhood	-0.124	724	0.037	[-0.195; -0.051]	0.001	-0.105	386	0.051	[-0.202; -0.005]	0.040	-0.055	349	0.054	[-0.159; 0.050]	0.307
Inattention, early childhood	-0.076	724	0.037	[-0.148; -0.003]	0.042	-0.098	386	0.051	[-0.196; 0.002]	0.054	0.022	349	0.054	[-0.083; 0.127]	0.682
Hyperactivity, primary school	-0.144	722	0.037	[-0.215; -0.072]	0.000	-0.035	391	0.051	[-0.133; 0.065]	0.494	-0.110	352	0.053	[-0.212; -0.006]	0.039
Inattention, primary school	-0.237	722	0.036	[-0.305; -0.167]	0.000	-0.184	391	0.050	[-0.278; -0.087]	0.000	-0.143	352	0.053	[-0.244; -0.039]	0.007

Model of intergenerational transmission

Code

## Transforming the data to the wide format

data2 <- data %>%
  rename(
    Edu_PRS = Edu_PRS_tw,
    ADHD_PRS = ADHD_PRS_tw
  ) %>%
  mutate(
    fam_mem = case_when(
      fam_mem == 'twin1' ~ 'tw1',
      fam_mem == 'twin2' ~ 'tw2',
      TRUE ~ NA_character_
    )
  ) %>%
  pivot_wider(
    id_cols = fam_id,
    values_from = c(
      Edu_PRS,
      ADHD_PRS,
      Hyp1, Hyp2,
      Ina1, Ina2
    ),
    names_from = fam_mem
  ) %>%
  merge(
    data %>%
      select(
        fam_id,
        zygosity,
        Edu_PRS_m = Edu_PRS_mother,
        Edu_PRS_f = Edu_PRS_father,
        ADHD_PRS_m = ADHD_PRS_mother,
        ADHD_PRS_f = ADHD_PRS_father
      ) %>%
      unique,
    by = 'fam_id'
  )

Code

## Generate a synthetic dataset of data2

Definition of the transmission model

The figure is published on OSF under Creative Commons license (CC BY-NC-SA 4.0)

Code

# Definition of the single-PGS transmission model
D_MZ <- data2 %>%
  filter(
    zygosity == 'MZ'
  ) %>%
  transmute(
      PRS_m = ADHD_PRS_m,
      PRS_f = ADHD_PRS_f,
      PRS_tw = ADHD_PRS_tw1,
      X1 = Hyp2_tw1,
      X2 = Hyp2_tw2
    )

D_DZ <- data2 %>%
  filter(
    zygosity == 'DZ'
  ) %>%
  transmute(
      PRS_m = ADHD_PRS_m,
      PRS_f = ADHD_PRS_f,
      PRS_tw1 = ADHD_PRS_tw1,
      PRS_tw2 = ADHD_PRS_tw2,
      X1 = Hyp2_tw1,
      X2 = Hyp2_tw2
    )

new_transmission_model <- mxModel(
  name = 'Transmission',
  ## PRS covariance structure
  mxMatrix(
    type = 'Full',
    nrow = 1,
    ncol = 1,
    values = 0.6,
    free = TRUE,
    name = 'SD_PRS'
  ),
  mxAlgebra(
    SD_PRS * SD_PRS,
    name = 'V_PRS'
  ),
  mxMatrix(
    type = 'Full',
    nrow = 1,
    ncol = 1,
    values = 0.05,
    free = TRUE,
    name = 'r_a'
  ),
  mxAlgebra(
    rbind(
      cbind(
        V_PRS, V_PRS * r_a, 0.5 * V_PRS * (1 + r_a)
      ),
      cbind(
        V_PRS * r_a, V_PRS, 0.5 * V_PRS * (1 + r_a)
      ),
      cbind(
        0.5 * V_PRS * (1 + r_a), 
        0.5 * V_PRS * (1 + r_a),
        V_PRS
      )
    ),
    name = 'Cov_PRS_MZ'
  ),
  mxAlgebra(
    rbind(
      cbind(
        V_PRS, V_PRS * r_a, 
        0.5 * V_PRS * (1 + r_a),
        0.5 * V_PRS * (1 + r_a)
      ),
      cbind(
        V_PRS * r_a, V_PRS, 
        0.5 * V_PRS * (1 + r_a),
        0.5 * V_PRS * (1 + r_a)
      ),
      cbind(
        0.5 * V_PRS * (1 + r_a), 
        0.5 * V_PRS * (1 + r_a),
        V_PRS,
        0.5 * V_PRS * (1 + 0.5 * r_a)
      ),
      cbind(
        0.5 * V_PRS * (1 + r_a), 
        0.5 * V_PRS * (1 + r_a),
        0.5 * V_PRS * (1 + 0.5 * r_a),
        V_PRS
      )
    ),
    name = 'Cov_PRS_DZ'
  ),
  ## Transmission effects (PRS -> X)
  mxMatrix(
    type = 'Full',
    nrow = 1,
    ncol = 3,
    values = 0,
    free = TRUE,
    labels = c(
      'tm', 'tf', 'tt'
    ),
    name = 'T'
  ),
  ## Structural matrices
  mxMatrix(
    type = 'Iden',
    nrow = 5,
    ncol = 5,
    name = 'I5x5'
  ),
  mxMatrix(
    type = 'Iden',
    nrow = 6,
    ncol = 6,
    name = 'I6x6'
  ),
  mxMatrix(
    type = 'Zero',
    nrow = 3,
    ncol = 3,
    name = 'Z3x3'
  ),
  mxMatrix(
    type = 'Zero',
    nrow = 3,
    ncol = 2,
    name = 'Z3x2'
  ),
  mxMatrix(
    type = 'Zero',
    nrow = 4,
    ncol = 4,
    name = 'Z4x4'
  ),
  mxMatrix(
    type = 'Zero',
    nrow = 4,
    ncol = 2,
    name = 'Z4x2'
  ),
  ## MZ
  ## Means
  mxMatrix(
    type = 'Full',
    nrow = 1,
    ncol = 5,
    values = 0,
    free = TRUE,
    labels = c(
      'M_PRS', 'M_PRS', 'M_PRS', 'MX', 'MX'
    ),
    name = 'expMean_MZ'
  ),
  ## Residual variance of twins' trait
  mxMatrix(
    type = 'Symm',
    nrow = 2,
    ncol = 2,
    values = c(0.9, 0.6, 0.9),
    free = TRUE,
    labels = c(
      'V_X', 'Cov_X_MZ', 'V_X'
    ),
    name = 'E_MZ'
  ),
  mxAlgebra(
    rbind(
      cbind(
        Cov_PRS_MZ,
        Z3x2
      ),
      cbind(
        t(Z3x2),
        E_MZ
      )
    ),
    name = 'S_MZ'
  ),
  mxAlgebra(
    rbind(
      cbind(
        Z3x3,
        Z3x2
      ),
      cbind(
        T, 0, 0
      ),
      cbind(
        T, 0, 0
      )
    ),
    name = 'A_MZ'
  ),
  mxAlgebra(
    solve(I5x5 - A_MZ) %&% S_MZ,
    name = 'expCov_MZ'
  ),
  mxModel(
    name = 'MZ',
    mxExpectationNormal(
      'Transmission.expCov_MZ',
      'Transmission.expMean_MZ',
      dimnames = c(
        'PRS_m',
        'PRS_f',
        'PRS_tw',
        'X1',
        'X2'
      )
    ),
    mxFitFunctionML(),
    mxData(
      D_MZ,
      type = 'raw'
    )
  ),
  ## DZ
  ## Means
  mxMatrix(
    type = 'Full',
    nrow = 1,
    ncol = 6,
    values = 0,
    free = TRUE,
    labels = c(
      'M_PRS', 'M_PRS', 'M_PRS', 'M_PRS', 'MX', 'MX'
    ),
    name = 'expMean_DZ'
  ),
  ## Residual variance of twins' trait
  mxMatrix(
    type = 'Symm',
    nrow = 2,
    ncol = 2,
    values = c(0.9, 0.6, 0.9),
    free = TRUE,
    labels = c(
      'V_X', 'Cov_X_DZ', 'V_X'
    ),
    name = 'E_DZ'
  ),
  mxAlgebra(
    rbind(
      cbind(
        Cov_PRS_DZ,
        Z4x2
      ),
      cbind(
        t(Z4x2),
        E_DZ
      )
    ),
    name = 'S_DZ'
  ),
  mxAlgebra(
    rbind(
      cbind(
        Z4x4,
        Z4x2
      ),
      cbind(
        T, 0, 0, 0
      ),
      cbind(
        T[, 1], T[, 2], 0, T[, 3], 0, 0
      )
    ),
    name = 'A_DZ'
  ),
  mxAlgebra(
    solve(I6x6 - A_DZ) %&% S_DZ,
    name = 'expCov_DZ'
  ),
  mxModel(
    name = 'DZ',
    mxExpectationNormal(
      'Transmission.expCov_DZ',
      'Transmission.expMean_DZ',
      dimnames = c(
        'PRS_m',
        'PRS_f',
        'PRS_tw1',
        'PRS_tw2',
        'X1',
        'X2'
      )
    ),
    mxFitFunctionML(),
    mxData(
      D_DZ,
      type = 'raw'
    )
  ),
  mxFitFunctionMultigroup(
    c('MZ', 'DZ')
  ),
  ## Output algebra
  ## Standardized estimates
  mxAlgebra(
    sqrt(expCov_MZ * I5x5),
    name = 'SD'
  ),
  mxAlgebra(
    solve(SD) %&% S_MZ,
    name = 'Sst'
  ),
  mxAlgebra(
    solve(SD) %*% A_MZ %*% SD,
    name = 'Ast'
  ),
  ## Total explained variance
  mxAlgebra(
    1 - Sst[5, 5],
    name = 'Rsq'
  ),
  ## Split of the variance by the type of transmission 
  ## (vertical vs. genetic nurture)
  mxMatrix(
    type = 'Zero',
    nrow = 2,
    ncol = 2,
    name = 'Z2x2'
  ),
  mxAlgebra(
    rbind(
      Ast[1:3, ],
      cbind(
        Z2x2,
        Ast[4:5, 3:5]
      )
    ),
    name = 'Ast_vt'
  ),
  mxAlgebra(
    1 - (solve(I5x5 - Ast_vt) %&% Sst)[5, 5],
    name = 'Rsq_gn'
  ),
  mxAlgebra(
    Rsq - Rsq_gn,
    name = 'Rsq_vt'
  ),
  ## Main output in one place
  mxAlgebra(
    rbind(
      Ast[5, 3],
      Ast[5, 1],
      Ast[5, 2],
      Rsq,
      Rsq_vt,
      Rsq_gn      
    ),
    name = 'output'
  )
)

Code

## Running the transmission model without bootstrap

models_noboot <- list()

for (i in c('Hyp1', 'Ina1', 'Hyp2', 'Ina2')) {
  for (j in c('ADHD_PRS', 'Edu_PRS')) {
      D_MZ <- data2 %>%
      filter(
        zygosity == 'MZ'
      ) %>%
      select(
        PRS_m = !!paste0(j, '_m'),
        PRS_f = !!paste0(j, '_f'),
        PRS_tw = !!paste0(j, '_tw1'),
        X1 = !!paste0(i, '_tw1'),
        X2 = !!paste0(i, '_tw2')
      )

    D_DZ <- data2 %>%
      filter(
        zygosity == 'DZ'
      ) %>%
      select(
        PRS_m = !!paste0(j, '_m'),
        PRS_f = !!paste0(j, '_f'),
        PRS_tw1 = !!paste0(j, '_tw1'),
        PRS_tw2 = !!paste0(j, '_tw2'),
        X1 = !!paste0(i, '_tw1'),
        X2 = !!paste0(i, '_tw2')
      )

    models_noboot[[paste0(j, ', ', i)]] <- 
      mxModel(
        model = new_transmission_model,
        mxModel(
          model = new_transmission_model$MZ,
          name = 'MZ',
          mxData(
            observed = D_MZ,
            type = 'raw'
          )
        ),
        mxModel(
          model = new_transmission_model$DZ,
          name = 'DZ',
          mxData(
            observed = D_DZ,
            type = 'raw'
          )
        )
      ) %>%
      mxRun
  }
}

Code

## Run the models

# models <- models_noboot
# 
# for (m in names(models)) {
#   print(paste('Processing', m))
#   M <- models[[m]]
#   M <- M %>%
#       mxBootstrap(
#         replications = 10000
#       )
# 
#   models[[m]] <- M
# }
# 
## Save to cache
# save(
#   models,
#   file = 'cache/boot_transmission_models.RData'
# )

## Load from cache
load(
  'cache/boot_transmission_models.RData'
)

Parameter estimates

Code

## Extract the estimates with bootstrapped 95% CIs

# par_table <- list()
# 
# for (i in names(models)) {
#   print(paste('Processing', i))
#   par_table[[i]] <- cbind(
#     est = mxEval(
#       output,
#       models[[i]]
#     ),
#     mxBootstrapEval(
#       output,
#       models[[i]],
#       bq = c(0.025, 0.975)
#     )
#   ) %>%
#     `row.names<-`(
#       c(
#         'g_tw',
#         'g_m',
#         'g_f',
#         'R^2',
#         'R^2_vt',
#         'R^2_gn'
#       )
#     )
# }
# 
## Save to cache
# save(
#   par_table,
#   file = 'cache/par_table.RData'
# )

## Load from cache
load(
  'cache/par_table.RData'
)

Code

## Function to extract and return the key parameters

get_model_params <- function(model) {
  model %>%
    summary %>%
    .$CI %>%
    select(
      estimate,
      CI_low = lbound,
      CI_up = ubound,
    ) %>%
    mutate(
      SE = sapply(
        row.names(.),
        mxSE, 
        model, 
        silent = TRUE
      ),
      .after = estimate,
    ) %>%
    mutate(
      sig = ifelse(round(CI_up, 3) < 0 | round(CI_low, 3) > 0, '*', '')
    ) %>%
    mutate(
      `95% CI` = sprintf(
        '[%0.3f; %0.3f]',
        CI_low,
        CI_up
      )
    ) %>%
    select(
      estimate, SE, `95% CI`, sig
    ) %>%
    `row.names<-`(
      c(
        'g_tw',
        'g_m',
        'g_f',
        'R^2',
        'R^2_vt',
        'R^2_gn'
      )
    )
}

Code

## The tables with parameter estimates
## Table 3 and Table 4

## Function that makes a table
par_tables <- par_table %>%
  lapply(
    function(x)
      x %>%
      as.data.frame %>%
      rownames_to_column %>%
      mutate(
        sig = ifelse(round(`97.5%`, 3) < 0 | round(`2.5%`, 3) > 0, '*', '')
      ) %>%
      mutate(
        `95% CI` = sprintf(
          '[%0.3f; %0.3f]',
          `2.5%`,
          `97.5%`
        )
      ) %>%
      column_to_rownames %>%
      select(
        est = V1,
        sig, `95% CI`
      )
  )

## ADHD-PGS
list(
  'Early childhood' = mlth.data.frame(
    Hyperactivity = par_tables[['ADHD_PRS, Hyp1']],
    Inattention = par_tables[['ADHD_PRS, Ina1']]
  ),
  'Primary school' = mlth.data.frame(
    Hyperactivity = par_tables[['ADHD_PRS, Hyp2']],
    Inattention = par_tables[['ADHD_PRS, Ina2']]
  )
) %>%
  kable2(
    register_output = TRUE,
    caption = 'The estimates of direct ADHD PRS effects and explained variance',
    footnote = 'est = estimate; SE = standard error; 95% CI = 95% confidence interval; sig = statistical significance based on 95% CI',
    align = 'c',
    digits = 3
  ) %>%
  kable_styling

The estimates of direct ADHD PRS effects and explained variance
	Hyperactivity			Inattention
	est	sig	95% CI	est	sig	95% CI
Early childhood
g_tw	0.016		[-0.091; 0.116]	0.002		[-0.100; 0.099]
g_m	0.056		[-0.056; 0.161]	0.000		[-0.106; 0.105]
g_f	-0.038		[-0.164; 0.092]	-0.055		[-0.180; 0.068]
R^2	0.005		[0.000; 0.016]	0.003		[0.000; 0.010]
R^2_vt	0.000		[0.000; 0.003]	0.000		[0.000; 0.000]
R^2_gn	0.005		[-0.006; 0.026]	0.003		[-0.006; 0.019]
Primary school
g_tw	0.157	*	[0.055; 0.249]	0.120	*	[0.024; 0.217]
g_m	-0.089		[-0.187; 0.006]	-0.026		[-0.128; 0.072]
g_f	-0.015		[-0.114; 0.097]	-0.009		[-0.110; 0.099]
R^2	0.016	*	[0.003; 0.034]	0.011	*	[0.001; 0.023]
R^2_vt	0.025	*	[0.003; 0.062]	0.014	*	[0.001; 0.047]
R^2_gn	-0.008		[-0.034; 0.011]	-0.003		[-0.026; 0.011]
Note:
est = estimate; SE = standard error; 95% CI = 95% confidence interval; sig = statistical significance based on 95% CI

Code

## EA-PGS
list(
  'Early childhood' = mlth.data.frame(
    Hyperactivity = par_tables[['Edu_PRS, Hyp1']],
    Inattention = par_tables[['Edu_PRS, Ina1']]
  ),
  'Primary school' = mlth.data.frame(
    Hyperactivity = par_tables[['Edu_PRS, Hyp2']],
    Inattention = par_tables[['Edu_PRS, Ina2']]
  )
) %>%
  kable2(
    register_output = TRUE,
    caption = 'The estimates of direct EA PRS effects and explained variance',
    footnote = 'est = estimate; SE = standard error; 95% CI = 95% confidence interval; sig = statistical significance based on 95% CI',
    align = 'c',
    digits = 3
  ) %>%
  kable_styling

The estimates of direct EA PRS effects and explained variance
	Hyperactivity			Inattention
	est	sig	95% CI	est	sig	95% CI
Early childhood
g_tw	-0.129	*	[-0.226; -0.030]	-0.150	*	[-0.251; -0.047]
g_m	-0.001		[-0.100; 0.090]	0.013		[-0.095; 0.116]
g_f	0.004		[-0.103; 0.108]	0.100	*	[0.001; 0.209]
R^2	0.016	*	[0.003; 0.030]	0.014	*	[0.002; 0.032]
R^2_vt	0.017	*	[0.001; 0.051]	0.022	*	[0.002; 0.063]
R^2_gn	0.000		[-0.026; 0.020]	-0.008		[-0.038; 0.011]
Primary school
g_tw	-0.100	*	[-0.196; -0.002]	-0.153	*	[-0.253; -0.051]
g_m	0.020		[-0.083; 0.122]	-0.070		[-0.165; 0.028]
g_f	-0.091		[-0.193; 0.016]	-0.060		[-0.173; 0.064]
R^2	0.026	*	[0.007; 0.049]	0.055	*	[0.028; 0.083]
R^2_vt	0.010		[0.000; 0.038]	0.023	*	[0.003; 0.064]
R^2_gn	0.016		[-0.013; 0.048]	0.032		[-0.014; 0.074]
Note:
est = estimate; SE = standard error; 95% CI = 95% confidence interval; sig = statistical significance based on 95% CI

Fit statistics

Code

## Fit statistics of transmission model
## Supplementary Table 3

models_noboot %>%
  lapply(
    function(l) {
      Sum <- summary(
        l,
        refModels = mxRefModels(l, run = TRUE)
      )
      
      data.frame(
        ep = Sum$estimatedParameters,
        df = Sum$degreesOfFreedom,
        '-2LL' = Sum$Minus2LogLikelihood,
        CFI = ifelse(
          Sum$CFI < 1,
          Sum$CFI,
          1
        ),
        TLI = ifelse(
          Sum$TLI < 1,
          Sum$TLI,
          1
        ),
        RMSEA = Sum$RMSEA,
        'dLL' = Sum$Chi,
        'ddf' = Sum$ChiDoF,
        p = Sum$p,
        check.names = FALSE
      )
    }
  ) %>%
  do.call('rbind', .) %>%
  {
    list(
      'Early childhood' = .[
        c(
          'ADHD_PRS, Hyp1', 'ADHD_PRS, Ina1',
          'Edu_PRS, Hyp1', 'Edu_PRS, Ina1'
        ),
      ],
      'Primary school' = .[
        c(
          'ADHD_PRS, Hyp2', 'ADHD_PRS, Ina2',
          'Edu_PRS, Hyp2', 'Edu_PRS, Ina2'
        ),
      ]
    )
  } %>%
  kable2(
    register_output = TRUE,
    caption = 'Fit statistics of the transmission model',
    footnote = "ep = # of estimated parameters",
    align = 'c',
    digits = c(0, 0, 0, 2, 3, 3, 3, 3, 0, 3)
  ) %>%
  kable_styling

Fit statistics of the transmission model
	ep	df	-2LL	CFI	TLI	RMSEA	dLL	ddf	p
Early childhood
ADHD_PRS, Hyp1	10	1723	1588.47	0.963	0.975	0.029	49.741	37	0.079
ADHD_PRS, Ina1	10	1723	1739.02	1.000	1.000	0.000	33.158	37	0.650
Edu_PRS, Hyp1	10	1723	78.30	0.967	0.977	0.029	49.938	37	0.076
Edu_PRS, Ina1	10	1723	230.88	0.964	0.976	0.029	50.306	37	0.071
Primary school
ADHD_PRS, Hyp2	10	1721	2128.50	1.000	1.000	0.000	24.580	37	0.941
ADHD_PRS, Ina2	10	1721	2172.28	1.000	1.000	0.000	30.026	37	0.785
Edu_PRS, Hyp2	10	1721	622.19	0.988	0.992	0.019	42.556	37	0.244
Edu_PRS, Ina2	10	1721	645.91	0.989	0.992	0.018	41.800	37	0.270
Note:
ep = # of estimated parameters

R^2 plot

Code

## Extracting R^2 from the model output
r2s <- 
  par_tables %>%
  lapply(
    `[`,
    c('R^2', 'R^2_vt', 'R^2_gn'),
    c('est', 'sig')
  ) %>%
  lapply(
    function(x)
      data.frame(
        var = row.names(x),
        x,
        row.names = NULL
      )
  ) %>%
  do.call('rbind', .) %>%
  cbind(
    `colnames<-`(
      str_match(
        row.names(.), 
        '(.*), (.*)(\\d)\\.\\d'
      ),
      paste0('N', 1:4)
    )
  ) %>%
  transmute(
    prs = sapply(
      N2,
      switch,
      Edu_PRS = 'EA PRS (inv)',
      ADHD_PRS = 'ADHD PRS'
    ),
    var,
    value = est*100,
    age = sapply(
      N4,
      switch,
      `1` = 'Early childhood',
      `2` = 'Primary school'
    ),
    measure = paste0(N3, '.'),
    ss = sig
  ) %>%
  merge(
    filter(., var == 'R^2'),
    by = c('prs', 'age', 'measure'),
    suffixes = c('', '2')
  ) %>%
  filter(
    var %in% c('R^2_vt', 'R^2_gn')
  ) %>%
  mutate(
    ss2 = case_when(
      var == 'R^2_gn' ~ ss2,
      TRUE ~ NA_character_
    ),
    var = case_when(
      var == 'R^2_gn' ~ 'genetic nurture',
      var == 'R^2_vt' ~ 'vertical transmission'
    ),
    value2 = case_when(
      is.na(ss2) ~ NA_real_,
      TRUE ~ value2
    )
  ) %>%
  select(-var2)

Code

## The ggplot styling
my_bar <- function(){
  list(
    geom_bar(
      stat = 'identity',
      position = 'stack',
      width = 0.5
    ),
    geom_bar(
      aes(
        y = value2
      ),
      stat = 'identity',
      width = 0.6,
      size = 1.2,
      color = '#7788DD',
      fill = NA,
      na.rm = TRUE
    ),
    geom_text(
      position = position_stack(
        vjust = 1
      ),
      color = 'white',
      vjust = 1.2,
      hjust = 1,
      size = 11,
      na.rm = TRUE
    ),
    geom_text(
       aes(
         y = value2,
         label = ss2
       ),
       size = 12,
       color = '#7788DD',
       vjust = 0.4,
       hjust = -1.5,
       na.rm = TRUE
    ),
    geom_hline(
      yintercept = 0,
      color = 'black',
      size = 1
    ),
    scale_fill_manual(
      name = 'Type of transmission',
      values = c("#CC6677", "#DDCC77"),
      limits = c('vertical transmission', 'genetic nurture')
    ),
    theme_classic(),
    theme(
      text = element_text(
        family = 'Noto Sans',
        color = 'grey35'
      ),
      # X
      axis.line.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank(),
      axis.text.x = element_text(
        size = 18,
        margin = margin(
          t = 10
        )
      ),
      # Y
      axis.line.y = element_line(),
      axis.ticks.y = element_blank(),
      axis.text.y = element_text(
        size = 16,
        vjust = 0.3,
        margin = margin(
          r = 7
        )
      ),
      panel.grid.major.y = element_line(
        colour = 'grey',
        linetype = 'dashed'
      ),
      axis.title.y = element_text(
        size = 18,
        angle = 0,
        vjust = 0.7,
        margin = margin(
          r = 10
        )
      ),
      # Title
      plot.title = element_text(
        hjust=0.5, 
        size=18,
        margin = margin(
          t = 15,
          b = 15
        )
      ),
      plot.caption = element_text(
        hjust=0.5, 
        size=18,
        margin = margin(
          t = 15
        )
      ),
      # Legend
      legend.position = 'none',
      legend.margin = margin(
        t = 30,
        b = 30
      ),
      legend.title = element_text(
        size = 18,
        margin = margin(
          r = 20
        )
      ),
      legend.text = element_text(
        size = 16,
        margin = margin(
          r = 20
        )
      ),
      # Background
      panel.background = element_blank(),
      plot.background = element_blank(),
      legend.background = element_blank(),
      # Plot margins
      plot.margin = unit(c(30, 5, 20, 5), 'points')
    )
  )
}

## Styling the axes
left_axis <- function() {
  list(
    scale_y_continuous(
      name = '',
      limits = c(-1, 6),
      breaks = -1:6,
      expand = c(0, 0)
    )
  )
}

right_axis <- function() {
  list(
    scale_y_continuous(
      name = '',
      limits = c(-1, 6),
      breaks = -1:6,
      expand = c(0, 0),
      position = 'right'
    )
  )
}

no_right_labs <- function() {
  list(
    theme(
      axis.text.y.right = element_blank()
    )
  )
}

no_left_labs <- function() {
  list(
    theme(
      axis.text.y.left = element_blank()
    )
  )
}

Code

## Barplot with R^2
## Figure 2
p11 <- 
  r2s %>%
  filter(
    prs == 'ADHD PRS',
    age == 'Early childhood'
  ) %>%
  ggplot(
    aes(
      x = measure,
      y = value,
      fill = var,
      label = ss
    )
  ) +
  my_bar() +
  left_axis() +
  labs(
    caption = 'Early childhood'
  )

p12 <- r2s %>%
  filter(
    prs == 'ADHD PRS',
    age == 'Primary school'  
  ) %>%
  ggplot(
    aes(
      x = measure,
      y = value,
      fill = var,
      label = ss
    )
  ) +
  my_bar() +
  right_axis() +
  no_right_labs() +
  labs(
    caption = 'Primary school'
  )

p21 <- r2s %>%
  filter(
    prs == 'EA PRS (inv)',
    age == 'Early childhood'
  ) %>%
  ggplot(
    aes(
      x = measure,
      y = value,
      fill = var,
      label = ss
    )
  ) +
  my_bar() +
  left_axis() +
  no_left_labs() + 
  labs(
    caption = 'Early childhood'
  )

p22 <- r2s %>%
  filter(
    prs == 'EA PRS (inv)',
    age == 'Primary school'  
  ) %>%
  ggplot(
    aes(
      x = measure,
      y = value,
      fill = var,
      label = ss
    )
  ) +
  my_bar() +
  right_axis() +
  labs(
    caption = 'Primary school'
  )

p1 <- arrangeGrob(
  grobs = list(
    p11, p12
  ),
  top = textGrob(
    'ADHD-PGS',
    # x = unit(35, 'points'),
    # hjust = 0,
    gp = gpar(
      fontsize = 20
    )
  ),
  ncol = 2
)

p2 <- arrangeGrob(
  grobs = list(
    p21, p22
  ),
  top = textGrob(
    'EA-PGS',
    # x = unit(35, 'points'),
    # hjust = 0,
    gp = gpar(
      fontsize = 20
    )
  ),
  ncol = 2
) 

full_plot <- arrangeGrob(
  grobs = list(
    p1, p2
  ),
  left = textGrob(
    expression(paste(R^{2},'(%)')),
    x = unit(1, 'npc'),
    hjust = 1,
    gp = gpar(
      fontsize = 14
    )
  ),
  right = textGrob(
    expression(paste(R^{2},'(%)')),
    x = unit(0, 'npc'),
    hjust = 0,
    gp = gpar(
      fontsize = 14
    )
  ),
  top = g_legend(
    p11 + theme(legend.position = 'top')
  ),
  ncol = 2
) 

ggsave(
  'figures/Figure2.pdf',
  plot = full_plot,
  width = 12,
  height = 7
)

full_plot %>%
  grid.draw()

The figure is published on OSF under Creative Commons license (CC BY-NC-SA 4.0)

Supplementary analysis

Code

## Supplementary Table 1: Ns

count_data <- data %>%
  rename(
    Edu_PRS = Edu_PRS_tw,
    ADHD_PRS = ADHD_PRS_tw
  ) %>%
  mutate(
    fam_mem = case_when(
      fam_mem == 'twin1' ~ 'tw1',
      fam_mem == 'twin2' ~ 'tw2',
      TRUE ~ NA_character_
    )
  ) %>%
  pivot_wider(
    id_cols = fam_id,
    values_from = c(
      Edu_PRS,
      ADHD_PRS,
      Hyp1, Hyp2,
      Ina1, Ina2
    ),
    names_from = fam_mem
  ) %>%
  merge(
    data %>%
      select(
        fam_id,
        zygosity,
        Edu_PRS_m = Edu_PRS_mother,
        Edu_PRS_f = Edu_PRS_father,
        ADHD_PRS_m = ADHD_PRS_mother,
        ADHD_PRS_f = ADHD_PRS_father
      ) %>%
      unique,
    by = 'fam_id'
  ) %>%
  transmute(
    zygosity = zygosity,
    tw = factor(
      as.numeric(!is.na(Edu_PRS_tw1)) + as.numeric(!is.na(Edu_PRS_tw2)),
      levels = 2:1,
      labels = c('Two twins', 'One twin')
    ),
    par = factor(
      as.numeric(!is.na(Edu_PRS_m)) + as.numeric(!is.na(Edu_PRS_f)),
      levels = 2:0,
      labels = c('Two parents', 'One parent', 'No parents')
    )
  )

counts <- with(
  count_data,
  by(
    count_data,
    zygosity,
    function(d) {
      tab <- table(d$tw, d$par)
      tab <- cbind(tab, All = rowSums(tab))
      tab <- rbind(tab, All = colSums(tab))
    }
  )
)

counts$All <- counts$MZ + counts$DZ

counts %>%
  unclass %>%
  kable2(
    register_output = TRUE,
    caption = 'The summary of genotype data available in QNTS families',
    align = 'c'
  ) %>%
  kable_styling

The summary of genotype data available in QNTS families
	Two parents	One parent	No parents	All
MZ
Two twins	73	19	77	169
One twin	0	0	0	0
All	73	19	77	169
DZ
Two twins	78	41	90	209
One twin	8	7	22	37
All	86	48	112	246
All
Two twins	151	60	167	378
One twin	8	7	22	37
All	159	67	189	415

Code

## Socio-demographic characteristics of the sample
## and comparison of included and excluded families

## Reading the data
pheno2 <- read_spss(
  'data/phenotypic.sav'
) %>%
  mutate(
    # Parental measures
    medu_5m = aedpd04,
    medu_60m = eedpd04,
    fedu_5m = aedsd04,
    fedu_60m = eedsd04,
    maab = agepnaij,
    faab = agesnaij,

    # Family measures
    fam_income_5m = ainhe13,
    fam_income_18m = binhe13,
    fam_income_30m = cinhe13,
    fam_income_48m = dinhe13,
    fam_income_S2 = minpe13,
    
    fam_status_b = statut0,
    fam_status_5m = astatut,
    fam_status_18m = bstatut,
    fam_status_30m = cstatut,
    fam_status_48m = dstatut,
    fam_status_60m = estatut,
    
    # Inattention and hyperactivity
    hyp1_1 = bbelthy, 
    hyp1_2 = cbelthy, 
    hyp1_3 = dbelthy, 
    hyp1_4 = ebelthy,
    hyp2_1 = fbplhyp, 
    hyp2_2 = gbplhyp,
    hyp2_3 = ibplhyp, 
    hyp2_4 = jbplhyp,
    hyp2_5 = kbplhyp,
    ina1_1 = bbeltina, 
    ina1_2 = cbeltina,
    ina1_3 = dbeltina, 
    ina1_4 = ebeltina,
    ina2_1 = fbplina, 
    ina2_2 = gbplina,  
    ina2_3 = ibplina, 
    ina2_4 = jbplina,
    ina2_5 = kbplina
  ) %>%
  mutate(
    # Composite measures
    fam_income = rowMeans(
      select(., starts_with('fam_income')),
      na.rm = TRUE
    ),
    fam_status = select(., starts_with('fam_status')) %>%
      apply(1, function(x) {
        if (all(is.na(x)))
          return(NA)
        else if (all(na.omit(x) == 1))
          return(1)
        else
          return(2)
      }),
    fam_status = factor(
      fam_status,
      levels = 1:2,
      labels = c(
        'two bio parents',
        'other'
      )
    ),
    medu = apply(
      .[, c('medu_5m', 'medu_60m')],
      1,
      function(x) {
        if (all(is.na(x)))
          return(NA)
        else
          max(x, na.rm = TRUE)
      }
    ),
    fedu = apply(
      .[, c('fedu_5m', 'fedu_60m')],
      1,
      function(x) {
        if (all(is.na(x)))
          return(NA)
        else
          max(x, na.rm = TRUE)
      }
    )
  ) %>%
  mutate(
    select = case_when(
      nofamill %in% data2$fam_id ~ 'included',
      TRUE ~ 'excluded'
    )
  ) %>%
  select(
    nofamill,
    select,
    starts_with('medu'),
    starts_with('fedu'),
    starts_with('fam_income'),
    starts_with('fam_status'),
    starts_with('hyp'),
    starts_with('ina')
  )

## Selecting socio-demographic data
pheno_demo <- pheno2 %>%
  select(
    nofamill,
    select,
    starts_with('medu'),
    starts_with('fedu'),
    starts_with('fam_income'),
    starts_with('fam_status')
  )

## Selecting ADHD data
pheno_adhd <- pheno2 %>%
  select(
    nofamill,
    select,
    starts_with('hyp'),
    starts_with('ina')
  ) %>%
  mutate(
    Hyp1 = rowMeans(
      data.frame(
        hyp1_1, hyp1_2,  
        hyp1_3, hyp1_4
      ),
      na.rm = TRUE
    ),
    Hyp2 = rowMeans(
      data.frame(
        hyp2_1, hyp2_2,  
        hyp2_3, hyp2_4,
        hyp2_5
      ),
      na.rm = TRUE
    ),
    Ina1 = rowMeans(
      data.frame(
        ina1_1, ina1_2,  
        ina1_3, ina1_4
      ),
      na.rm = TRUE
    ),
    Ina2 = rowMeans(
      data.frame(
        ina2_1, ina2_2,  
        ina2_3, ina2_4,
        ina2_5
      ),
      na.rm = TRUE
    )
  )
  
## Sociodemographic characteristics
# Family status
pheno_demo %>%
  by(
    .$select,
    \(d)
      with(d,
        sum(fam_status == 'two bio parents', na.rm = TRUE) / 
          length(fam_status)
      )
    ) %>%
  unclass

# Parental education
pheno_demo %>%
  by(
    .$select,
    \(d) mean(d$medu, na.rm = TRUE)
  ) %>%
  unclass

pheno_demo %>%
  by(
    .$select,
    \(d) mean(d$fedu, na.rm = TRUE)
  ) %>%
  unclass


# Family income
pheno_demo %>%
  by(
    .$select,
    \(d)
      with(d,
        sum(fam_income_5m >= 8, na.rm = TRUE) / length(fam_income_5m)
      )
  ) %>% 
  unclass

# Hyperactivity and inattention
pheno_adhd %>%
  by(
    .$select,
    \(d)
      sapply(d[, c('Hyp1', 'Ina1', 'Hyp2', 'Ina2')], mean, na.rm = TRUE)
  ) %>%
  unclass

## Supplementary Table 2 was filled in 'by hand'

Code

## Percent of missing data in each transmission model
models_noboot %>%
  sapply(
    \(m) {
      NA_MZ <- m$MZ$data@observed %>% is.na %>% sum
      NA_DZ <- m$DZ$data@observed %>% is.na %>% sum
      NO_MZ <- m$MZ$data@observed %>% {nrow(.) * ncol(.)}
      NO_DZ <- m$DZ$data@observed %>% {nrow(.) * ncol(.)}
      (NA_MZ + NA_DZ) / (NO_MZ + NO_DZ)
    }
  ) %>% 
  {100 * .} %>%
  t %>% t %>%
  `colnames<-`('% of missing') %>%
  `row.names<-`(c(
    'ADHD-PGS, hyperactivity in early childhood',
    'EA-PGS, hyperactivity in early childhood',
    'ADHD-PGS, inattention in early childhood',
    'EA-PGS, inattention in early childhood',
    'ADHD-PGS, hyperactivity in primary school',
    'EA-PGS, hyperactivity in primary school',
    'ADHD-PGS, inattention in primary school',
    'EA-PGS, inattention in primary school'
  )) %>%
  kable2(
    register_output = TRUE,
    caption = 'Percent of missing data in each transmission model',
    digits = 1,
    align = 'c'
  ) %>%
  kable_styling

Percent of missing data in each transmission model
	% of missing
ADHD-PGS, hyperactivity in early childhood	25.3
EA-PGS, hyperactivity in early childhood	25.3
ADHD-PGS, inattention in early childhood	25.3
EA-PGS, inattention in early childhood	25.3
ADHD-PGS, hyperactivity in primary school	25.4
EA-PGS, hyperactivity in primary school	25.4
ADHD-PGS, inattention in primary school	25.4
EA-PGS, inattention in primary school	25.4