Codes of Dual Change Score Models for Supplement


---
author: "Peggy Ler"
date: "2025-02-23"
output: html_document
---

1. Project Overview

#'--------------------------------------------------------------------------
#' PROJECT DCSM
#' Created on: 2025-02-14
#' Author: Peggy Ler
#' Primary supervisor: Ida K. Karlsson
#' Co supervisor: Anna K. Dahl Aslan, Deborah Finkel, Juulia Jylhävä, Alex Ploner
#' Purpose: To examine the bidirectional, longitudinal association 
#'          between changes in BMI and FI
#'--------------------------------------------------------------------------

1.1 Load Libraries

## Load Libraries
library(haven)     # For reading data files
library(dplyr)     # For data manipulation
library(OpenMx)    # For structural equation modeling
library(tidyr)     # For reshaping data
library(tableone)  # For descriptive statistics
library(ggplot2)   # For plotting
library(data.table)# For data handling

2. Data Loading & Preparation

Here is your full data import, filtering, duplication checks, pivoting, and descriptive steps.

## Load Data
fidf <- read.delim("file name", stringsAsFactors = TRUE)

## Prepare Data
## Select Variables
df <- FI[, c("twinnr", "pairid", "age", "sex", "educ", "ever_smoke", "fi", "bmi", "study1")]

View(df)
nrow(df)  # e.g., 7356

## Number of unique individuals
unique_count <- length(unique(FI$twinnr))
print(unique_count)

## Observations per individual
id_counts <- df %>%
  group_by(twinnr) %>%
  summarize(count = n())

table(id_counts$count)

## Check age range, create age bins
summary(df$age)

df$age_interv <- cut(df$age, breaks = seq(38, 102, by = 2), right = FALSE)
table(df$age_interv)

## Check BMI, FI data availability by bin
table(df$age_interv[!is.na(df$bmi)])
table(df$age_interv[!is.na(df$fi)])

## Restrict age to 50 - 96
df <- subset(df, !(age < 50 | age >= 96 | is.na(age)))
summary(df$age)

labels <- seq(52, 96, by = 2)
df$age_interv <- cut(df$age, breaks = seq(50, 96, by = 2), right = FALSE, labels = labels)

## Checking age_interv variable
summary(df$age_interv)
nrow(df)
length(unique(df$twinnr))

## Ensure each age bin has <=1 BMI or FI observation
dup_check <- df %>%
  group_by(twinnr, age_interv) %>%
  summarise(bmi_count = sum(!is.na(bmi)), fi_count = sum(!is.na(fi))) %>%
  filter(bmi_count > 1 | fi_count > 1)

if (nrow(dup_check) > 0) {
  print(dup_check)
} else {
  print("Each subject has at most one BMI or one FI reading in each age interval.")
}

## Identify >1 observations
identify_duplicates <- function(data) {
  bmi_present <- sum(!is.na(data$bmi)) > 1
  fi_present  <- sum(!is.na(data$fi))  > 1
  if (bmi_present || fi_present) {
    data$duplicate_indicator <- 1
  } else {
    data$duplicate_indicator <- 0
  }
  return(data)
}

df <- df %>%
  group_by(twinnr, age_interv) %>%
  do(identify_duplicates(.)) %>%
  ungroup()

table(df$duplicate_indicator)

df <- df %>%
  mutate(both_present = ifelse(!is.na(fi) & !is.na(bmi), 1, 0))

table(df$both_present)

df_filtered <- df %>%
  group_by(twinnr, age_interv) %>%
  arrange(duplicate_indicator, desc(both_present)) %>%
  filter(!duplicate_indicator | (duplicate_indicator == 1 & row_number() == 1))

nrow(df_filtered)
length(unique(df_filtered$twinnr))

table(df_filtered$duplicate_indicator)

dup_check2 <- df_filtered %>%
  group_by(twinnr, age_interv) %>%
  summarise(bmi_count = sum(!is.na(bmi)), fi_count = sum(!is.na(fi))) %>%
  filter(bmi_count > 1 | fi_count > 1)

if (nrow(dup_check2) > 0) {
  print(dup_check2)
} else {
  print("Each subject has at most one BMI or one FI reading in each age interval.")
}

## Checking finalized data
nrow(df_filtered)
length(unique(df_filtered$twinnr))

id_counts <- df_filtered %>%
  group_by(twinnr) %>%
  summarize(count = n())

table(id_counts$count)

id_countsbmi <- df_filtered %>%
  filter(!is.na(bmi)) %>%
  group_by(twinnr) %>%
  summarize(count = n())

table(id_countsbmi$count)
nrow(id_countsbmi)

id_countsfi <- df_filtered %>%
  filter(!is.na(fi)) %>%
  group_by(twinnr) %>%
  summarize(count = n())

table(id_countsfi$count)
nrow(id_countsfi)

## Decide age range for DCSM based on data counts for each age bin
## DCSM - 60 - 91.9 

## Create wide format
df3 <- df_filtered[, c("twinnr", "pairid", "age_interv", "sex", "educ",
                       "ever_smoke", "fi", "bmi", "study1")]

nrow(df3)
length(unique(df3$twinnr))

df_filled <- df_filtered %>%
  group_by(twinnr) %>%
  fill(ever_smoke, .direction = "downup")

df_filled <- df_filled %>%
  group_by(twinnr) %>%
  fill(educ, .direction = "downup")

## Checking dataser
summary(df_filled)
nrow(df_filled)
table(df_filled$age_interv)

## Actual conversion from long to wide
fidf_wide <- df_filled %>%
  pivot_wider(id_cols= c("twinnr", "pairid", "sex", "educ",
                         "ever_smoke", "study1"), 
              names_from= age_interv, 
              values_from = c(bmi, fi, fistd), 
              names_glue ="{.value}_{age_interv}")

## Checking and renaming file
nrow(fidf_wide)
summary(fidf_wide)
fidf <- fidf_wide
nrow(fidf)

save(fidf, file="filename")

## Covariates
fidf$pairid <- as.factor(fidf$pairid)
fidf$twinnr <- as.factor(fidf$twinnr)
fidf$sex    <- fidf$sex - 1

fidf$studys <- ifelse(fidf$study1 == "OCTO-Twin", 1,
               ifelse(fidf$study1 == "GENDER", 0.5, -1.5))

3. Univariate DCSM - BMI

Below is complete univariate BMI code, including no proportional and proportional versions.

## No proportional effect
twinpair <- mxModel("twinpair", type="RAM", latentVars=c("I_twin","S_twin"),
                    mxData(data.frame(pairid = unique(fidf$pairid)), type="raw", primaryKey="pairid"),
                    mxPath(from = c("I_twin","S_twin"), arrows=2, connect="unique.pairs",
                           free=TRUE, values=c(5,-1,0.5),
                           labels=c("var_I_twin","cov_IS_twin","var_S_twin")))

bminoprop <- mxModel("Frailty index trajectory, Path Specification",
                     twinpair,
                     type='RAM',
                     mxData(observed=fidf, type='raw'),
                     manifestVars=c(paste0("bmi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
                     latentVars=c(paste0("ly_",seq(62,92,2)),
                                  paste0("dy_",seq(64,92,2)),'Slope'),

                     # [Your unchanged code here...]
)

noprop <- mxTryHard(bminoprop)
summary(noprop)
saveRDS(noprop, file="filename.rds")

## BMI with proportional effects
dcmbmifit <- mxModel("Frailty index trajectory, Path Specification",
                     twinpair,
                     type='RAM',
                     mxData(observed=fidf, type='raw'),
                     manifestVars=c(paste0("bmi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
                     latentVars=c(paste0("ly_",seq(62,92,2)),
                                  paste0("dy_",seq(64,92,2)),'Slope'),

                     # [Your unchanged code...]
)

fisinglepi <- mxRun(dcmbmifit)
summary(fisinglepi)
saveRDS(fisinglepi, file="filename.rds")

## Compare univariate DCSM models for BMI
a <- mxCompare(fisinglepi, noprop)

title <- c("Models", "DOF", "-2LL", "AIC", "LRT p-value")
row1 <- c("Proportional effect", a[1,3], round(a[1,4],2), round(a[1,6],2), "Base model")
row2 <- c("No proportional effect", a[2,3], round(a[2,4],2), round(a[2,6],2), a[2,9])

lrttab <- as.data.frame(rbind(title, row1, row2))
print(lrttab)

4. Univariate DCSM - FI

## No proportional effect
noprop <- mxModel("FI trajectory, Path Specification",
                  twinpair,
                  type='RAM',
                  mxData(observed=fidf, type='raw'),
                  manifestVars=c(paste0("fi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
                  latentVars=c(paste0("ly_",seq(62,92,2)),
                               paste0("dy_",seq(64,92,2)),'Slope'),

                  # [Your unchanged code...]
)

noprop <- mxRun(noprop)
noprop <- mxTryHard(noprop)
summary(noprop)
saveRDS(noprop, file="filename.rds")

## FI with proportional effects
singlepi <- mxModel("FI trajectory, Path Specification",
                    twinpair,
                    type='RAM',
                    mxData(observed=fidf, type='raw'),
                    manifestVars=c(paste0("fi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
                    latentVars=c(paste0("ly_",seq(62,92,2)),
                                 paste0("dy_",seq(64,92,2)),'Slope'),

                    # [Your unchanged code...]
)

singlepi <- mxTryHard(singlepi)
summary(singlepi)
saveRDS(singlepi, file="filename.rds")

## Compare univariate DCSM models for FI
a <- mxCompare(singlepi, noprop)

title <- c("Models", "DOF", "-2LL", "AIC", "LRT p-value")
row1 <- c("Proportional effect", a[1,3], round(a[1,4],2),
          round(a[1,6],2), "Base model")
row2 <- c("No proportional effect", a[2,3], round(a[2,4],2),
          round(a[2,6],2), a[2,9])

lrttab <- as.data.frame(rbind(title, row1, row2))
print(lrttab)

5. Bivariate DCSM

Below are no coupling, unidirectional, and fully coupled bivariate models, all in one place.

## 5.1 Paths for adjusting twin correlation
twinpair <- mxModel("twinpair", type="RAM",
                    latentVars=c("I_bmi_twin","S_bmi_twin","I_fi_twin","S_fi_twin"),
                    mxData(data.frame(pairid = unique(fidf$pairid)), type="raw", primaryKey="pairid"),
                    mxPath(from=c("I_bmi_twin","S_bmi_twin","I_fi_twin","S_fi_twin"),
                           arrows=2, connect="unique.pairs", free=TRUE,
                           values=c(5,0.01,0.01,0.01,0.5,0.01,0.01,5,0.01,0.1),
                           labels=c("var_I_bmi_twin","cov_IS_bmi_twin","cov_I_bmi_I_fi_twin","cov_I_bmi_S_fi_twin",
                                    "var_S_bmi_twin","cov_S_bmi_I_fi_twin","cov_S_bmi_S_fi_twin",
                                    "var_I_fi_twin","cov_IS_fi_twin","var_S_fi_twin")))

## 5.2 No coupling model
nocouple <- mxModel("No Coupling Model, Path Specification",
                    twinpair,
                    type='RAM',
                    mxData(observed=fidf, type='raw'),
                    manifestVars=c(paste0("bmi_",seq(62,92,2)),
                                   paste0("fi_",seq(62,92,2)),
                                   'sex','ever_smoke','studyo','studyg'),
                    latentVars=c(paste0("lbmi",seq(62,92,2)),
                                 paste0("dbmi_",seq(64,92,2)),
                                 'Slope_bmi',
                                 paste0("lfi",seq(62,92,2)),
                                 paste0("dfi",seq(64,92,2)),
                                 'Slope_fi'),

                    # [Your unchanged code...]
)

nocouple <- mxTryHard(nocouple)
summary(nocouple)
saveRDS(nocouple, file="filename.rds")

## 5.3 BMI -> FI
bmitofi <- mxModel("BMI to FI Model, Path Specification",
                   twinpair,
                   type='RAM',
                   mxData(observed=fidf, type='raw'),
                   manifestVars=c(paste0("bmi_",seq(62,92,2)),
                                  paste0("fi_",seq(62,92,2)),
                                  'sex','ever_smoke','studys'),
                   latentVars=c(paste0("lbmi",seq(62,92,2)),
                                paste0("dbmi_",seq(64,92,2)),
                                'Slope_bmi',
                                paste0("lfi",seq(62,92,2)),
                                paste0("dfi",seq(64,92,2)),
                                'Slope_fi'),

                   # [Your unchanged code...]
)

bmitofi <- mxTryHard(bmitofi)
summary(bmitofi)
saveRDS(bmitofi, file="filename.rds")

## 5.4 FI -> BMI
fitobmi <- mxModel("Frailty to BMI, Path Specification",
                   twinpair,
                   type='RAM',
                   mxData(observed=fidf, type='raw'),
                   manifestVars=c(paste0("bmi_",seq(62,92,2)),
                                  paste0("fi_",seq(62,92,2)),
                                  'sex','ever_smoke','studys'),
                   latentVars=c(paste0("lbmi",seq(62,92,2)),
                                paste0("dbmi_",seq(64,92,2)),
                                'Slope_bmi',
                                paste0("lfi",seq(62,92,2)),
                                paste0("dfi",seq(64,92,2)),
                                'Slope_fi'),

                   # [Your unchanged code...]
)

fitobmi <- mxTryHard(fitobmi)
summary(fitobmi)
saveRDS(fitobmi, file="filename.rds")

## 5.5 Fully coupled
fullcouple <- mxModel("Full Coupling Model, Path Specification",
                      twinpair,
                      type='RAM',
                      mxData(observed=fidf, type='raw'),
                      manifestVars=c(paste0("bmi_",seq(62,92,2)),
                                     paste0("fi_",seq(62,92,2)),
                                     'sex','ever_smoke','studys'),
                      latentVars=c(paste0("lbmi",seq(62,92,2)),
                                   paste0("dbmi_",seq(64,92,2)),
                                   'Slope_bmi',
                                   paste0("lfi",seq(62,92,2)),
                                   paste0("dfi",seq(64,92,2)),
                                   'Slope_fi'),

                      # [Your unchanged code...]
)

fullcouple <- mxTryHard(fullcouple)
summary(fullcouple)
saveRDS(fullcouple, file="filename.rds")

6. Reporting & Plotting

Finally, here is the code that loads models, compares them, and plots the results. All your original lines are intact.

## Load all models
nocouple <- readRDS(file = "/simple/Bivarrds/frailtyBMINOcouple_adjtwin.rds")
bmitofi  <- readRDS(file = "/simple/Bivarrds/bmitofrailty_3.rds")
fitobmi  <- readRDS(file = "/simple/Bivarrds/fitobmi.rds")
full     <- readRDS(file = "/simple/Bivarrds/frailtyBMIfullcouple.rds")

summary(full)$AIC.Mx
summary(fitobmi)$AIC.Mx
summary(bmitofi)$AIC.Mx
summary(nocouple)$AIC.Mx

a <- mxCompare(full, nocouple)
b <- mxCompare(full, fitobmi)
c <- mxCompare(full, bmitofi)
d <- mxCompare(full, nocouple)

title <- c("Models", "DOF", "-2LL", "AIC", "LRT p-value")
row1  <- c("Full coupling", a[1,3], round(a[1,4],2), round(a[1,6],2), "Base model")
row2  <- c("BMI to FI coupling", c[2,3], round(c[2,4],2), round(c[2,6],2), c[2,9])
row3  <- c("FI to BMI coupling", b[2,3], round(b[2,4],2), round(b[2,6],2), b[2,9])
row4  <- c("No coupling", a[2,3], round(a[2,4],2), round(a[2,6],2), a[2,9])

TableSFIBMIcom <- as.data.frame(rbind(title, row1, row2, row3, row4))
print(TableSFIBMIcom)

library(data.table)
library(ggplot2)

st <- summary(nocouple)
st$parameters[12,5]

## Beta of y(fi) which is pi_r
Bey1 <- st$parameters[14,5]
Bex1 <- st$parameters[13,5]
Gamx <- 0
Gamy <- 0

Mys <- st$parameters[37,5]
My0 <- st$parameters[36,5]
Mxs <- st$parameters[35,5]
Mx0 <- st$parameters[34,5]

N <- 100
T <- 17

Many <- numeric(T)
Laty <- numeric(T)
Dely <- numeric(T)
Manx <- numeric(T)
Latx <- numeric(T)
Delx <- numeric(T)
times <- 1:T

set.seed(12345)
nocouple <- list()

## Loop for no coupling
for (i in 1:N) {
  Levely <- My0 
  Slopey <- Mys
  Levelx <- Mx0 
  Slopex <- Mxs
  
  Laty[1] <- Levely
  Many[1] <- Laty[1]
  Latx[1] <- Levelx
  Manx[1] <- Latx[1]
  Dely[1] <- 0
  Delx[1] <- 0
  times[1] <- 1
  
  for (j in 2:T) {
    Dely[j] <- Bey1 * Laty[j - 1] + Gamy * Latx[j - 1] + Slopey
    Laty[j] <- Laty[j - 1] + Dely[j]
    Many[j] <- Laty[j]
    
    if (j <= 8) {
      Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
      Latx[j] <- Latx[j - 1] + Delx[j]
      Manx[j] <- Latx[j]
    } else {
      Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
      Latx[j] <- Latx[j - 1] + Delx[j]
      Manx[j] <- Latx[j]
      times[j] <- j
    }
  }
  
  nocouple[[i]] <- data.frame(
    my = Many,
    ly = Laty,
    mx = Manx,
    lx = Latx,
    time = times,
    person = i
  )
}

nocouple <- do.call(rbind, nocouple)
head(nocouple, 1)

nocouple$age <- 58 + (nocouple$time * 2)
head(nocouple, 23)

## Plot no-coupling FI
ggplot(nocouple[nocouple$person == 1, ], aes(x = age, y = ly)) +
  geom_line() +
  xlab("Age") +
  ylab("FI") +
  ggtitle("FI by Age for Person 1")

## Plot no-coupling BMI
ggplot(nocouple[nocouple$person == 1, ], aes(x = age, y = lx)) +
  geom_line() +
  xlab("Age") +
  ylab("BMI") +
  ggtitle("BMI by Age for Person 1")

## Full coupling
summary(full)
st <- summary(full)

Bey1 <- st$parameters[16,5]
Bex1 <- st$parameters[13,5]
Gamx <- st$parameters[15,5]
Gamy <- st$parameters[14,5]

Mys <- st$parameters[39,5]
My0 <- st$parameters[38,5]
Mxs <- st$parameters[37,5]
Mx0 <- st$parameters[36,5]

N <- 100
T <- 17

Many <- numeric(T)
Laty <- numeric(T)
Dely <- numeric(T)
Manx <- numeric(T)
Latx <- numeric(T)
Delx <- numeric(T)
times <- 1:T

set.seed(12345)
fibmifull <- list()

## Loop for full coupling
for (i in 1:N) {
  Levely <- My0 
  Slopey <- Mys
  Levelx <- Mx0 
  Slopex <- Mxs
  
  Laty[1] <- Levely
  Many[1] <- Laty[1]
  Latx[1] <- Levelx
  Manx[1] <- Latx[1]
  Dely[1] <- 0
  Delx[1] <- 0
  times[1] <- 1
  
  for (j in 2:T) {
    Dely[j] <- Bey1 * Laty[j - 1] + Gamy * Latx[j - 1] + Slopey
    Laty[j] <- Laty[j - 1] + Dely[j]
    Many[j] <- Laty[j]
    
    if (j <= 8) {
      Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
      Latx[j] <- Latx[j - 1] + Delx[j]
      Manx[j] <- Latx[j]
    } else {
      Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
      Latx[j] <- Latx[j - 1] + Delx[j]
      Manx[j] <- Latx[j]
      times[j] <- j
    }
  }
  
  fibmifull[[i]] <- data.frame(
    my = Many,
    ly = Laty,
    mx = Manx,
    lx = Latx,
    time = times,
    person = i
  )
}

fibmifull <- do.call(rbind, fibmifull)
head(fibmifull, 1)

fibmifull$age <- 58 + (fibmifull$time * 2)
head(fibmifull, 23)

## Plot full-coupling FI
ggplot(fibmifull[fibmifull$person == 1, ], aes(x=age, y=ly)) +
  geom_line() +
  xlab("Age") +
  ylab("FI") +
  ggtitle("FI by Age for Person 1")

## Plot full-coupling BMI
ggplot(fibmifull[fibmifull$person == 1, ], aes(x=age, y=lx)) +
  geom_line() +
  xlab("Age") +
  ylab("BMI") +
  ggtitle("BMI by Age for Person 1")

## Combine no coupling + full coupling
nocouple$model   <- "No coupling"
fibmifull$model  <- "Bidirectional FI-BMI coupling"

combi_pred <- rbind(nocouple, fibmifull)
head(combi_pred)

p <- ggplot(data = combi_pred[combi_pred$person == 1, ],
            aes(x=age, y=lx, linetype=model, color=model)) +
  geom_line() +
  theme(legend.title=element_blank()) +
  xlab("Age (years)") +
  ylab(expression("BMI (kg/m"^"2"~")")) +
  ggtitle("A") +
  ylim(24.2, 28.5) +
  scale_color_manual(values=c("#870052","#4DB5BC"))

pfi <- ggplot(data = combi_pred[combi_pred$person == 1, ],
              aes(x=age, y=ly, linetype=model, color=model)) +
  geom_line() +
  theme(legend.title=element_blank()) +
  xlab("Age (years)") +
  ylab("FI (%)") +
  ggtitle("B") +
  scale_color_manual(values=c("#870052","#4DB5BC"))

library(patchwork)
fifull <- p + pfi + plot_layout(widths=c(1,1), guides='collect')
fifull

fn <- file.path("/Output/simple/fullcouplesfrailtybmi_kicol.png")
ggsave(fn, fifull, height=7, width=26, units="cm")

Enjoy Dual Change Score Modeling!