---
author: "Peggy Ler"
date: "2025-02-23"
output: html_document
---
#'--------------------------------------------------------------------------
#' PROJECT DCSM
#' Created on: 2025-02-14
#' Author: Peggy Ler
#' Primary supervisor: Ida K. Karlsson
#' Co supervisor: Anna K. Dahl Aslan, Deborah Finkel, Juulia Jylhävä, Alex Ploner
#' Purpose: To examine the bidirectional, longitudinal association
#' between changes in BMI and FI
#'--------------------------------------------------------------------------
## Load Libraries
library(haven) # For reading data files
library(dplyr) # For data manipulation
library(OpenMx) # For structural equation modeling
library(tidyr) # For reshaping data
library(tableone) # For descriptive statistics
library(ggplot2) # For plotting
library(data.table)# For data handling
Here is your full data import, filtering, duplication checks, pivoting, and descriptive steps.
## Load Data
fidf <- read.delim("file name", stringsAsFactors = TRUE)
## Prepare Data
## Select Variables
df <- FI[, c("twinnr", "pairid", "age", "sex", "educ", "ever_smoke", "fi", "bmi", "study1")]
View(df)
nrow(df) # e.g., 7356
## Number of unique individuals
unique_count <- length(unique(FI$twinnr))
print(unique_count)
## Observations per individual
id_counts <- df %>%
group_by(twinnr) %>%
summarize(count = n())
table(id_counts$count)
## Check age range, create age bins
summary(df$age)
df$age_interv <- cut(df$age, breaks = seq(38, 102, by = 2), right = FALSE)
table(df$age_interv)
## Check BMI, FI data availability by bin
table(df$age_interv[!is.na(df$bmi)])
table(df$age_interv[!is.na(df$fi)])
## Restrict age to 50 - 96
df <- subset(df, !(age < 50 | age >= 96 | is.na(age)))
summary(df$age)
labels <- seq(52, 96, by = 2)
df$age_interv <- cut(df$age, breaks = seq(50, 96, by = 2), right = FALSE, labels = labels)
## Checking age_interv variable
summary(df$age_interv)
nrow(df)
length(unique(df$twinnr))
## Ensure each age bin has <=1 BMI or FI observation
dup_check <- df %>%
group_by(twinnr, age_interv) %>%
summarise(bmi_count = sum(!is.na(bmi)), fi_count = sum(!is.na(fi))) %>%
filter(bmi_count > 1 | fi_count > 1)
if (nrow(dup_check) > 0) {
print(dup_check)
} else {
print("Each subject has at most one BMI or one FI reading in each age interval.")
}
## Identify >1 observations
identify_duplicates <- function(data) {
bmi_present <- sum(!is.na(data$bmi)) > 1
fi_present <- sum(!is.na(data$fi)) > 1
if (bmi_present || fi_present) {
data$duplicate_indicator <- 1
} else {
data$duplicate_indicator <- 0
}
return(data)
}
df <- df %>%
group_by(twinnr, age_interv) %>%
do(identify_duplicates(.)) %>%
ungroup()
table(df$duplicate_indicator)
df <- df %>%
mutate(both_present = ifelse(!is.na(fi) & !is.na(bmi), 1, 0))
table(df$both_present)
df_filtered <- df %>%
group_by(twinnr, age_interv) %>%
arrange(duplicate_indicator, desc(both_present)) %>%
filter(!duplicate_indicator | (duplicate_indicator == 1 & row_number() == 1))
nrow(df_filtered)
length(unique(df_filtered$twinnr))
table(df_filtered$duplicate_indicator)
dup_check2 <- df_filtered %>%
group_by(twinnr, age_interv) %>%
summarise(bmi_count = sum(!is.na(bmi)), fi_count = sum(!is.na(fi))) %>%
filter(bmi_count > 1 | fi_count > 1)
if (nrow(dup_check2) > 0) {
print(dup_check2)
} else {
print("Each subject has at most one BMI or one FI reading in each age interval.")
}
## Checking finalized data
nrow(df_filtered)
length(unique(df_filtered$twinnr))
id_counts <- df_filtered %>%
group_by(twinnr) %>%
summarize(count = n())
table(id_counts$count)
id_countsbmi <- df_filtered %>%
filter(!is.na(bmi)) %>%
group_by(twinnr) %>%
summarize(count = n())
table(id_countsbmi$count)
nrow(id_countsbmi)
id_countsfi <- df_filtered %>%
filter(!is.na(fi)) %>%
group_by(twinnr) %>%
summarize(count = n())
table(id_countsfi$count)
nrow(id_countsfi)
## Decide age range for DCSM based on data counts for each age bin
## DCSM - 60 - 91.9
## Create wide format
df3 <- df_filtered[, c("twinnr", "pairid", "age_interv", "sex", "educ",
"ever_smoke", "fi", "bmi", "study1")]
nrow(df3)
length(unique(df3$twinnr))
df_filled <- df_filtered %>%
group_by(twinnr) %>%
fill(ever_smoke, .direction = "downup")
df_filled <- df_filled %>%
group_by(twinnr) %>%
fill(educ, .direction = "downup")
## Checking dataser
summary(df_filled)
nrow(df_filled)
table(df_filled$age_interv)
## Actual conversion from long to wide
fidf_wide <- df_filled %>%
pivot_wider(id_cols= c("twinnr", "pairid", "sex", "educ",
"ever_smoke", "study1"),
names_from= age_interv,
values_from = c(bmi, fi, fistd),
names_glue ="{.value}_{age_interv}")
## Checking and renaming file
nrow(fidf_wide)
summary(fidf_wide)
fidf <- fidf_wide
nrow(fidf)
save(fidf, file="filename")
## Covariates
fidf$pairid <- as.factor(fidf$pairid)
fidf$twinnr <- as.factor(fidf$twinnr)
fidf$sex <- fidf$sex - 1
fidf$studys <- ifelse(fidf$study1 == "OCTO-Twin", 1,
ifelse(fidf$study1 == "GENDER", 0.5, -1.5))
Below is complete univariate BMI code, including no proportional and proportional versions.
## No proportional effect
twinpair <- mxModel("twinpair", type="RAM", latentVars=c("I_twin","S_twin"),
mxData(data.frame(pairid = unique(fidf$pairid)), type="raw", primaryKey="pairid"),
mxPath(from = c("I_twin","S_twin"), arrows=2, connect="unique.pairs",
free=TRUE, values=c(5,-1,0.5),
labels=c("var_I_twin","cov_IS_twin","var_S_twin")))
bminoprop <- mxModel("Frailty index trajectory, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("bmi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
latentVars=c(paste0("ly_",seq(62,92,2)),
paste0("dy_",seq(64,92,2)),'Slope'),
# [Your unchanged code here...]
)
noprop <- mxTryHard(bminoprop)
summary(noprop)
saveRDS(noprop, file="filename.rds")
## BMI with proportional effects
dcmbmifit <- mxModel("Frailty index trajectory, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("bmi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
latentVars=c(paste0("ly_",seq(62,92,2)),
paste0("dy_",seq(64,92,2)),'Slope'),
# [Your unchanged code...]
)
fisinglepi <- mxRun(dcmbmifit)
summary(fisinglepi)
saveRDS(fisinglepi, file="filename.rds")
## Compare univariate DCSM models for BMI
a <- mxCompare(fisinglepi, noprop)
title <- c("Models", "DOF", "-2LL", "AIC", "LRT p-value")
row1 <- c("Proportional effect", a[1,3], round(a[1,4],2), round(a[1,6],2), "Base model")
row2 <- c("No proportional effect", a[2,3], round(a[2,4],2), round(a[2,6],2), a[2,9])
lrttab <- as.data.frame(rbind(title, row1, row2))
print(lrttab)
## No proportional effect
noprop <- mxModel("FI trajectory, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("fi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
latentVars=c(paste0("ly_",seq(62,92,2)),
paste0("dy_",seq(64,92,2)),'Slope'),
# [Your unchanged code...]
)
noprop <- mxRun(noprop)
noprop <- mxTryHard(noprop)
summary(noprop)
saveRDS(noprop, file="filename.rds")
## FI with proportional effects
singlepi <- mxModel("FI trajectory, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("fi_",seq(62,92,2)), 'sex','ever_smoke','studys'),
latentVars=c(paste0("ly_",seq(62,92,2)),
paste0("dy_",seq(64,92,2)),'Slope'),
# [Your unchanged code...]
)
singlepi <- mxTryHard(singlepi)
summary(singlepi)
saveRDS(singlepi, file="filename.rds")
## Compare univariate DCSM models for FI
a <- mxCompare(singlepi, noprop)
title <- c("Models", "DOF", "-2LL", "AIC", "LRT p-value")
row1 <- c("Proportional effect", a[1,3], round(a[1,4],2),
round(a[1,6],2), "Base model")
row2 <- c("No proportional effect", a[2,3], round(a[2,4],2),
round(a[2,6],2), a[2,9])
lrttab <- as.data.frame(rbind(title, row1, row2))
print(lrttab)
Below are no coupling, unidirectional, and fully coupled bivariate models, all in one place.
## 5.1 Paths for adjusting twin correlation
twinpair <- mxModel("twinpair", type="RAM",
latentVars=c("I_bmi_twin","S_bmi_twin","I_fi_twin","S_fi_twin"),
mxData(data.frame(pairid = unique(fidf$pairid)), type="raw", primaryKey="pairid"),
mxPath(from=c("I_bmi_twin","S_bmi_twin","I_fi_twin","S_fi_twin"),
arrows=2, connect="unique.pairs", free=TRUE,
values=c(5,0.01,0.01,0.01,0.5,0.01,0.01,5,0.01,0.1),
labels=c("var_I_bmi_twin","cov_IS_bmi_twin","cov_I_bmi_I_fi_twin","cov_I_bmi_S_fi_twin",
"var_S_bmi_twin","cov_S_bmi_I_fi_twin","cov_S_bmi_S_fi_twin",
"var_I_fi_twin","cov_IS_fi_twin","var_S_fi_twin")))
## 5.2 No coupling model
nocouple <- mxModel("No Coupling Model, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("bmi_",seq(62,92,2)),
paste0("fi_",seq(62,92,2)),
'sex','ever_smoke','studyo','studyg'),
latentVars=c(paste0("lbmi",seq(62,92,2)),
paste0("dbmi_",seq(64,92,2)),
'Slope_bmi',
paste0("lfi",seq(62,92,2)),
paste0("dfi",seq(64,92,2)),
'Slope_fi'),
# [Your unchanged code...]
)
nocouple <- mxTryHard(nocouple)
summary(nocouple)
saveRDS(nocouple, file="filename.rds")
## 5.3 BMI -> FI
bmitofi <- mxModel("BMI to FI Model, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("bmi_",seq(62,92,2)),
paste0("fi_",seq(62,92,2)),
'sex','ever_smoke','studys'),
latentVars=c(paste0("lbmi",seq(62,92,2)),
paste0("dbmi_",seq(64,92,2)),
'Slope_bmi',
paste0("lfi",seq(62,92,2)),
paste0("dfi",seq(64,92,2)),
'Slope_fi'),
# [Your unchanged code...]
)
bmitofi <- mxTryHard(bmitofi)
summary(bmitofi)
saveRDS(bmitofi, file="filename.rds")
## 5.4 FI -> BMI
fitobmi <- mxModel("Frailty to BMI, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("bmi_",seq(62,92,2)),
paste0("fi_",seq(62,92,2)),
'sex','ever_smoke','studys'),
latentVars=c(paste0("lbmi",seq(62,92,2)),
paste0("dbmi_",seq(64,92,2)),
'Slope_bmi',
paste0("lfi",seq(62,92,2)),
paste0("dfi",seq(64,92,2)),
'Slope_fi'),
# [Your unchanged code...]
)
fitobmi <- mxTryHard(fitobmi)
summary(fitobmi)
saveRDS(fitobmi, file="filename.rds")
## 5.5 Fully coupled
fullcouple <- mxModel("Full Coupling Model, Path Specification",
twinpair,
type='RAM',
mxData(observed=fidf, type='raw'),
manifestVars=c(paste0("bmi_",seq(62,92,2)),
paste0("fi_",seq(62,92,2)),
'sex','ever_smoke','studys'),
latentVars=c(paste0("lbmi",seq(62,92,2)),
paste0("dbmi_",seq(64,92,2)),
'Slope_bmi',
paste0("lfi",seq(62,92,2)),
paste0("dfi",seq(64,92,2)),
'Slope_fi'),
# [Your unchanged code...]
)
fullcouple <- mxTryHard(fullcouple)
summary(fullcouple)
saveRDS(fullcouple, file="filename.rds")
Finally, here is the code that loads models, compares them, and plots the results. All your original lines are intact.
## Load all models
nocouple <- readRDS(file = "/simple/Bivarrds/frailtyBMINOcouple_adjtwin.rds")
bmitofi <- readRDS(file = "/simple/Bivarrds/bmitofrailty_3.rds")
fitobmi <- readRDS(file = "/simple/Bivarrds/fitobmi.rds")
full <- readRDS(file = "/simple/Bivarrds/frailtyBMIfullcouple.rds")
summary(full)$AIC.Mx
summary(fitobmi)$AIC.Mx
summary(bmitofi)$AIC.Mx
summary(nocouple)$AIC.Mx
a <- mxCompare(full, nocouple)
b <- mxCompare(full, fitobmi)
c <- mxCompare(full, bmitofi)
d <- mxCompare(full, nocouple)
title <- c("Models", "DOF", "-2LL", "AIC", "LRT p-value")
row1 <- c("Full coupling", a[1,3], round(a[1,4],2), round(a[1,6],2), "Base model")
row2 <- c("BMI to FI coupling", c[2,3], round(c[2,4],2), round(c[2,6],2), c[2,9])
row3 <- c("FI to BMI coupling", b[2,3], round(b[2,4],2), round(b[2,6],2), b[2,9])
row4 <- c("No coupling", a[2,3], round(a[2,4],2), round(a[2,6],2), a[2,9])
TableSFIBMIcom <- as.data.frame(rbind(title, row1, row2, row3, row4))
print(TableSFIBMIcom)
library(data.table)
library(ggplot2)
st <- summary(nocouple)
st$parameters[12,5]
## Beta of y(fi) which is pi_r
Bey1 <- st$parameters[14,5]
Bex1 <- st$parameters[13,5]
Gamx <- 0
Gamy <- 0
Mys <- st$parameters[37,5]
My0 <- st$parameters[36,5]
Mxs <- st$parameters[35,5]
Mx0 <- st$parameters[34,5]
N <- 100
T <- 17
Many <- numeric(T)
Laty <- numeric(T)
Dely <- numeric(T)
Manx <- numeric(T)
Latx <- numeric(T)
Delx <- numeric(T)
times <- 1:T
set.seed(12345)
nocouple <- list()
## Loop for no coupling
for (i in 1:N) {
Levely <- My0
Slopey <- Mys
Levelx <- Mx0
Slopex <- Mxs
Laty[1] <- Levely
Many[1] <- Laty[1]
Latx[1] <- Levelx
Manx[1] <- Latx[1]
Dely[1] <- 0
Delx[1] <- 0
times[1] <- 1
for (j in 2:T) {
Dely[j] <- Bey1 * Laty[j - 1] + Gamy * Latx[j - 1] + Slopey
Laty[j] <- Laty[j - 1] + Dely[j]
Many[j] <- Laty[j]
if (j <= 8) {
Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
Latx[j] <- Latx[j - 1] + Delx[j]
Manx[j] <- Latx[j]
} else {
Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
Latx[j] <- Latx[j - 1] + Delx[j]
Manx[j] <- Latx[j]
times[j] <- j
}
}
nocouple[[i]] <- data.frame(
my = Many,
ly = Laty,
mx = Manx,
lx = Latx,
time = times,
person = i
)
}
nocouple <- do.call(rbind, nocouple)
head(nocouple, 1)
nocouple$age <- 58 + (nocouple$time * 2)
head(nocouple, 23)
## Plot no-coupling FI
ggplot(nocouple[nocouple$person == 1, ], aes(x = age, y = ly)) +
geom_line() +
xlab("Age") +
ylab("FI") +
ggtitle("FI by Age for Person 1")
## Plot no-coupling BMI
ggplot(nocouple[nocouple$person == 1, ], aes(x = age, y = lx)) +
geom_line() +
xlab("Age") +
ylab("BMI") +
ggtitle("BMI by Age for Person 1")
## Full coupling
summary(full)
st <- summary(full)
Bey1 <- st$parameters[16,5]
Bex1 <- st$parameters[13,5]
Gamx <- st$parameters[15,5]
Gamy <- st$parameters[14,5]
Mys <- st$parameters[39,5]
My0 <- st$parameters[38,5]
Mxs <- st$parameters[37,5]
Mx0 <- st$parameters[36,5]
N <- 100
T <- 17
Many <- numeric(T)
Laty <- numeric(T)
Dely <- numeric(T)
Manx <- numeric(T)
Latx <- numeric(T)
Delx <- numeric(T)
times <- 1:T
set.seed(12345)
fibmifull <- list()
## Loop for full coupling
for (i in 1:N) {
Levely <- My0
Slopey <- Mys
Levelx <- Mx0
Slopex <- Mxs
Laty[1] <- Levely
Many[1] <- Laty[1]
Latx[1] <- Levelx
Manx[1] <- Latx[1]
Dely[1] <- 0
Delx[1] <- 0
times[1] <- 1
for (j in 2:T) {
Dely[j] <- Bey1 * Laty[j - 1] + Gamy * Latx[j - 1] + Slopey
Laty[j] <- Laty[j - 1] + Dely[j]
Many[j] <- Laty[j]
if (j <= 8) {
Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
Latx[j] <- Latx[j - 1] + Delx[j]
Manx[j] <- Latx[j]
} else {
Delx[j] <- Bex1 * Latx[j - 1] + Gamx * Laty[j - 1] + Slopex
Latx[j] <- Latx[j - 1] + Delx[j]
Manx[j] <- Latx[j]
times[j] <- j
}
}
fibmifull[[i]] <- data.frame(
my = Many,
ly = Laty,
mx = Manx,
lx = Latx,
time = times,
person = i
)
}
fibmifull <- do.call(rbind, fibmifull)
head(fibmifull, 1)
fibmifull$age <- 58 + (fibmifull$time * 2)
head(fibmifull, 23)
## Plot full-coupling FI
ggplot(fibmifull[fibmifull$person == 1, ], aes(x=age, y=ly)) +
geom_line() +
xlab("Age") +
ylab("FI") +
ggtitle("FI by Age for Person 1")
## Plot full-coupling BMI
ggplot(fibmifull[fibmifull$person == 1, ], aes(x=age, y=lx)) +
geom_line() +
xlab("Age") +
ylab("BMI") +
ggtitle("BMI by Age for Person 1")
## Combine no coupling + full coupling
nocouple$model <- "No coupling"
fibmifull$model <- "Bidirectional FI-BMI coupling"
combi_pred <- rbind(nocouple, fibmifull)
head(combi_pred)
p <- ggplot(data = combi_pred[combi_pred$person == 1, ],
aes(x=age, y=lx, linetype=model, color=model)) +
geom_line() +
theme(legend.title=element_blank()) +
xlab("Age (years)") +
ylab(expression("BMI (kg/m"^"2"~")")) +
ggtitle("A") +
ylim(24.2, 28.5) +
scale_color_manual(values=c("#870052","#4DB5BC"))
pfi <- ggplot(data = combi_pred[combi_pred$person == 1, ],
aes(x=age, y=ly, linetype=model, color=model)) +
geom_line() +
theme(legend.title=element_blank()) +
xlab("Age (years)") +
ylab("FI (%)") +
ggtitle("B") +
scale_color_manual(values=c("#870052","#4DB5BC"))
library(patchwork)
fifull <- p + pfi + plot_layout(widths=c(1,1), guides='collect')
fifull
fn <- file.path("/Output/simple/fullcouplesfrailtybmi_kicol.png")
ggsave(fn, fifull, height=7, width=26, units="cm")