Initial commit

f301fd88 · Cecilia Akerlund · f301fd88 · f301fd88
Commit f301fd88 authored Jan 31, 2023 by Cecilia Akerlund
Hide whitespace changes
Inline Side-by-side

Showing with 1603 additions and 0 deletions

EM_TS_CENTER-TBI_incr.seed_exploratory.R EM_TS_CENTER-TBI_incr.seed_exploratory.R +755 -0

EM_TS_CENTER-TBI_incr.seed_summaries.R EM_TS_CENTER-TBI_incr.seed_summaries.R +848 -0

No files found.
--- a/EM_TS_CENTER-TBI_incr.seed_exploratory.R
+++ b/EM_TS_CENTER-TBI_incr.seed_exploratory.R
+# 25 models och each number of clusters, with seed from the best model of n-1 clusters.
+
+rm(list=ls())
+
+library(dplyr)
+library(tidyr)
+library(ggplot2)
+library(gtools)
+library(gridExtra)
+library(mice)
+library(ggalluvial)
+library(rms)
+
+
+nas <- c(NA, "", NaN)
+alfa <- 1 # Bayesian parameter
+nclusters <- c(2:12)
+f <- "/Users/ceciliamac/OneDrive - Karolinska Institutet/Clustering Time-series CENTER-TBI/Clustervars_TS/" #Moved all data to onedrive 2110027
+f2 <- "/Users/ceciliamac/OneDrive - Karolinska Institutet/Clustering CENTER-TBI/Neurobot data/Core3.0/" #Moved all data to onedrive 2110027
+
+f4 <- "/Users/ceciliamac/Dropbox/Cecilia/R_code/Cluster_CENTER-TBI/EM_timeseries/models.alldays.incr.seed/"
+f5 <- "/Users/ceciliamac/Dropbox/Cecilia/R_code/Cluster_CENTER-TBI/EM_timeseries/TEMPTABLES/"
+
+# Create list with LL, entropy, belong and df: (Script inspired by CreatingModelLists_TS.R): -----------------------
+# Createbelonglist <- function(df, belong, k)
+# {
+#   models <- mixedsort(MODELS[grepl(paste0("Maxmodel",k),MODELS)])
+#   df <- mixedsort(DF[grepl(paste0("df",k), DF)])
+#   belong <- mixedsort(BELONG[grepl(paste0("belongprob",k),BELONG)])
+# 
+#   Model.list <- vector(mode="list", length=length(models))
+# 
+#   for (i in 1:length(models))
+#   {
+# 
+#     source(paste0(f4,models[i]))
+# 
+#     Model.list[[i]]$LLMax <- mod$LLMax
+#     Model.list[[i]]$Entropy <- mod$Entropy
+# 
+#     Model.list[[i]]$df <- read.delim(paste0(f4,df[i]), sep=",")
+#     colnames(Model.list[[i]]$df) <- c("iter", "difference")
+#     Model.list[[i]]$belong <- read.delim(paste0(f4,belong[i]), header=FALSE, sep=",")
+#     colnames(Model.list[[i]]$belong) <- c("gupi", "clustnumber", 1:k)
+# 
+#     Model.list[[i]]$filename <- belong[i]
+# 
+#   }
+#   return(Model.list)
+# }
+# 
+# FILES <- list.files(paste0(f4))
+# MODELS <- FILES[grepl("Maxmodel", FILES)]
+# DF <- FILES[grepl("df", FILES)]
+# BELONG <- FILES[grepl("belongprob", FILES)]
+# 
+# ML <- vector(mode="list", length=12)
+# 
+# for(k in nclusters)
+# {
+#   ML[[k]] <- Createbelonglist(df, belong, k)
+#   print(k)
+# }
+# save(ML, file=paste0(f4,"ML.RData"))
+
+
+
+# ---------------------
+
+load(file=paste0(f4,"ML.RData"))
+
+
+# 1. Make LL list.
+LL.fcn <- function(Mod.list)
+{
+  LLs <- data.frame(Model=c(1:length(Mod.list)), LLmax=NA, Entropy=NA)
+  
+  for(i in 1:nrow(LLs))
+  {
+    LLs$LLmax[i] <- Mod.list[[i]]$LLMax
+    LLs$Entropy[i] <- Mod.list[[i]]$Entropy
+    LLs$seed[i] <- Mod.list[[i]]$filename
+    #print(i)
+  }
+  
+  return(LLs)
+}
+
+
+x <- data.frame()
+
+for(i in nclusters)
+{
+  temp <- LL.fcn(ML[[i]])
+  #temp$iter <- j
+  temp$nclusters <- i
+  x <- rbind(x, temp)
+}
+rm(temp)
+
+# Plot LLs for each number of clusters:
+temp <- x %>% group_by(nclusters) %>% summarise(LL25=quantile(LLmax,0.25), LL75=quantile(LLmax,0.75), LLmin=min(LLmax), LLmean=mean(LLmax), LLmedian=median(LLmax), LLmax=max(LLmax))
+
+dev.new()
+print(ggplot(temp, aes(x=nclusters, y=LLmedian)) + geom_line(aes(x=nclusters, y=LLmedian))
+      + geom_line(aes(x=nclusters, y=LL25), color="light blue")
+      + geom_line(aes(x=nclusters, y=LL75), color="light blue")
+      + theme_minimal() +
+        theme(legend.title = element_text(face="italic"),
+              panel.border = element_rect(size=0.5, fill=NA),
+              text = element_text(size = 14)))
+
+# 2. Determine which models have highest LL.
+highestLLs <- x %>% group_by(nclusters) %>% slice_max(LLmax, n=1)
+# write.csv(highestLLs, file=paste0(f5, "highestLLs.csv"), row.names = FALSE)
+
+# # Get best models:
+# # From highestLLs
+# bestmodels <- c()
+# for (i in nclusters)
+# {
+#   w <- paste0("belongprob",i)
+#   print(w)
+#   y <- highestLLs %>% filter(nclusters==i)
+#   z <- y$Model
+#   bestmodels <- c(bestmodels, list.files(f4, pattern=w)[z])
+# }
+
+#---------------------------------------------
+
+
+# 4. Plot PSIs for best 20:
+# PSI for incremental seeds:
+PSI.incr <- read.delim(paste0(f4,"PSI.graph.txt"))
+PSI.incr <- PSI.incr %>% rename(nclusters=NComp, MIN=X0., MAX=X100., MEDIAN=X50.) %>%
+  mutate(index="c")
+
+
+PSI.plotfcn <- function(PSIabcd, TITLE)
+{
+  A <- (ggplot(PSIabcd %>% filter(index=="c"), aes(x=nclusters, y=MEDIAN)) +#, group=as.factor(iter), color=as.factor(iter))) +
+          geom_line() +
+          geom_line(aes(x=nclusters, y=X25.), color="light blue") +
+          geom_line(aes(x=nclusters, y=X75.), color="light blue") +
+          #facet_wrap(~iter, scales="free_x", ncol=4) +
+          scale_x_continuous(breaks=c(1:15)) +
+          theme_minimal() +
+          theme(legend.title = element_text(face="italic"),
+                panel.border = element_rect(size=0.5, fill=NA),
+                text = element_text(size = 9)) +
+          ylim(c(0,1)) +
+          #theme(legend.title = element_text(face="italic"))+#, text = element_text(size = 14)) +
+          xlab("N clusters") +
+          ylab("Median CSI")) #+
+          #ggtitle(paste0(TITLE)))
+  
+  # Penalty for number of clusters: (-1/nclusters)
+  
+  B <- (ggplot(PSIabcd %>% filter(index=="c"), aes(x=nclusters, y=MEDIAN-1/nclusters)) +#, group=as.factor(iter), color=as.factor(iter))) +
+          geom_line() +
+          geom_line(aes(x=nclusters, y=X25.-1/nclusters), color="light blue") +
+          geom_line(aes(x=nclusters, y=X75.-1/nclusters), color="light blue") +
+          #facet_wrap(~iter, scales="free_x", ncol=4) +
+          scale_x_continuous(breaks=c(1:15)) +
+          theme_minimal() +
+          theme(legend.title = element_text(face="italic"),
+                panel.border = element_rect(size=0.5, fill=NA),
+                text = element_text(size = 9)) +
+          ylim(c(0,1)) +
+          #theme(legend.title = element_text(face="italic"))+#, text = element_text(size = 14)) +
+          xlab("N clusters") +
+          ylab("Median CSI"))# + 
+          #ggtitle(paste0(TITLE,", with penalty 1/n")))
+  grid.arrange(A,B, ncol=2)
+  return("i")
+}
+
+# Fig 1:
+pdf("Fig1.pdf",
+    width=8, height=4,
+    colormodel="cmyk")
+PSI.plotfcn(PSI.incr, TITLE="")# TITLE="PSI (c), Incr. seed, 25 best models")
+dev.off()
+
+# AFTER IMPUTATION: ------------------
+# Not clear how many clusters are optimal:
+BL.best <- vector(mode="list", length=12)
+
+for(i in 2:length(BL.best))
+{
+  BL.best[[i]] <- ML[[i]][[(highestLLs %>% filter(nclusters==i))$Model]]$belong
+  # write.table(BL.best[[i]], paste0(f4, "BELONG",i,".csv"), sep=",", row.names=FALSE, col.names = FALSE)
+}
+
+#save(BL.best, file=paste0(f5,"BL.best.RData"))
+
+#Chain 11 has the highest mean LL:
+BL11 <- vector(mode="list", length=12)
+
+for(i in 2:length(BL11))
+{
+  BL11[[i]] <- ML[[i]][[11]]$belong
+}
+
+#save(BL11, file=paste0(f5,"BL11.RData"))
+# --------------------------------------
+
+# SANKEY BETWEEN MODELS (Chain 11): --------------
+library(ggalluvial)
+
+# Best prehosp clustering (used in first paper):
+Belong.P<-read.csv("/Users/ceciliamac/Dropbox/Cecilia/R_code/Cluster_CENTER-TBI/EM/models/Belong_EM.csv") #From previous study
+
+# -----Test of following same path in clustering (Best 7 clusters in model no 14, most best models in chain 18):
+AlluvialBelong <- left_join(Belong.P %>% select(gupi, clust), BL11[[2]] %>% select(gupi, clustnumber), by="gupi") %>%
+  mutate(clustnumber=as.factor(clustnumber)) %>% rename(mod2=clustnumber)
+for(i in 3:length(BL11))
+{
+  AlluvialBelong <- left_join(AlluvialBelong, BL11[[i]] %>% select(gupi, clustnumber), by="gupi") %>%
+    mutate(clustnumber=as.factor(clustnumber))
+  colnames(AlluvialBelong)[ncol(AlluvialBelong)] <- paste0("mod",i)
+}
+
+# write.csv(AlluvialBelong, file=paste0(f5, "AlluvialBelong.csv"), row.names=FALSE)
+
+
+# -------------------
+# Number of patients in each cluster:
+
+
+CV <- read.csv(paste0(f,"TS_vars.csv"), na.strings = nas) #Created in Clustervars_TS.R
+CV.alldays <- read.csv(paste0(f,"TS_vars.alldays.csv")) #Created in Clustervars_TS.R
+
+HRgroups <- read.csv(paste0(f2, "HRgroup.csv"), na.strings = nas) %>%
+  rename(gupi=subjectId) %>% mutate(HR=ifelse(is.na(Brainmonitoring.HDF5URL), 0,1)) %>%
+  select(-Brainmonitoring.HDF5URL)
+
+CV <- left_join(CV, HRgroups, by="gupi")
+
+CV.temp <- CV
+CV.alldays.temp <- CV.alldays
+
+
+# Not really clear how many clusters are optimal. Test IMPACT model to decide:
+
+# IMPACT scores: ----------------------------
+IMPACT.vars <- CV %>% filter(Timepoint<=1) %>%
+  select(gupi, Timepoint, Age, GCSMotor, PupilResponse, Hypoxia, Hypotension, RotterdamCTScore, tSAH, EDH, GlucoseMean, HbMean)
+IMPACT.vars <- IMPACT.vars %>% group_by(gupi) %>% mutate(RotterdamCTScore=max(RotterdamCTScore, na.rm = TRUE),
+                                                         tSAH=max(tSAH, na.rm=TRUE),
+                                                         EDH=max(EDH, na.rm=TRUE),
+                                                         GlucoseMean=max(GlucoseMean, na.rm=TRUE),
+                                                         HbMean=max(HbMean, na.rm = TRUE))
+IMPACT.vars <- IMPACT.vars %>% filter(Timepoint==0) %>% select(-Timepoint)
+IMPACT.vars[IMPACT.vars==-Inf] <- NA
+
+
+# Imputation of missing IMPACT vars: ----------------
+# https://datascienceplus.com/handling-missing-data-with-mice-package-a-simple-approach/
+y <- IMPACT.vars
+y <- y %>% mutate(PupilResponse.Arr=as.factor(PupilResponse))
+str(y)  
+
+y <- mice(y, m=5, seed=1234) #Method is default, pmm (predictive mean matching)
+y <- complete(y)
+
+
+IMPACT.fcn <- function(x)
+{
+  x <- x %>% mutate(agescore = ifelse(between(Age, 30, 39),1,
+                                      ifelse(between(Age, 40,49),2,
+                                             ifelse(between(Age, 50, 59),3,
+                                                    ifelse(between(Age, 60,69),4,
+                                                           ifelse(Age>=70,5,0))))),
+                    motorscore = ifelse(GCSMotor <=2,6,
+                                        ifelse(GCSMotor == 3,4,
+                                               ifelse(GCSMotor == 4, 2,
+                                                      ifelse(GCSMotor >= 5, 0, 3)))),
+                    pupilscore = ifelse(PupilResponse==0,0,
+                                        ifelse(PupilResponse==1,2,
+                                               ifelse(PupilResponse==2,4,NA))),
+                    hypoxiascore = ifelse(Hypoxia==1,1,0),
+                    hypotensionscore = ifelse(Hypotension==1,2,0),
+                    ctclasscore = ifelse(RotterdamCTScore == 1, -2,
+                                         ifelse(RotterdamCTScore==2,0,
+                                                ifelse(RotterdamCTScore %in% c(3,4), 2,
+                                                       ifelse(RotterdamCTScore >= 5,2,NA)))),
+                    SAHscore = ifelse(tSAH==1,2,0),
+                    EDHscore = ifelse(EDH==1,-2,0),
+                    glucosescore = ifelse(between(GlucoseMean, 6,8.9),1,
+                                          ifelse(between(GlucoseMean, 9,11.9), 2,
+                                                 ifelse(between(GlucoseMean, 12,14.9), 3,
+                                                        ifelse(GlucoseMean >= 15, 4, 0)))),
+                    hbscore = ifelse(HbMean < 9, 3,
+                                     ifelse(between(HbMean, 9, 11.9), 2,
+                                            ifelse(between(HbMean, 12, 14.9), 1, 0))))
+  
+  
+  x <- x %>% mutate(corescore=agescore + motorscore + pupilscore,
+                    ctsubscore=hypoxiascore + hypotensionscore + ctclasscore + SAHscore + EDHscore,
+                    labsubscore= glucosescore + hbscore)
+  
+  x <- x %>% mutate(IMPACT.tot.UNFAV= -2.82+0.257*(corescore + ctsubscore + labsubscore),
+                    IMPACT.tot.MORT = -3.42+0.216*(corescore + ctsubscore + labsubscore),
+                    IMPACT.core.UNFAV = -1.62 + 0.299*(corescore),
+                    IMPACT.core.MORT = -2.55 + 0.275*(corescore),
+                    IMPACT.ext.UNFAV = -2.10 + 0.276*(corescore + ctsubscore),
+                    IMPACT.ext.MORT = -2.98 + 0.256*(corescore + ctsubscore))
+  x <- x %>% mutate(IMPACT.tot.UNFAV = 1/(1+exp(-IMPACT.tot.UNFAV)),
+                    IMPACT.tot.MORT = 1/(1+exp(-IMPACT.tot.MORT)),
+                    IMPACT.core.UNFAV = 1/(1+exp(-IMPACT.core.UNFAV)),
+                    IMPACT.core.MORT = 1/(1+exp(-IMPACT.core.MORT)),
+                    IMPACT.ext.UNFAV = 1/(1+exp(-IMPACT.ext.UNFAV)),
+                    IMPACT.ext.MORT = 1/(1+exp(-IMPACT.ext.MORT)))
+  x <- x %>% mutate(IMPACT.UNFAV = ifelse(IMPACT.core.UNFAV > 0.5, 1, 0),
+                    IMPACT.MORT = ifelse(IMPACT.core.MORT > 0.5, 1, 0))
+  
+  return(x)
+}
+
+y <- IMPACT.fcn(y)
+colnames(y) <- gsub("IMPACT","IMPACT.imp",colnames(y))
+y <- y %>% rename(Hb=HbMean, Glucose=GlucoseMean)
+
+z <- left_join(y, CV %>% filter(Timepoint==0) %>% select(gupi, GOSE6mo, GOSE6mo.num), by="gupi")
+
+BL.best.df <- left_join(Belong.P %>% select(gupi, clust), BL.best[[2]] %>% select(gupi, clustnumber), by="gupi") %>%
+  mutate(clustnumber=as.factor(clustnumber)) %>% rename(mod2=clustnumber)
+
+for(i in 3:length(BL.best))
+{
+  BL.best.df <- left_join(BL.best.df, BL.best[[i]] %>% select(gupi, clustnumber), by="gupi") %>%
+    mutate(clustnumber=as.factor(clustnumber))
+  colnames(BL.best.df)[ncol(BL.best.df)] <- paste0("mod",i)
+}
+
+# write.csv(BL.best.df, file=paste0(f5,"BL.best.df.csv"), row.names = FALSE)
+z <- left_join(z, BL.best.df, by="gupi") %>% mutate(MORT=ifelse(GOSE6mo==1,1,0),
+                                                    UNFAV=ifelse(GOSE6mo<5,1,0))
+
+# write.csv(z %>% select(gupi, MORT, UNFAV, contains("IMPACT")), file=paste0(f5,"IMPACT.pred.csv"), row.names=FALSE)
+
+# Logistic regression, proportional odds model, mortality, unfav estimates:
+colnames(CV)
+
+# Create table of R2:-----------------
+LRMpred<-data.frame(
+                    N=NA,
+                    nclusters=c(1:12),
+                    R2M=NA, R2U=NA,
+                    R2coreM=lrm(MORT~Age+PupilResponse+GCSMotor, data=z)$stats[10],
+                    R2ctM=lrm(MORT~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                EDH,data=z)$stats[10],
+                    R2labM=lrm(MORT~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                 EDH+Glucose+Hb,data=z)$stats[10],
+                    R2coreMext=NA,
+                    R2ctMext=NA,
+                    R2labMext=NA,
+                    R2coreU=lrm(UNFAV~Age+PupilResponse+GCSMotor, data=z)$stats[10],
+                    R2ctU=lrm(UNFAV~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                EDH,data=z)$stats[10],
+                    R2labU=lrm(UNFAV~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                 EDH+Glucose+Hb,data=z)$stats[10],
+                    R2coreUext=NA,
+                    R2ctUext=NA,
+                    R2labUext=NA)
+
+# Bootstrapping of IMPACT predictions:------------
+LRMpred1 <- data.frame()
+
+for(i in 1:1000)
+  {
+    bootstrapgupis <- data.frame(gupi=sample(z$gupi, size=nrow(z), replace=TRUE))
+    z1 <- left_join(bootstrapgupis, z, by="gupi")
+    
+    temp <- LRMpred %>% mutate(N=i)
+    temp$R2M[2]<- as.numeric(lrm(as.factor(MORT)~clust, data=z1)$stats[10]) #6 clusters
+    temp$R2U[2]<- as.numeric(lrm(as.factor(UNFAV)~clust, data=z1)$stats[10])
+    temp$R2M[6]<- as.numeric(lrm(as.factor(MORT)~mod6, data=z1)$stats[10]) #6 clusters
+    temp$R2U[6]<- as.numeric(lrm(as.factor(UNFAV)~mod6, data=z1)$stats[10])
+    temp$R2M[3]<- as.numeric(lrm(as.factor(MORT)~mod3, data=z1)$stats[10]) #3 clusters
+    temp$R2U[3]<- as.numeric(lrm(as.factor(UNFAV)~mod3, data=z1)$stats[10])
+    temp$R2M[4]<- as.numeric(lrm(as.factor(MORT)~mod4, data=z1)$stats[10]) #4 clusters
+    temp$R2U[4]<- as.numeric(lrm(as.factor(UNFAV)~mod4, data=z1)$stats[10])
+    temp$R2M[5]<- as.numeric(lrm(as.factor(MORT)~mod5, data=z1)$stats[10]) #5 clusters
+    temp$R2U[5]<- as.numeric(lrm(as.factor(UNFAV)~mod5, data=z1)$stats[10])
+    temp$R2M[7]<- as.numeric(lrm(as.factor(MORT)~mod7, data=z1)$stats[10]) #7 clusters
+    temp$R2U[7]<- as.numeric(lrm(as.factor(UNFAV)~mod7, data=z1)$stats[10])
+    temp$R2M[8]<- as.numeric(lrm(as.factor(MORT)~mod8, data=z1)$stats[10]) #8 clusters
+    temp$R2U[8]<- as.numeric(lrm(as.factor(UNFAV)~mod8, data=z1)$stats[10])
+    temp$R2M[9]<- as.numeric(lrm(as.factor(MORT)~mod9, data=z1)$stats[10]) #9 clusters
+    temp$R2U[9]<- as.numeric(lrm(as.factor(UNFAV)~mod9, data=z1)$stats[10])
+    temp$R2M[10]<- as.numeric(lrm(as.factor(MORT)~mod10, data=z1)$stats[10]) #10 clusters
+    temp$R2U[10]<- as.numeric(lrm(as.factor(UNFAV)~mod10, data=z1)$stats[10])
+    temp$R2M[11]<- as.numeric(lrm(as.factor(MORT)~mod11, data=z1)$stats[10]) #11 clusters
+    temp$R2U[11]<- as.numeric(lrm(as.factor(UNFAV)~mod11, data=z1)$stats[10])
+    temp$R2M[12]<- as.numeric(lrm(as.factor(MORT)~mod12, data=z1)$stats[10]) #8 clusters
+    temp$R2U[12]<- as.numeric(lrm(as.factor(UNFAV)~mod12, data=z1)$stats[10])
+    
+    #Improvement of IMPACT core model:
+    temp$R2coreUext[2] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+clust, data=z1)$stats[10]
+    temp$R2coreUext[3] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod3, data=z1)$stats[10]
+    temp$R2coreUext[4] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod4, data=z1)$stats[10]
+    temp$R2coreUext[5] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod5, data=z1)$stats[10]
+    temp$R2coreUext[6] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod6, data=z1)$stats[10]
+    temp$R2coreUext[7] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod7, data=z1)$stats[10]
+    temp$R2coreUext[8] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod8, data=z1)$stats[10]
+    temp$R2coreUext[9] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod9, data=z1)$stats[10]
+    temp$R2coreUext[10] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod10, data=z1)$stats[10]
+    temp$R2coreUext[11] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod11, data=z1)$stats[10]
+    temp$R2coreUext[12] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod12, data=z1)$stats[10]
+    temp$R2coreMext[2] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+clust, data=z1)$stats[10]
+    temp$R2coreMext[3] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod3, data=z1)$stats[10]
+    temp$R2coreMext[4] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod4, data=z1)$stats[10]
+    temp$R2coreMext[5] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod5, data=z1)$stats[10]
+    temp$R2coreMext[6] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod6, data=z1)$stats[10]
+    temp$R2coreMext[7] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod7, data=z1)$stats[10]
+    temp$R2coreMext[8] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod8, data=z1)$stats[10]
+    temp$R2coreMext[9] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod9, data=z1)$stats[10]
+    temp$R2coreMext[10] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod10, data=z1)$stats[10]
+    temp$R2coreMext[11] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod11, data=z1)$stats[10]
+    temp$R2coreMext[12] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod12, data=z1)$stats[10]
+    
+    # Improvement of IMPACT CT model:
+    temp$R2ctUext[2] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+clust, data=z1)$stats[10]
+    temp$R2ctUext[3] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod3, data=z1)$stats[10]
+    temp$R2ctUext[4] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod4, data=z1)$stats[10]
+    temp$R2ctUext[5] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod5, data=z1)$stats[10]
+    temp$R2ctUext[6] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod6, data=z1)$stats[10]
+    temp$R2ctUext[7] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod7, data=z1)$stats[10]
+    temp$R2ctUext[8] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod8, data=z1)$stats[10]
+    temp$R2ctUext[9] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod9, data=z1)$stats[10]
+    temp$R2ctUext[10] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod10, data=z1)$stats[10]
+    temp$R2ctUext[11] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod11, data=z1)$stats[10]
+    temp$R2ctUext[12] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod12, data=z1)$stats[10]
+    
+    temp$R2ctMext[2] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+clust, data=z1)$stats[10]
+    temp$R2ctMext[3] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod3, data=z1)$stats[10]
+    temp$R2ctMext[4] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod4, data=z1)$stats[10]
+    temp$R2ctMext[5] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod5, data=z1)$stats[10]
+    temp$R2ctMext[6] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod6, data=z1)$stats[10]
+    temp$R2ctMext[7] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod7, data=z1)$stats[10]
+    temp$R2ctMext[8] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod8, data=z1)$stats[10]
+    temp$R2ctMext[9] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod9, data=z1)$stats[10]
+    temp$R2ctMext[10] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod10, data=z1)$stats[10]
+    temp$R2ctMext[11] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod11, data=z1)$stats[10]
+    temp$R2ctMext[12] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod12, data=z1)$stats[10]
+    
+    #Improvement of IMPACT lab model:
+    #2 is old best clustering (6 clusters, clust):
+    temp$R2labUext[2] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+clust, data=z1)$stats[10]
+    temp$R2labUext[3] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod3, data=z1)$stats[10]
+    temp$R2labUext[4] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod4, data=z1)$stats[10]
+    temp$R2labUext[5] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod5, data=z1)$stats[10]
+    temp$R2labUext[6] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod6, data=z1)$stats[10]
+    temp$R2labUext[7] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod7, data=z1)$stats[10]
+    temp$R2labUext[8] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod8, data=z1)$stats[10]
+    temp$R2labUext[9] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod9, data=z1)$stats[10]
+    temp$R2labUext[10] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod10, data=z1)$stats[10]
+    temp$R2labUext[11] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod11, data=z1)$stats[10]
+    temp$R2labUext[12] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod12, data=z1)$stats[10]
+    temp$R2labMext[2] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+clust, data=z1)$stats[10]
+    temp$R2labMext[3] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod3, data=z1)$stats[10]
+    temp$R2labMext[4] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod4, data=z1)$stats[10]
+    temp$R2labMext[5] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod5, data=z1)$stats[10]
+    temp$R2labMext[6] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod6, data=z1)$stats[10]
+    temp$R2labMext[7] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod7, data=z1)$stats[10]
+    temp$R2labMext[8] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod8, data=z1)$stats[10]
+    temp$R2labMext[9] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod9, data=z1)$stats[10]
+    temp$R2labMext[10] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod10, data=z1)$stats[10]
+    temp$R2labMext[11] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod11, data=z1)$stats[10]
+    temp$R2labMext[12] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod12, data=z1)$stats[10]
+
+    print(i)
+    LRMpred1 <- rbind(LRMpred1, temp)
+    
+}
+
+# Validate-test:--------------------------------------------------------
+# Make it parallel, as it is quite slow:
+
+LRMpred.val2 <- data.frame()
+
+library(parallel)
+library(foreach)
+library(doParallel)
+cl <- makeCluster(detectCores()-1)
+registerDoParallel(cl)
+
+parTest <- function(z, LRMpred)
+{
+x<-foreach(i=1:1000) %dopar% {
+  library(dplyr)
+  library(rms)
+  bootstrapgupis <- data.frame(gupi=sample(z$gupi, size=nrow(z), replace=TRUE))
+  z1 <- left_join(bootstrapgupis, z, by="gupi")
+  
+  temp <- LRMpred %>% mutate(N=i)
+  
+  temp$R2M[2]<- as.numeric(validate(lrm(as.factor(MORT)~clust, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #6 clusters
+  temp$R2U[2]<- as.numeric(validate(lrm(as.factor(UNFAV)~clust, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[6]<- as.numeric(validate(lrm(as.factor(MORT)~mod6, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #6 clusters
+  temp$R2U[6]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod6, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[3]<- as.numeric(validate(lrm(as.factor(MORT)~mod3, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #3 clusters
+  temp$R2U[3]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod3, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[4]<- as.numeric(validate(lrm(as.factor(MORT)~mod4, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #4 clusters
+  temp$R2U[4]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod4, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[5]<- as.numeric(validate(lrm(as.factor(MORT)~mod5, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #5 clusters
+  temp$R2U[5]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod5, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[7]<- as.numeric(validate(lrm(as.factor(MORT)~mod7, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #7 clusters
+  temp$R2U[7]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod7, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[8]<- as.numeric(validate(lrm(as.factor(MORT)~mod8, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #8 clusters
+  temp$R2U[8]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod8, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[9]<- as.numeric(validate(lrm(as.factor(MORT)~mod9, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #9 clusters
+  temp$R2U[9]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod9, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[10]<- as.numeric(validate(lrm(as.factor(MORT)~mod10, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #10 clusters
+  temp$R2U[10]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod10, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[11]<- as.numeric(validate(lrm(as.factor(MORT)~mod11, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #11 clusters
+  temp$R2U[11]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod11, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+  temp$R2M[12]<- as.numeric(validate(lrm(as.factor(MORT)~mod12, data=z1, x=TRUE, y=TRUE), B=200)[2,5]) #8 clusters
+  temp$R2U[12]<- as.numeric(validate(lrm(as.factor(UNFAV)~mod12, data=z1, x=TRUE, y=TRUE), B=200)[2,5])
+
+  #Improvement of IMPACT core model:
+  temp$R2coreUext[2] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+clust, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[3] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod3, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[4] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod4, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[5] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod5, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[6] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod6, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[7] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod7, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[8] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod8, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[9] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod9, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[10] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod10, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[11] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod11, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreUext[12] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod12, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[2] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+clust, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[3] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod3, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[4] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod4, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[5] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod5, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[6] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod6, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[7] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod7, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[8] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod8, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[9] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod9, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[10] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod10, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[11] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod11, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2coreMext[12] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod12, data=z1, x=TRUE, y=TRUE))[2,5]
+  
+  # Improvement of IMPACT CT model:
+  temp$R2ctUext[2] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+clust, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[3] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod3, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[4] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod4, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[5] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod5, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[6] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod6, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[7] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod7, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[8] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod8, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[9] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod9, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[10] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod10, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[11] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod11, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctUext[12] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod12, data=z1, x=TRUE, y=TRUE))[2,5]
+  
+  temp$R2ctMext[2] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+clust, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[3] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod3, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[4] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod4, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[5] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod5, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[6] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod6, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[7] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod7, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[8] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod8, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[9] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod9, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[10] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod10, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[11] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod11, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2ctMext[12] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod12, data=z1, x=TRUE, y=TRUE))[2,5]
+  
+  #Improvement of IMPACT lab model:
+  #2 is old best clustering (6 clusters, clust):
+  temp$R2labUext[2] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+clust, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[3] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod3, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[4] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod4, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[5] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod5, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[6] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod6, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[7] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod7, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[8] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod8, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[9] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod9, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[10] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod10, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[11] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod11, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labUext[12] <- validate(lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod12, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[2] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+clust, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[3] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod3, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[4] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod4, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[5] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod5, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[6] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod6, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[7] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod7, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[8] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod8, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[9] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod9, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[10] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod10, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[11] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod11, data=z1, x=TRUE, y=TRUE))[2,5]
+  temp$R2labMext[12] <- validate(lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod12, data=z1, x=TRUE, y=TRUE))[2,5]
+  
+  #print(i)
+  #LRMpred.val2 <- rbind(LRMpred.val2, temp)
+  temp[,drop=FALSE]
+}
+do.call('rbind', x)
+}
+
+LRMpred.val2 <- parTest(z, LRMpred)  
+stopCluster(cl)
+# write.csv(LRMpred.val2, paste0(f5,"LRMpred.val2.bootstrapped.csv"), row.names=FALSE)
+
+LRMpred.bootstrapped <- LRMpred.val2 %>% group_by(nclusters) %>% summarise_all(list(MEAN=mean, SD=sd), na.rm=TRUE)
+temp <- LRMpred.bootstrapped %>% pivot_longer(R2M_MEAN:R2labUext_SD, names_to="Feature", values_to="Value")
+temp <- temp %>% mutate(x=gsub(".*_","", Feature))
+temp <- temp %>% mutate(Feature=gsub("_.*", "", Feature))
+temp <- temp %>% pivot_wider(names_from = x, values_from = Value)
+temp <- temp %>% mutate(MEANSD=paste0(round(MEAN,2), " (",round(SD,2),")"))
+temp <- temp %>% select(-N_MEAN, -MEAN, -SD) %>% pivot_wider(names_from = Feature, values_from = MEANSD)
+
+#--------------------------------------------------
+
+
+
+LRMpred.bootstrapped <- LRMpred1 %>% group_by(nclusters) %>% summarise_all(list(MEAN=mean, SD=sd), na.rm=TRUE)
+temp <- LRMpred.bootstrapped %>% pivot_longer(R2M_MEAN:R2labUext_SD, names_to="Feature", values_to="Value")
+temp <- temp %>% mutate(x=gsub(".*_","", Feature))
+temp <- temp %>% mutate(Feature=gsub("_.*", "", Feature))
+temp <- temp %>% pivot_wider(names_from = x, values_from = Value)
+temp <- temp %>% mutate(MEANSD=paste0(round(MEAN,2), " (",round(SD,2),")"))
+temp <- temp %>% select(-N_MEAN, -bootstrap_SD, -MEAN, -SD) %>% pivot_wider(names_from = Feature, values_from = MEANSD)
+
+
+
+
+
+
+# 2 clusters = old best clustering (6 clusters, cl):
+LRMpred<-data.frame(nclusters=c(1:12),
+                    R2M=NA, R2U=NA,
+                    R2coreM=lrm(MORT~Age+PupilResponse+GCSMotor, data=z)$stats[10],
+                    R2ctM=lrm(MORT~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                EDH,data=z)$stats[10],
+                    R2labM=lrm(MORT~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                 EDH+Glucose+Hb,data=z)$stats[10],
+                    R2coreMext=NA,
+                    R2ctMext=NA,
+                    R2labMext=NA,
+                    R2coreU=lrm(UNFAV~Age+PupilResponse+GCSMotor, data=z)$stats[10],
+                    R2ctU=lrm(UNFAV~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                EDH,data=z)$stats[10],
+                    R2labU=lrm(UNFAV~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+
+                                 EDH+Glucose+Hb,data=z)$stats[10],
+                    R2coreUext=NA,
+                    R2ctUext=NA,
+                    R2labUext=NA)
+
+LRMpred$R2M[2]<- as.numeric(lrm(as.factor(MORT)~clust, data=z)$stats[10]) #6 clusters
+LRMpred$R2U[2]<- as.numeric(lrm(as.factor(UNFAV)~clust, data=z)$stats[10])
+LRMpred$R2M[6]<- as.numeric(lrm(as.factor(MORT)~mod6, data=z)$stats[10]) #6 clusters
+LRMpred$R2U[6]<- as.numeric(lrm(as.factor(UNFAV)~mod6, data=z)$stats[10])
+LRMpred$R2M[3]<- as.numeric(lrm(as.factor(MORT)~mod3, data=z)$stats[10]) #3 clusters
+LRMpred$R2U[3]<- as.numeric(lrm(as.factor(UNFAV)~mod3, data=z)$stats[10])
+LRMpred$R2M[4]<- as.numeric(lrm(as.factor(MORT)~mod4, data=z)$stats[10]) #4 clusters
+LRMpred$R2U[4]<- as.numeric(lrm(as.factor(UNFAV)~mod4, data=z)$stats[10])
+LRMpred$R2M[5]<- as.numeric(lrm(as.factor(MORT)~mod5, data=z)$stats[10]) #5 clusters
+LRMpred$R2U[5]<- as.numeric(lrm(as.factor(UNFAV)~mod5, data=z)$stats[10])
+LRMpred$R2M[7]<- as.numeric(lrm(as.factor(MORT)~mod7, data=z)$stats[10]) #7 clusters
+LRMpred$R2U[7]<- as.numeric(lrm(as.factor(UNFAV)~mod7, data=z)$stats[10])
+LRMpred$R2M[8]<- as.numeric(lrm(as.factor(MORT)~mod8, data=z)$stats[10]) #8 clusters
+LRMpred$R2U[8]<- as.numeric(lrm(as.factor(UNFAV)~mod8, data=z)$stats[10])
+LRMpred$R2M[9]<- as.numeric(lrm(as.factor(MORT)~mod9, data=z)$stats[10]) #9 clusters
+LRMpred$R2U[9]<- as.numeric(lrm(as.factor(UNFAV)~mod9, data=z)$stats[10])
+LRMpred$R2M[10]<- as.numeric(lrm(as.factor(MORT)~mod10, data=z)$stats[10]) #10 clusters
+LRMpred$R2U[10]<- as.numeric(lrm(as.factor(UNFAV)~mod10, data=z)$stats[10])
+LRMpred$R2M[11]<- as.numeric(lrm(as.factor(MORT)~mod11, data=z)$stats[10]) #11 clusters
+LRMpred$R2U[11]<- as.numeric(lrm(as.factor(UNFAV)~mod11, data=z)$stats[10])
+LRMpred$R2M[12]<- as.numeric(lrm(as.factor(MORT)~mod12, data=z)$stats[10]) #8 clusters
+LRMpred$R2U[12]<- as.numeric(lrm(as.factor(UNFAV)~mod12, data=z)$stats[10])
+
+#Improvement of IMPACT core model:
+LRMpred$R2coreUext[2] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+clust, data=z)$stats[10]
+LRMpred$R2coreUext[3] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod3, data=z)$stats[10]
+LRMpred$R2coreUext[4] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod4, data=z)$stats[10]
+LRMpred$R2coreUext[5] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod5, data=z)$stats[10]
+LRMpred$R2coreUext[6] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod6, data=z)$stats[10]
+LRMpred$R2coreUext[7] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod7, data=z)$stats[10]
+LRMpred$R2coreUext[8] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod8, data=z)$stats[10]
+LRMpred$R2coreUext[9] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod9, data=z)$stats[10]
+LRMpred$R2coreUext[10] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod10, data=z)$stats[10]
+LRMpred$R2coreUext[11] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod11, data=z)$stats[10]
+LRMpred$R2coreUext[12] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+mod12, data=z)$stats[10]
+LRMpred$R2coreMext[2] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+clust, data=z)$stats[10]
+LRMpred$R2coreMext[3] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod3, data=z)$stats[10]
+LRMpred$R2coreMext[4] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod4, data=z)$stats[10]
+LRMpred$R2coreMext[5] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod5, data=z)$stats[10]
+LRMpred$R2coreMext[6] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod6, data=z)$stats[10]
+LRMpred$R2coreMext[7] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod7, data=z)$stats[10]
+LRMpred$R2coreMext[8] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod8, data=z)$stats[10]
+LRMpred$R2coreMext[9] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod9, data=z)$stats[10]
+LRMpred$R2coreMext[10] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod10, data=z)$stats[10]
+LRMpred$R2coreMext[11] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod11, data=z)$stats[10]
+LRMpred$R2coreMext[12] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+mod12, data=z)$stats[10]
+
+# Improvement of IMPACT CT model:
+LRMpred$R2ctUext[2] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+clust, data=z)$stats[10]
+LRMpred$R2ctUext[3] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod3, data=z)$stats[10]
+LRMpred$R2ctUext[4] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod4, data=z)$stats[10]
+LRMpred$R2ctUext[5] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod5, data=z)$stats[10]
+LRMpred$R2ctUext[6] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod6, data=z)$stats[10]
+LRMpred$R2ctUext[7] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod7, data=z)$stats[10]
+LRMpred$R2ctUext[8] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod8, data=z)$stats[10]
+LRMpred$R2ctUext[9] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod9, data=z)$stats[10]
+LRMpred$R2ctUext[10] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod10, data=z)$stats[10]
+LRMpred$R2ctUext[11] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod11, data=z)$stats[10]
+LRMpred$R2ctUext[12] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod12, data=z)$stats[10]
+
+LRMpred$R2ctMext[2] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+clust, data=z)$stats[10]
+LRMpred$R2ctMext[3] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod3, data=z)$stats[10]
+LRMpred$R2ctMext[4] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod4, data=z)$stats[10]
+LRMpred$R2ctMext[5] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod5, data=z)$stats[10]
+LRMpred$R2ctMext[6] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod6, data=z)$stats[10]
+LRMpred$R2ctMext[7] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod7, data=z)$stats[10]
+LRMpred$R2ctMext[8] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod8, data=z)$stats[10]
+LRMpred$R2ctMext[9] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod9, data=z)$stats[10]
+LRMpred$R2ctMext[10] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod10, data=z)$stats[10]
+LRMpred$R2ctMext[11] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod11, data=z)$stats[10]
+LRMpred$R2ctMext[12] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+mod12, data=z)$stats[10]
+
+#Improvement of IMPACT lab model:
+#2 is old best clustering (6 clusters, clust):
+LRMpred$R2labUext[2] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+clust, data=z)$stats[10]
+LRMpred$R2labUext[3] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod3, data=z)$stats[10]
+LRMpred$R2labUext[4] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod4, data=z)$stats[10]
+LRMpred$R2labUext[5] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod5, data=z)$stats[10]
+LRMpred$R2labUext[6] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod6, data=z)$stats[10]
+LRMpred$R2labUext[7] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod7, data=z)$stats[10]
+LRMpred$R2labUext[8] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod8, data=z)$stats[10]
+LRMpred$R2labUext[9] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod9, data=z)$stats[10]
+LRMpred$R2labUext[10] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod10, data=z)$stats[10]
+LRMpred$R2labUext[11] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod11, data=z)$stats[10]
+LRMpred$R2labUext[12] <- lrm(as.factor(UNFAV)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod12, data=z)$stats[10]
+LRMpred$R2labMext[2] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+clust, data=z)$stats[10]
+LRMpred$R2labMext[3] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod3, data=z)$stats[10]
+LRMpred$R2labMext[4] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod4, data=z)$stats[10]
+LRMpred$R2labMext[5] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod5, data=z)$stats[10]
+LRMpred$R2labMext[6] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod6, data=z)$stats[10]
+LRMpred$R2labMext[7] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod7, data=z)$stats[10]
+LRMpred$R2labMext[8] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod8, data=z)$stats[10]
+LRMpred$R2labMext[9] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod9, data=z)$stats[10]
+LRMpred$R2labMext[10] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod10, data=z)$stats[10]
+LRMpred$R2labMext[11] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod11, data=z)$stats[10]
+LRMpred$R2labMext[12] <- lrm(as.factor(MORT)~Age+PupilResponse+GCSMotor+Hypoxia+Hypotension+RotterdamCTScore+tSAH+Glucose+Hb+mod12, data=z)$stats[10]
+
+# write.csv(LRMpred, file=paste0(f5, "LRMpred.csv"), row.names=FALSE)
+
+# -------------------------------------------
+# 9 clusters seem best???
+# Continue in EM_TS_CENTER-TBI_incr.seed_summaries.R
\ No newline at end of file
--- a/EM_TS_CENTER-TBI_incr.seed_summaries.R
+++ b/EM_TS_CENTER-TBI_incr.seed_summaries.R
+# 25 models och each number of clusters, with seed from the best model of n-1 clusters.
+
+rm(list=ls())
+
+library(dplyr)
+library(tidyr)
+library(ggplot2)
+library(gtools)
+library(gridExtra)
+library(mice)
+library(ggalluvial)
+library(rms)
+library(greekLetters)
+
+
+nas <- c(NA, "", NaN)
+alfa <- 1 # Bayesian parameter
+nclusters <- c(2:12)
+f <- "/Users/ceciliamac/OneDrive - Karolinska Institutet/Clustering Time-series CENTER-TBI/Clustervars_TS/" #Moved all data to onedrive 2110027
+f2 <- "/Users/ceciliamac/OneDrive - Karolinska Institutet/Clustering CENTER-TBI/Neurobot data/Core3.0/" #Moved all data to onedrive 2110027
+
+f4 <- "/Users/ceciliamac/Dropbox/Cecilia/R_code/Cluster_CENTER-TBI/EM_timeseries/models.alldays.incr.seed/"
+f5 <- "/Users/ceciliamac/Dropbox/Cecilia/R_code/Cluster_CENTER-TBI/EM_timeseries/TEMPTABLES/"
+
+
+load(file=paste0(f4,"ML.RData"))
+highestLLs <- read.csv(file=paste0(f5, "highestLLs.csv"))
+load(file=paste0(f5,"BL.best.RData")) #all best models
+load(file=paste0(f5,"BL11.RData")) #Highest mean LL
+AlluvialBelong <- read.csv(file=paste0(f5, "AlluvialBelong.csv")) #"Chain" 11
+BL.best.df <- read.csv(file=paste0(f5,"BL.best.df.csv")) # Best models of each number of cluster
+LRMpred <- read.csv(file=paste0(f5, "LRMpred.csv"))
+
+CV <- read.csv(paste0(f,"TS_vars.csv"), na.strings = nas) #Created in Clustervars_TS.R
+CV.alldays <- read.csv(paste0(f,"TS_vars.alldays.csv")) #Created in Clustervars_TS.R
+HRgroups <- read.csv(paste0(f2, "HRgroup.csv"), na.strings = nas) %>%
+  rename(gupi=subjectId) %>% mutate(HR=ifelse(is.na(Brainmonitoring.HDF5URL), 0,1)) %>%
+  select(-Brainmonitoring.HDF5URL)
+CV <- left_join(CV, HRgroups, by="gupi")
+
+IMPACT.pred <- read.csv(file=paste0(f5,"IMPACT.pred.csv"))
+
+#---------------------------------------------
+CV.temp <- CV
+CV.alldays.temp <- CV.alldays
+
+# Before sorting for outcome: ----------------
+CV.temp <- left_join(CV, BL.best[[6]], by="gupi")
+CV.alldays.temp <- left_join(CV.alldays, BL.best[[6]], by="gupi")
+
+print(count(CV.alldays.temp,clustnumber))
+Npts <- data.frame(count(CV.alldays.temp, clustnumber))
+
+
+CV.temp <- CV.temp %>% mutate(GOSE6mo=ifelse(GOSE6mo=="2_or_3", "2, 3", GOSE6mo))
+dev.new()
+print(ggplot(CV.temp %>% filter(Timepoint==0), aes(x=clustnumber, fill=GOSE6mo)) +
+        #scale_fill_brewer(palette="RdYlGn") +
+        scale_fill_manual(values=c("#003f5c","#2f4b7c", "#665191", "#a05195", "#d45087", "#f95d6a", "#ff7c43", "#ffa600")) +
+        theme_minimal() + labs(fill="GOS-E 6 months")+
+        geom_bar(position="fill")) #Color set2
+
+# Change cluster names according to outcome: ---------------
+clust <- data.frame(clustnumber=c(1:6))
+
+# Letters:
+# clust <- clust %>% mutate(CLUST=ifelse(clustnumber==4, 'A(4)',
+#                                        ifelse(clustnumber==1,'B(1)',
+#                                               ifelse(clustnumber==6,'C(6)',
+#                                                      ifelse(clustnumber==5, 'D(5)',
+#                                                             ifelse(clustnumber==3,'E(3)',
+#                                                                    ifelse(clustnumber==2,'F(2)',NA)))))))
+
+#Greek letters:
+clust <- clust %>% mutate(CLUST=ifelse(clustnumber==4, greeks("alpha"),
+                                       ifelse(clustnumber==1, greeks("beta"),
+                                              ifelse(clustnumber==6, greeks("gamma"),
+                                                     ifelse(clustnumber==5, greeks("delta"),
+                                                            ifelse(clustnumber==3,greeks("epsilon"),
+                                                                   ifelse(clustnumber==2,greeks("zeta"),NA)))))))
+
+Belong6 <- left_join(BL.best[[6]], clust, by="clustnumber")
+Belong6 <- Belong6 %>% select(-clustnumber) %>% rename(clustnumber=CLUST) %>%
+  select(gupi, clustnumber, everything())
+
+
+CV <- left_join(CV, Belong6 %>% select(gupi, clustnumber), by="gupi")
+CV.alldays <- left_join(CV.alldays, Belong6 %>% select(gupi, clustnumber), by="gupi")
+
+# Clusters sorted by outcome, and with greek letters: -------
+count(CV.alldays,clustnumber)
+Npts <- data.frame(count(CV.alldays, clustnumber))
+
+# Outcome and patients per day:-----------
+# Fig 5:
+
+CV.temp <- CV %>% mutate(GOSE6mo=ifelse(GOSE6mo=="2_or_3", "2, 3", GOSE6mo))
+
+fig5a <- ggplot(CV.temp %>% filter(Timepoint==0), aes(x=clustnumber, fill=GOSE6mo)) +
+         geom_bar(position="fill") +      
+  #scale_fill_brewer(palette="RdYlGn") +
+        scale_fill_manual(values=c("#003f5c","#2f4b7c", "#665191", "#a05195", "#d45087", "#f95d6a", "#ff7c43", "#ffa600")) +
+        theme_minimal() + 
+        theme(panel.grid.major.x=element_blank()) +
+        labs(fill="GOS-E 6 months") + xlab("Trajectory") + ylab("Percentage") #Color set2
+
+# How many patients each day:
+s2 <- CV %>% select(gupi, Timepoint, clustnumber, Dead, Discharged)
+s2 <- s2 %>% group_by(gupi, Timepoint, clustnumber) %>% mutate(ICUStatus=ifelse(Dead==1, "Dead", 
+                                                                                ifelse(Discharged==1, "Discharged", "ICU")))
+s2 <- s2 %>% mutate(ICUStatus=factor(ICUStatus, levels=c("Dead","Discharged","ICU")))
+
+fig5b <- ggplot(s2, aes(x=Timepoint, fill=ICUStatus)) +
+        geom_bar() + facet_wrap(~clustnumber, scales="free_y", nrow=3) +
+        scale_fill_manual(values=c("#d45087","#ff7c43","#665191")) +
+        theme_minimal() + theme(panel.grid.major.x=element_blank()) +
+        labs(fill="ICU Status")
+
+dev.new()
+grid.arrange(fig5a, fig5b, ncol=2) #Make screenshot, then save (if saved as pdf from R, greek letters become ..)
+
+
+# Add outcome graphs for all number of clusters:
+View(BL.best.df)
+CV.temp <- left_join(CV,BL.best.df, by="gupi") %>% filter(Timepoint==0) %>% select(gupi, GOSE6mo, mod2:mod12)
+CV.temp <- CV.temp %>% pivot_longer(mod2:mod12, names_to = "model", values_to="clustnumber")
+CV.temp <- CV.temp %>% mutate(model=as.numeric(gsub("mod","", model)),
+                              GOSE6mo=factor(GOSE6mo, levels=c(1,"2_or_3", 4:8)))
+# Reorder cluster numbers to make nicer plots:
+temp <- CV.temp %>% group_by(model, clustnumber, GOSE6mo) %>% summarise(N=n())
+temp2 <- CV.temp %>% group_by(model, clustnumber) %>% summarise(clustsize=n())
+temp <- left_join(temp, temp2, by=c("model", "clustnumber"))
+temp <- temp %>% mutate(PERC=N/clustsize)
+temp <- temp %>% group_by(model) %>% arrange(GOSE6mo, PERC) %>% filter(GOSE6mo==1)
+temp <- temp %>% group_by(model) %>% mutate(CLUST=row_number()) %>% select(-GOSE6mo)
+ 
+#temp <- temp %>% select(model, clustnumber, CLUST)
+
+CV.temp <- left_join(CV.temp, temp, by=c("model", "clustnumber"))
+CV.temp <- CV.temp %>% mutate(CLUST=as.factor(CLUST))
+mortplots <- ggplot(CV.temp, aes(x=CLUST, fill=GOSE6mo)) +
+  geom_bar(position="fill") +      
+  #scale_fill_brewer(palette="RdYlGn") +
+  scale_fill_manual(values=c("#003f5c","#2f4b7c", "#665191", "#a05195", "#d45087", "#f95d6a", "#ff7c43", "#ffa600")) +
+  #scale_fill_manual(values=c("#003f5c","#2f4b7c", "#665191", "#a05195", "#d45087", "#f95d6a", "#ff7c43", "#ffa600", "#e8c309", "#e4e809", "#bfe809",  "#5be809", "309e82e")) +
+  theme_minimal() + 
+  theme(panel.grid.major.x=element_blank(), strip.text.x = element_text(face="bold"),
+        axis.text.x=element_text(size=7) )+
+  #scale_x_discrete(breaks=c(1:12)) +
+  labs(fill="GOS-E 6 months") + xlab("Trajectory") + ylab("Percentage") + #Color set2
+  facet_wrap(~model, scales="free_x")
+
+# SFig3:
+pdf("SFig3.pdf",
+    width=9, height=7)
+print(mortplots)
+dev.off()
+
+# Heatmap: --------------------------
+Firstdayvars <- colnames(CV.alldays)[!grepl("[0-9]", colnames(CV.alldays))]
+Firstdayvars <- c(Firstdayvars, "GOSE6mo.num")
+
+#temp2=all features, incl age, iss,.... temp3=Firstdayvars excluded:
+temp2 <- pivot_longer(CV %>% select(-GOSE6mo), cols = Age:HR, names_to = "Feature", values_to = "Value")
+temp2 <- temp2 %>% group_by(Feature) %>% mutate(sv=scale(Value))
+temp2 <- temp2 %>% group_by(Timepoint, Feature, clustnumber) %>% summarise(Value=mean(sv, na.rm=TRUE))
+temp3 <- temp2 %>% filter(!Feature %in% c(Firstdayvars, "ICPabove20", "ICPabove25", "ICPabove30", "ICPMonitorType", "CSFDrainageYes", "HR",
+                                          "ExtracranISSHighest", "HeadISS", "TotalISS", "MidlineShiftmm"))
+temp2 <- temp2 %>% mutate(Feature = factor(Feature, levels = c("Age", "Anticoags", "ASAClass", "BE", "BMI", "Lactate",
+                                                               "Sex", "ExtracranISSHighest", "HeadISS", "TotalISS",
+                                                               levels(temp3$Feature))))
+
+temp3 <- temp3 %>% mutate(Feature=ifelse(Feature=="GCSScore", "GCS Score", ifelse(Feature=="GCSEyes", "GCS Eye Score", ifelse(Feature=="GCSMotor", "GCS Motor Score",
+                                  ifelse(Feature=="GCSVerbal", "GCS Verbal Score", ifelse(Feature=="UCH.L1", "UCH-L1*", ifelse(Feature=="SodiumDiff", "∆ Sodium",
+                                  ifelse(Feature=="SodiumMean", "Sodium mean", ifelse(Feature=="GlucoseDiff", "∆ Glucose*", ifelse(Feature=="GlucoseMean", "Glucose mean",
+                                  ifelse(Feature=="HbMean", "Hemoglobin mean", ifelse(Feature=="CreatinineMean", "Creatinine*", ifelse(Feature=="PaO2Mean", "PaO2 mean",
+                                  ifelse(Feature=="PaO2Diff", "∆ PaO2", ifelse(Feature=="PaCO2Mean", "PaCO2 mean", ifelse(Feature=="PaCO2Diff", "∆ PaCO2",
+                                  ifelse(Feature=="pHDiff", "∆ pH", ifelse(Feature=="pHMean", "pH mean",
+                                  ifelse(Feature=="SpO2Diff", "∆ SpO2*", ifelse(Feature=="SpO2Mean", "SpO2 mean*", ifelse(Feature=="AlbuminMean", "Albumin",
+                                  ifelse(Feature=="PlateletMean", "Platelet count", ifelse(Feature=="HRMean", "Heart Rate mean", ifelse(Feature=="HRDiff", "∆ Heart Rate",
+                                  ifelse(Feature=="MAPDiff", "∆ MAP", ifelse(Feature=="MAPMean", "MAP mean", ifelse(Feature=="TempDiff", "∆ Temperature",
+                                  ifelse(Feature=="TempMean", "Temperature mean", ifelse(Feature=="ICPDiff", "∆ ICP", ifelse(Feature=="ICPMean", "ICP mean", Feature))))))))))))))))))))))))))))))
+temp3 <- temp3 %>% mutate(Feature=ifelse(Feature=="ICPMonitorYes", "ICP Monitoring", ifelse(Feature=="PupilResponse", "Pupil Reactivity", ifelse(Feature=="FisherClassification", "Fisher CT Classification",
+                                  ifelse(Feature=="RotterdamCTScore", "Rotterdam CT Score", ifelse(Feature=="MidlineShift", "Midline Shift",
+                                  ifelse(Feature=="TotalTIL", "Total TIL Score", ifelse(Feature=="ICPSurgery", "Surgery for ICP control", ifelse(Feature=="DecomprCran", "Decompressive craniectomy",
+                                  ifelse(Feature=="Vasopressor", "Vasopressors", ifelse(Feature=="FluidLoading", "Fluid Loading", ifelse(Feature=="PositionNursedFlat", "Nursed flat for ICP control",
+                                  ifelse(Feature=="Position", "Elevated head for ICP control", ifelse(Feature=="SedationTherapy", "Sedation Therapy",ifelse(Feature=="NeuromuscBlock", "Neuromuscular Block",
+                                  ifelse(Feature=="HyperventTherapy", "Hyperventilation Therapy",ifelse(Feature=="CSFDrainageVol", "CSF Drainage Volume",ifelse(Feature=="CSFDrainageTherapy", "CSF Drainage Therapy",
+                                  ifelse(Feature=="Saline", "Hypertonic Saline",ifelse(Feature=="Tempcontrol", "Temperature Control",
+                                  ifelse(Feature=="FluidBal", "Fluid Balance",ifelse(Feature=="Intub", "Intubation",
+                                  ifelse(Feature=="Tau", "Tau*", ifelse(Feature=="GFAP", "GFAP*", ifelse(Feature=="NFL", "NFL*", ifelse(Feature=="S100B", "S100B*",
+                                  ifelse(Feature=="NSE", "NSE*", Feature)))))))))))))))))))))))))))
+temp3 <- temp3 %>% mutate(Feature = factor(Feature, levels = c("Discharged","Dead","Intubation", "Fluid Balance","Temperature Control",
+                                                               "Hypertonic Saline","CSF Drainage Therapy","CSF Drainage Volume","Hyperventilation Therapy",
+                                                               "Neuromuscular Block","Mannitol", "Sedation Therapy", "Nursed flat for ICP control",
+                                                               "Elevated head for ICP control","Fluid Loading","Vasopressors", "Decompressive craniectomy",
+                                                               "Surgery for ICP control", "Total TIL Score", "aSDH", "Contusion","tSAH","EDH","TAI",
+                                                               "Midline Shift", "Rotterdam CT Score", "Fisher CT Classification",
+                                                               "Hypocapnia","Seizures","Hypotension","Hypoxia", "Pupil Reactivity","ICP Monitoring", 
+                                                               "ICP mean","∆ ICP","Temperature mean", "∆ Temperature","MAP mean","∆ MAP","Heart Rate mean",
+                                                               "∆ Heart Rate", "Platelet count","Albumin","SpO2 mean*", "∆ SpO2*","Hemoglobin mean",
+                                                               "PaCO2 mean","∆ PaCO2", "PaO2 mean","∆ PaO2","pH mean","∆ pH", "Creatinine*","Glucose mean",
+                                                               "∆ Glucose*","Sodium mean","∆ Sodium","Tau*","NSE*", "UCH-L1*","NFL*","GFAP*","S100B*",
+                                                               "GCS Verbal Score","GCS Motor Score","GCS Eye Score","GCS Score")))
+
+# Fig 4: Heatmap of all features across all clusters.
+dev.new()
+print(ggplot(temp3, aes(x=Timepoint, y=Feature, fill=Value)) + geom_tile(na.rm=TRUE)
+      + coord_equal() + facet_wrap(~clustnumber, nrow = 1) +
+        guides(fill = guide_colourbar(title='Normalized value', title.position="top", title.vjust=0.5, barwidth = 15, barheight = .75)) +
+        scale_fill_gradient2(na.value='dark grey',low='#003f5c',mid='#bc5090',high='#ffa600',midpoint=.0,limits = c(-2,2),breaks = c(-2,2), labels = c('Low','High')) +
+        theme_minimal() + theme(legend.position = "bottom", legend.title=element_text(face="bold", size=6.5),
+                                legend.text=element_text(size=6.5),
+                                legend.title.align = .5,
+                                axis.text.y = element_text(angle=20, size=6.5),
+                                axis.title=element_text(face="bold"),
+                                axis.title.y=element_text(margin=margin(r=-10)),
+                                strip.text=element_text(face="bold")) +
+                                xlab("Day"))
+
+# # Split into four different heatmaps:
+# heatmaps4 <- vector(mode="list", length=4)
+# j <- 1
+# i <- 1
+# for(j in c(1:4))
+# {
+#   heatmaps4[[j]] <- ggplot(temp3 %>% filter(Feature %in% unique(temp3$Feature)[i:(i+16)]), aes(x=Timepoint, y=Feature, fill=Value)) + geom_tile(na.rm=TRUE) +
+#   coord_equal() + facet_wrap(~clustnumber, nrow = 1) + #Change to 3x2?
+#     #scale_fill_gradient2(na.value='#488f31',low='#003f5c',mid='#eacaf4',high='#de425b',midpoint=.5,limits = c(-2,2),breaks = c(0.05,.95), labels = c('Low','High')) + #+ scale_fill_distiller(palette = "Reds", direction=1)) +
+#     scale_fill_gradient2(na.value='#488f31',low='#003f5c',mid='#bc5090',high='#ffa600',midpoint=.0,limits = c(-2,2),breaks = c(-2,2), labels = c('Low','High')) +
+#     theme_minimal() + theme(legend.position = "", axis.text.y = element_text(angle=20, size=6.5)) +
+#     xlab("Day")
+#   i <- i+17
+# }
+# dev.new()
+# grid.arrange(grobs = heatmaps4, nrow=2, ncol=2, list(top=NULL))
+
+
+# ---------------------
+
+CV <- left_join(CV, IMPACT.pred, by="gupi")
+CV.alldays <- left_join(CV.alldays, CV %>% filter(Timepoint==0) %>% select(gupi, contains("GOSE")), by="gupi")
+
+# Fig 2:
+# Model sequence 11:
+
+df2 <- pivot_longer(AlluvialBelong, cols=mod2:mod12, names_to="mod", values_to="ClustNumber")
+df2 <- df2 %>% mutate(mod=gsub("mod","",mod))
+
+df2$mod <- factor(df2$mod, levels=c("2", "3", "4","5", "6","7","8", "9", "10", "11", "12"))
+df2$ClustNumber <- as.factor(df2$ClustNumber)
+
+pdf("Fig2.pdf",
+    width=10, height=6)
+print(ggplot(df2, aes(x=mod, stratum=ClustNumber, alluvium=gupi, fill=ClustNumber, label=ClustNumber)) +
+        geom_stratum(alpha=0.5, fill="white",color="black") +
+        geom_flow() +
+        geom_text(stat="stratum", size=5) +
+        theme_minimal() +
+        theme(text = element_text(size = 14, face="bold"), legend.position="none", panel.grid=element_blank(),
+              axis.text.y=element_blank(), axis.ticks.y=element_blank(), axis.title.y=element_text(margin=margin(l=-2)),
+              axis.text.x=element_text(margin=margin(t=-2)), axis.title.x=element_text()) +
+        #scale_fill_gradient2(na.value='#488f31',low='#003f5c',mid='#bc5090',high='#ffa600',midpoint=.0,limits = c(-2,2),breaks = c(-2,2), labels = c('Low','High')) +
+        scale_fill_manual(values=c("#003f5c","#2f4b7c", "#665191", "#a05195", "#d45087", "#f95d6a", "#ff7c43", "#ffa600", "#e8c309", "#e4e809", "#bfe809",  "#5be809", "309e82e")) +
+        ylab("Cluster number") +
+        xlab("Number of clusters"))
+dev.off()
+
+
+# Probability of different trajectories from cluster belongings in first study: ------------
+# Table STable 4:
+x <- left_join(BL.best.df %>% rename(clustnumber=mod6), clust, by="clustnumber")
+x <- x %>% group_by(clust) %>% mutate(N=n()) %>% group_by(clust, CLUST, N) %>% summarise(P=n())
+x <- x %>% mutate(Prob=P/N*100)
+View(x %>% select(-P, -N) %>% mutate(Prob=round(Prob, 2)) %>% pivot_wider(names_from = CLUST, values_from=Prob))
+
+
+# Sankey of admission clusters vs new clusters (6 clusters):---------------
+#SFig5:
+df2 <- left_join(BL.best.df %>% rename(clustnumber=mod6), clust, by="clustnumber")
+df2 <- df2 %>% mutate(clust=as.factor(clust), CLUST=as.factor(CLUST))
+df3 <- df2 %>% group_by(clust, CLUST) %>% summarise(freq=n())
+df3 <- df3 %>% rename("Admission_cluster"=clust, "Trajectory"=CLUST)
+
+dev.new()
+print(ggplot(df3,aes(y=freq, axis1=Admission_cluster, axis2=Trajectory)) +
+        geom_alluvium(aes(fill=Admission_cluster)) +
+        geom_stratum(fill="white", color="black") +
+        geom_text(stat="stratum", aes(label=after_stat(stratum)), size=5) +
+        theme_minimal() +
+        theme(text = element_text(size = 16, face="bold"), legend.position="none", panel.grid=element_blank(),
+              axis.text.y=element_blank(), axis.ticks.y=element_blank(),
+              axis.text.x=element_text(vjust=7), axis.ticks.x=element_blank()) +
+        scale_x_discrete(limits=c("Admission cluster", "Trajectory")) +
+        scale_fill_manual(values=c("#003f5c","#2f4b7c", "#665191", "#a05195", "#d45087", "#f95d6a", "#ff7c43", "#ffa600")) +
+        ylab(""))
+# -------------------------------
+
+# Highest MI features: Run C script with MI and belong file
+# Violin plots: -------------------------------------
+# Highest MI features determined in excel file
+
+
+CV2 <- CV %>% mutate(Timepoint=as.factor(Timepoint))
+MI_feat <- c("GlucoseDiff", "Tau", "UCH.L1", "GFAP",
+             "NFL", "S100B", "NSE", "CreatinineMean", "SpO2Diff", "SpO2Mean")
+
+xx <- c("∆ Glucose", "Tau", "UCH-L1", "GFAP", "NFL", "S100B", "NSE",
+        "Creatinine mean", "∆ SpO2", "SpO2 mean") #For nice titles
+
+# Zoom of subplots (not used in manuscript):
+for(i in 1:length(MI_feat))
+{
+  temp <- CV2 %>% select(Timepoint, clustnumber, MI_feat[i])
+  colnames(temp)[3] <- "Value"
+
+  dev.new()
+  print(ggplot(temp, aes(y=Value, x=Timepoint)) + geom_violin(aes(fill=Timepoint))
+        + stat_summary(fun="mean", geom="point", color="red")
+        
+        + facet_wrap(~clustnumber) +
+          ggtitle(xx[i]))
+}
+
+# SFig4: 
+MI_plots <- vector(length=10, mode="list")
+
+for(i in 1:length(MI_feat))
+{
+  temp <- CV2 %>% select(Timepoint, clustnumber, MI_feat[i])
+  colnames(temp)[3] <- "Value"
+  MI_plots[[i]] <- ggplot(temp, aes(y=Value, x=Timepoint)) + geom_violin(aes(fill=Timepoint)) +
+    stat_summary(fun="mean", geom="point", color="red") +
+    facet_wrap(~clustnumber) + ggtitle(xx[i]) +
+    scale_fill_manual(values=c("#003f5c","#2f4b7c", "#665191", "#a05195", "#d45087", "#f95d6a", "#ff7c43", "#ffa600")) +
+    theme_minimal() +
+    theme(legend.position = "none", panel.grid.major.x=element_blank()) +
+    xlab("Day") + ylab("Concentration")
+}
+
+dev.new()
+marrangeGrob(MI_plots, nrow=2, ncol=5, list(top=NULL)) #Screen shot and then export as pdf (to avoid greek letters to be ..)
+
+
+# Investigate sodium relations (not used in manuscript):
+
+sodiumplot <- vector(mode="list", length=2)
+sodiumplot[[1]] <- ggplot(CV2 %>% select(SodiumMean, Timepoint, clustnumber), aes(y=SodiumMean, x=Timepoint)) + geom_violin(aes(fill=Timepoint)) +
+  stat_summary(fun="mean", geom="point", color="red") +
+  facet_wrap(~clustnumber) +
+  ggtitle("Sodium mean")
+
+sodiumplot[[2]] <- ggplot(CV2 %>% select(SodiumDiff, Timepoint, clustnumber), aes(y=SodiumDiff, x=Timepoint)) + geom_violin(aes(fill=Timepoint)) +
+  stat_summary(fun="mean", geom="point", color="red") +
+  facet_wrap(~clustnumber) +
+  ggtitle("∆ Sodium")
+
+marrangeGrob(sodiumplot, nrow=1, ncol=2, list(top=NULL))
+
+
+# Plot belonging probabilities, all clusters, all patients:
+# Fig 3: 
+
+unique(Belong6$clustnumber)
+View(Belong6)
+# Belong6 <- Belong6 %>% rename(greeks("alpha")='4', greeks("beta")='1', greeks("gamma")='6', greeks("delta")='5', greeks("epsilon")='3', greeks("zeta")='2')
+head(Belong6)
+temp <- c(greeks("beta"), greeks("zeta"), greeks("epsilon"), greeks("alpha"), greeks("delta"), greeks("gamma"))
+colnames(Belong6)[3:ncol(Belong6)] <- temp
+temp <- pivot_longer(Belong6, greeks("beta"):greeks("gamma"), names_to = "cluster", values_to = "prob")
+
+dev.new()
+print(ggplot(temp, aes(x=cluster, y=prob)) + geom_boxplot(aes(fill=cluster)) +
+        theme_minimal() +
+        theme(legend.position="none") +
+        facet_wrap(~clustnumber) +
+        xlab("Cluster assignment") + ylab("Probability of belonging to a cluster"))
+
+
+# METADATA TABLE: ---------------------------
+# 6 CLUSTERS:
+# Use only values when still in ICU (=use CV.alldays for analysis)
+
+# N patients in each cluster: 
+View(t(Npts))
+
+# Length of stay:
+f2 <- "/Users/ceciliamac/OneDrive - Karolinska Institutet/Clustering CENTER-TBI/Neurobot data/Core3.0/" #Moved all data to onedrive 2110027
+deathdisch <- read.csv(paste0(f2, "deathdischarge.csv"), na.strings = nas, stringsAsFactors = FALSE)
+colnames(deathdisch) <- c("gupi", "ICUDischDate", "DeathDate")
+
+deathdisch <- deathdisch %>% mutate(ICULOS=as.numeric(difftime(ICUDischDate, "1970-01-01", units="days")))
+
+CV <- left_join(CV, deathdisch %>% select(gupi, ICULOS), by="gupi")
+
+median((CV %>% filter(Timepoint==1))$ICULOS, na.rm=TRUE)
+quantile(((CV %>% filter(Timepoint==1))$ICULOS), 0.25, na.rm=TRUE)
+quantile(((CV %>% filter(Timepoint==1))$ICULOS), 0.75, na.rm=TRUE)
+
+dev.new()
+print(ggplot(CV %>% filter(Timepoint==1), aes(x=ICULOS, group=as.factor(HR), color=as.factor(HR))) + geom_density() +
+        ggtitle("ICU LOS"))
+
+
+CV %>% filter(Timepoint==1) %>% group_by(clustnumber) %>% summarise(MEDIAN=median(ICULOS, na.rm = TRUE),
+                                                              LB25=quantile(ICULOS, 0.25, na.rm=TRUE),
+                                                              UB75=quantile(ICULOS, 0.75, na.rm=TRUE))
+
+# COUNTS, categorical features: -------
+
+# GOS-E:
+temp <- count(CV %>% filter(Timepoint==0) %>% group_by(clustnumber), GOSE6mo) %>% rename(CLUST=clustnumber)
+temp <- left_join(temp, Npts, by="CLUST")
+
+temp <- temp %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View(temp %>% select(-n.x, -n.y))
+
+temp <- count(CV %>% filter(Timepoint==0), GOSE6mo)
+temp <- temp %>% mutate(nperc=paste0(n, " (", round(n/nrow(CV.alldays),3)*100,")"))
+temp %>% select(GOSE6mo, nperc)
+
+#ASA-PS:
+temp <- left_join(count(CV.alldays,CLUST, ASAClass), Npts, by="CLUST")
+temp <- temp %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View(temp %>% select(-n.x, -n.y))
+
+temp <- count(CV.alldays, ASAClass)
+temp <- temp %>% mutate(nperc=paste0(n, " (", round(n/nrow(CV.alldays),3)*100,")"))
+View(temp %>% select(-n))
+
+# Anticoags:
+temp <- left_join(count(CV.alldays, CLUST, Anticoags), Npts, by="CLUST")
+temp <- temp %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View(temp %>% filter(Anticoags==1) %>% select(-n.x, -n.y, -Anticoags))
+temp <- count(CV %>% filter(Timepoint==0), Anticoags)
+temp <- temp %>% mutate(nperc=paste0(n, " (", round(n/nrow(CV.alldays),3)*100,")"))
+
+# Type of injury:
+temp <- left_join(count(CV.alldays, CLUST, InjType), Npts, by="CLUST")
+temp <- temp %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View((temp %>% select(-n.x, -n.y)))
+temp <- count(CV.alldays, InjType)
+temp <- temp %>% mutate(nperc=paste0(n, " (", round(n/nrow(CV.alldays),3)*100,")"))
+View(temp)
+
+# Cause of injury:
+temp <- left_join(count(CV.alldays, CLUST, InjCause), Npts, by="CLUST")
+temp <- temp %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View((temp %>% select(-n.x, -n.y)))
+temp <- count(CV.alldays, InjCause)
+temp <- temp %>% mutate(nperc=paste0(n, " (", round(n/nrow(CV.alldays),3)*100,")"))
+View(temp)
+
+# Pupil reactivity:
+temp <- left_join(count(CV.alldays, CLUST, PupilResponse.0), Npts, by="CLUST")
+temp <- temp %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View(temp %>% select(-n.x, -n.y) %>% pivot_wider(names_from=CLUST, values_from=nperc))
+
+temp <- count(CV.alldays, PupilResponse.0)
+temp <- temp %>% mutate(nperc=paste0(n, " (", round(n/nrow(CV.alldays),3)*100,")"))
+View(temp)
+
+# Means first day:
+temp <- CV.alldays %>% select("CLUST", "Age", "GOSE6mo.num", contains("ISS"),(starts_with("GCS") & ends_with("0")),
+                              "BE", "Lactate", "BMI", "CreatinineMean.1", "AlbuminMean.1", (starts_with("Glucose") & ends_with("1")),
+                              "HbMean.1", "HRDiff.1", "HRMean.0", "FluidBal.1", "ICPDiff.1", "ICPMean.1", "MAPMean.0",
+                              "TempMean.0", "PaCO2Mean.0", "PaO2Mean.0", "pHMean.0", "SpO2Mean.0",
+                              "RotterdamCTScore.1", "FisherClassification.1")
+CV.alldays.summary <- temp %>% group_by(CLUST) %>% summarise_all(list(Median=median,
+                                                                      LB25=~quantile(x=., probs=0.25, na.rm=TRUE),
+                                                                      UB75=~quantile(x=., probs=0.75, na.rm=TRUE)), na.rm=TRUE)
+View(t(CV.alldays.summary))
+# write.csv(t(CV.alldays.summary), paste0(f5,"firstdaysummaries.csv"))
+
+
+CV.alldays.overall.summary <- temp %>% select(-CLUST) %>%  summarise_all(list(Median=median,
+                                                                              LB25=~quantile(x=., probs=0.25, na.rm=TRUE),
+                                                                              UB75=~quantile(x=., probs=0.75, na.rm=TRUE)), na.rm=TRUE)
+# write.csv(t(CV.alldays.overall.summary), paste0(f5,"/firstdaysummaries.overall.csv"))
+
+View(t(CV.alldays.overall.summary))
+# ---------------------------------
+
+# ISS:
+temp <- CV %>% filter(Timepoint==0) %>% select(clustnumber, contains("ISS")) 
+
+temp <- temp %>% group_by(clustnumber) %>% summarise_all(list(Median=median,
+                                                                      LB25=~quantile(x=., probs=0.25, na.rm=TRUE),
+                                                                      UB75=~quantile(x=., probs=0.75, na.rm=TRUE)), na.rm=TRUE)
+temp <- temp %>% mutate(ExtracranISS=paste0(ExtracranISSHighest_Median, " (",ExtracranISSHighest_LB25, ", ", ExtracranISSHighest_UB75,")"),
+                        HeadISS=paste0(HeadISS_Median, " (",HeadISS_LB25, ", ", HeadISS_UB75, ")"),
+                        TotalISS=paste0(TotalISS_Median, " (", TotalISS_LB25, ", ", TotalISS_UB75, ")"))
+View(temp %>% select(clustnumber, ExtracranISS, HeadISS, TotalISS))
+# Overall ISS:
+temp <- CV %>% filter(Timepoint==0) %>% select(contains("ISS")) %>% summarise_all(list(Median=median,
+                                                              LB25=~quantile(x=., probs=0.25, na.rm=TRUE),
+                                                              UB75=~quantile(x=., probs=0.75, na.rm=TRUE)), na.rm=TRUE)
+temp <- temp %>% mutate(ExtracranISS=paste0(ExtracranISSHighest_Median, " (",ExtracranISSHighest_LB25, ", ", ExtracranISSHighest_UB75,")"),
+                        HeadISS=paste0(HeadISS_Median, " (",HeadISS_LB25, ", ", HeadISS_UB75, ")"),
+                        TotalISS=paste0(TotalISS_Median, " (", TotalISS_LB25, ", ", TotalISS_UB75, ")"))
+View(temp %>% select(ExtracranISS, HeadISS, TotalISS))
+
+# GCS score total:
+temp <- CV %>% filter(Timepoint==0) %>% select(gupi, clustnumber, GCSScore)
+temp %>% summarise(GCSTotal=paste0(median(GCSScore, na.rm = TRUE), " (",quantile(GCSScore, 0.25, na.rm=TRUE),
+                                   ", ", quantile(GCSScore, 0.75, na.rm=TRUE), ")"))
+
+View(temp %>% group_by(clustnumber) %>% summarise(GCSTotal=paste0(median(GCSScore, na.rm = TRUE), " (",quantile(GCSScore, 0.25, na.rm=TRUE),
+                                   ", ", quantile(GCSScore, 0.75, na.rm=TRUE), ")")))
+
+# Therapies, any day:
+# USE CV.ALLDAYS AS THIS HAS NA IF PATIENT DISCHARGED.
+temp <- CV.alldays %>% select(gupi, CLUST, contains("ICPMonitorYes"), contains("ICPSurgery"),
+                              contains("Intub"), contains("Hypocapnia"), contains("Hypotension"),
+                              contains("Hypoxia"), contains("Seizures"), contains("FluidBal"), contains("ICPMean"),
+                              contains("ICPDiff"),  
+                              contains("SodiumMean"), contains("SodiumDiff"), contains("PlateletMean"), contains("Rotterdam"), contains("Fisher"), contains("TAI"),
+                              contains("MidlineShift"), contains("MidlineShiftmm"), contains("TAI"), contains("aSDH"),
+                              contains("Contusion"), contains("EDH"), contains("tSAH"), contains("Position"), contains("PositionNursedFlat"),
+                              contains("FluidLoading"), contains("Vasopressor"), contains("DecomprCran"), contains("Neuromusc"), 
+                              contains("Tempcontrol"), contains("UCH.L1"),
+                              contains("S100B"), contains("Tau"), contains("NFL"), contains("GFAP"), contains("NSE"),
+                              contains("Creatinine"))
+
+temp <- temp %>% pivot_longer(cols=ICPMonitorYes.1:CreatinineMean.7, names_to = "Feature", values_to = "Value")
+temp9 <- temp
+temp$Feature <- gsub("\\..*","", temp$Feature)
+temp2 <- temp %>% group_by(gupi, CLUST, Feature) %>% summarise(MAX=max(Value, na.rm=TRUE),
+                                                               MIN=min(Value, na.rm=TRUE))
+
+temp2[temp2==-Inf] <- NA
+temp2[temp2==Inf] <- NA
+temp <- ungroup(temp)
+View(temp2) # Max and min values all week.
+
+# Data to Ari, pts w ICP monitoring:
+temp3 <- (ungroup(temp2) %>% filter(Feature=="ICPMonitorYes") %>% select(gupi, MAX) %>% rename(ICPMonitorYes=MAX))
+write.csv(temp3, file=paste0(f5,"endotypegupis.csv"), row.names = FALSE)
+# Overall summaries (N(%), any day):
+temp3 <- temp2 %>% group_by(Feature) %>% summarise(N=sum(MAX, na.rm = TRUE))
+temp3 <- temp3 %>% mutate(PERC=round(N/nrow(CV.alldays),3)*100)
+temp3 <- temp3 %>% mutate(NPERC=paste0(N, " (", PERC, ")"))
+
+View(temp3 %>% select(-N, -PERC))
+
+#Stratified by cluster (N(%), any day):
+temp4 <- temp2 %>% group_by(CLUST, Feature) %>% summarise(N=sum(MAX, na.rm = TRUE))
+
+temp4 <- left_join(temp4, Npts, by="CLUST")
+temp4 <- temp4 %>% mutate(PERC=round(N/n,3)*100)
+temp4 <- temp4 %>% mutate(NPERC=paste0(N, " (", PERC, ")"))
+
+View(temp4)
+# write.csv(temp4 %>% select(CLUST, Feature, NPERC), file=paste0(f5,"N.alldays.csv"))
+
+# Highest any day stratified by cluster:
+temp3 <- temp2 %>% select(-gupi) %>% filter(Feature %in% c("CreatinineMean", "FisherClassification",
+                                                           "GFAP", "ICPDiff", "ICPMean", "NFL", "NSE", "RotterdamCTScore",
+                                                           "S100B", "SodiumDiff", "SodiumMean", "Tau", "UCH")) %>% group_by(CLUST, Feature) %>%
+  summarise(MAXMEDIAN=median(MAX, na.rm=TRUE),
+            LB25=quantile(MAX, probs=0.25, na.rm=TRUE),
+            UB75=quantile(MAX, probs=0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MAXMEDIAN_IQR=paste0(MAXMEDIAN, " (",LB25, ", ",UB75,")"))
+temp3 <- temp3 %>% mutate(Feature=factor(Feature, levels=c("CreatinineMean", "ICPMean", "ICPDiff",
+                                                           "SodiumMean", "SodiumDiff", "RotterdamCTScore",
+                                                           "FisherClassification", "UCH", "S100B", "Tau", "NFL", "GFAP", "NSE")))
+View(temp3 %>% select(CLUST, Feature, MAXMEDIAN_IQR) %>% arrange((Feature)))
+# write.csv(temp3 %>% select(CLUST, Feature, MAXMEDIAN_IQR), file=paste0(f5,"highestanydayclust.csv"))
+
+# Highest any day, all: patients:
+temp3 <- temp2 %>% select(-gupi) %>%filter(Feature %in% c("CreatinineMean", "FisherClassification",
+                                                          "GFAP", "ICPDiff", "ICPMean", "NFL", "NSE", "RotterdamCTScore",
+                                                          "S100B", "SodiumDiff", "SodiumMean", "Tau", "UCH")) %>%
+  group_by(CLUST, Feature) %>% group_by(Feature) %>%
+  summarise(MAXMEDIAN=median(MAX, na.rm=TRUE),
+            LB25=quantile(MAX, probs=0.25, na.rm=TRUE),
+            UB75=quantile(MAX, probs=0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MAXMEDIAN_IQR=paste0(MAXMEDIAN, " (",LB25, ", ",UB75,")"))
+temp3 <- temp3 %>% mutate(Feature=factor(Feature, levels=c("CreatinineMean", "ICPMean", "ICPDiff",
+                                                           "SodiumMean", "SodiumDiff", "RotterdamCTScore",
+                                                           "FisherClassification", "UCH", "S100B", "Tau", "NFL", "GFAP", "NSE")))
+View(temp3 %>% select(Feature, MAXMEDIAN_IQR) %>% arrange((Feature)))
+
+
+# Lowest any day by cluster (Platelets:)
+temp3 <- temp2 %>% group_by(CLUST) %>% filter(Feature=="PlateletMean") %>%
+  summarise(MINMEDIAN=median(MIN, na.rm=TRUE),
+            LB25=quantile(MIN, probs=0.25, na.rm=TRUE),
+            UB75=quantile(MIN, probs=0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MAXMEDIAN_IQR=paste0(round(MINMEDIAN,0), " (",round(LB25,0), ", ",round(UB75,0),")"))
+View(temp3 %>% select(CLUST, MAXMEDIAN_IQR))
+
+# All patients:
+temp3 <- temp2 %>% ungroup() %>% filter(Feature=="PlateletMean") %>% summarise(MINMEDIAN=median(MIN, na.rm=TRUE),
+                                                                               LB25=quantile(MIN, probs=0.25, na.rm=TRUE),
+                                                                               UB75=quantile(MIN, probs=0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MAXMEDIAN_IQR=paste0(round(MINMEDIAN,0), " (",round(LB25,0), ", ",round(UB75,0),")"))
+View(temp3)
+
+# ICP mean, Sodium mean, median all days:
+View(temp)
+temp3 <- temp %>% group_by(gupi, CLUST, Feature) %>% filter(Feature %in% c("ICPMean", "SodiumMean")) %>%
+  summarise(MEAN=mean(Value, na.rm=TRUE))
+temp3$MEAN[is.nan(temp3$MEAN)] <- NA
+temp3 <- temp3 %>% group_by(CLUST, Feature) %>% summarise(MEDIAN=median(MEAN, na.rm=TRUE),
+                                                          LB25=quantile(MEAN, 0.25, na.rm=TRUE),
+                                                          UB75=quantile(MEAN, 0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MEDIAN_IQR=ifelse(Feature=="ICPMean",
+                                            paste0(round(MEDIAN,1), " (", round(LB25,1),", ",round(UB75,1), ")"),
+                                            paste0(round(MEDIAN,0), " (", round(LB25,0),", ",round(UB75,0), ")")))
+View(temp3 %>% arrange(Feature))
+
+# All days:
+temp3 <- temp %>% group_by(gupi, Feature) %>% filter(Feature %in% c("ICPMean", "SodiumMean")) %>% summarise(MEAN=mean(Value, na.rm=TRUE))
+temp3$MEAN[is.nan(temp3$MEAN)] <- NA
+temp3 <- temp3 %>% group_by(Feature) %>% summarise(MEDIAN=median(MEAN, na.rm=TRUE),
+                                           LB25=quantile(MEAN, 0.25, na.rm=TRUE),
+                                           UB75=quantile(MEAN, 0.75, na.rm=TRUE))
+
+temp3 <- temp3 %>% mutate(MEDIAN_IQR=ifelse(Feature=="ICPMean",
+                                            paste0(round(MEDIAN,1), " (", round(LB25,1),", ",round(UB75,1), ")"),
+                                            paste0(round(MEDIAN,0), " (", round(LB25,0),", ",round(UB75,0), ")")))
+View(temp3 %>% arrange(Feature) %>% select(Feature, MEDIAN_IQR))
+
+# FluidBal, sum all days:
+temp3 <- temp %>% group_by(gupi, CLUST) %>% filter(Feature=="FluidBal") %>%
+  summarise(SUM=sum(Value, na.rm=TRUE))
+temp3 <- temp3 %>% group_by(CLUST) %>% summarise(MEDIAN=median(SUM, na.rm=TRUE),
+                                                 LB25=quantile(SUM, 0.25, na.rm=TRUE),
+                                                 UB75=quantile(SUM, 0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MEDIAN_IQR=paste0(round(MEDIAN,0), " (",round(LB25,0), ", ",round(UB75,0),")"))
+View(temp3 %>% select(CLUST, MEDIAN_IQR))
+
+# All patients:
+temp3 <- temp %>% group_by(gupi) %>% filter(Feature=="FluidBal") %>%
+  summarise(SUM=sum(Value, na.rm=TRUE))
+temp3 <- temp3 %>% summarise(MEDIAN=median(SUM, na.rm=TRUE),
+                                                 LB25=quantile(SUM, 0.25, na.rm=TRUE),
+                                                 UB75=quantile(SUM, 0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MEDIAN_IQR=paste0(round(MEDIAN,0), " (",round(LB25,0), ", ",round(UB75,0),")"))
+View(temp3)
+
+# Midline shift, first day and any day:
+# First day:
+CV <- CV %>% rename(CLUST=clustnumber)
+temp3 <- left_join(count(CV %>% filter(Timepoint==1), CLUST, MidlineShift), Npts, by="CLUST")
+temp3 <- temp3 %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View(temp3 %>% filter(MidlineShift==1) %>% select(CLUST,nperc) )
+
+temp3 <- CV %>% filter(Timepoint==1) %>% group_by(CLUST) %>% summarise(MEDIAN=median(MidlineShiftmm, na.rm=TRUE),
+                                                                       LB25=quantile(MidlineShiftmm, 0.25, na.rm=TRUE),
+                                                                       UB75=quantile(MidlineShiftmm, 0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MidlineShiftmm=paste0(MEDIAN, " (", LB25, ", ", UB75, ")"))
+View(temp3 %>% select(CLUST, MidlineShiftmm))
+
+# Overall first day:
+temp3 <- count(CV %>% filter(Timepoint==1), MidlineShift)
+temp3$n/1728*100
+
+temp3 <- CV %>% filter(Timepoint==1) %>% summarise(MEDIAN=median(MidlineShiftmm, na.rm=TRUE),
+                                                                       LB25=quantile(MidlineShiftmm, 0.25, na.rm=TRUE),
+                                                                       UB75=quantile(MidlineShiftmm, 0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(MidlineShiftmm=paste0(MEDIAN, " (", LB25, ", ", UB75, ")"))
+View(temp3 %>% select(MidlineShiftmm))
+
+
+# Any day:
+temp3 <- CV %>% select(gupi, CLUST, contains("Midline"))
+temp3 <- temp3 %>% group_by(gupi, CLUST) %>% summarise(MLMAX=max(MidlineShift, na.rm=TRUE),
+                                                       MLmmMAX=max(MidlineShiftmm, na.rm=TRUE))
+temp3[temp3==-Inf] <- NA
+View(temp3)
+
+#By cluster:
+temp3 <- temp3 %>% group_by(CLUST) %>% summarise(N=sum(MLMAX, na.rm=TRUE),
+                                                 MLMAX=median(MLmmMAX, na.rm=TRUE),
+                                                 LB25=quantile(MLmmMAX, 0.25, na.rm=TRUE),
+                                                 UB75=quantile(MLmmMAX, 0.75, na.rm=TRUE))
+temp3 <- left_join(temp3, Npts, by="CLUST")
+
+temp3 <- temp3 %>% mutate(nperc=paste0(N, " (",round(N/n,3)*100, ")"),
+                          MLmm=paste0(MLMAX, " (",LB25,", ",UB75,")"))
+View(temp3 %>% select(CLUST, nperc, MLmm))
+
+# Overall highest any day:
+temp3 <- CV %>% select(gupi, contains("Midline")) %>% group_by(gupi) %>%
+                         summarise(MLMAX=max(MidlineShift, na.rm=TRUE),
+                                MLmmMAX=max(MidlineShiftmm, na.rm = TRUE))
+temp3$MLMAX[temp3$MLMAX==-Inf] <- NA
+
+temp3 <- temp3 %>% summarise(N=sum(MLMAX, na.rm=TRUE),
+                                                 MLMAX=median(MLmmMAX, na.rm=TRUE),
+                                                 LB25=quantile(MLmmMAX, 0.25, na.rm=TRUE),
+                                                 UB75=quantile(MLmmMAX, 0.75, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(nperc=paste0(N, " (",round(N/1728,3)*100, ")"),
+                          MLmm=paste0(MLMAX, " (",LB25,", ",UB75,")"))
+View(temp3 %>% select(nperc, MLmm))
+
+
+# Seizures first day:
+# By cluster:
+temp3 <- left_join(count(CV.alldays, CLUST, Seizures.0), Npts, by="CLUST")
+temp3 <- temp3 %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")"))
+View(temp3 %>% filter(Seizures.0==1) %>% select(CLUST,nperc))
+
+# All:
+count(CV.alldays, Seizures.0) #103
+103/1728*100 #5.96%
+
+
+
+# N% first day, by cluster: 
+temp3 <- temp9 %>% select(-gupi) %>% filter(grepl(".1", Feature), Feature %in% c("TAI.1", "aSDH.1", "Contusion.1", "EDH.1", "tSAH.1")) %>% group_by(CLUST, Feature) %>% summarise(N=sum(Value, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(nperc=paste0(N, " (",round(N/1728,3)*100,")"))                                                     
+temp3 <- temp3 %>% mutate(Feature=gsub(".1","", Feature))
+
+View(temp3 %>% select(CLUST, Feature, nperc))
+
+# Overall N(%) first day:
+temp3 <- temp9 %>% select(-gupi) %>% filter(grepl(".1", Feature), Feature %in% c("TAI.1", "aSDH.1", "Contusion.1", "EDH.1", "tSAH.1")) %>% group_by(Feature) %>% summarise(N=sum(Value, na.rm=TRUE))
+temp3 <- temp3 %>% mutate(nperc=paste0(N, " (",round(N/1728,3)*100,")"))                                                     
+temp3 <- temp3 %>% mutate(Feature=gsub(".1","", Feature))
+View(temp3 %>% select(Feature, nperc))
+
+
+#Daily therapies >= categories:
+temp8 <- CV.alldays %>% select(gupi, CLUST, contains("Tempcontrol"), contains("Saline"), contains("Mannitol"),
+                               contains("SedationTherapy"), contains("HyperventTherapy"))
+temp8 <- temp8 %>% pivot_longer(cols=Tempcontrol.1:HyperventTherapy.7,  names_to="Feature", values_to = "Value")
+temp8 <- temp8 %>% mutate(Feature=gsub("\\..*","", Feature))
+
+temp3 <- temp8 %>% group_by(gupi, Feature, CLUST) %>% summarise(MAX=max(Value, na.rm=TRUE))
+temp3$MAX[temp3$MAX==-Inf] <- NA
+temp3 <- ungroup(temp3)
+
+# By cluster:
+temp4 <- temp3 %>% group_by(CLUST, Feature, MAX) %>% summarise(N=n())
+temp4 <- left_join(temp4, Npts, by="CLUST")
+temp4 <- temp4 %>% mutate(nperc=paste0(N, " (", round(N/n,3)*100,")"))
+# write.csv(temp4, file=paste0(f5, "treatmentcatsclust.csv"))
+View(temp4)
+
+# All:
+temp4 <- temp3 %>% group_by(Feature, MAX) %>% summarise(N=n())
+temp4 <- temp4 %>% mutate(nperc=paste0(N, " (", round(N/1728,3)*100,")"))
+# write.csv(temp4, file=paste0(f5, "treatmentcatsall.csv"))
+View(temp4)
+
+# CSF Drainage Therapy:
+# Per cluster:
+temp3 <- CV %>% select(gupi, CLUST, contains("CSFDrainageTherapy")) %>% group_by(gupi, CLUST) %>%
+  summarise(MAX=max(CSFDrainageTherapy, na.rm=TRUE))
+temp3[temp3==-Inf] <- NA
+temp3 <- temp3 %>% group_by(CLUST, MAX) %>% summarise(N=n())
+temp3 <- left_join(temp3, Npts, by="CLUST") #%>% mutate()
+temp3 <- temp3 %>% mutate(nperc=paste0(N, " (", round(N/n,3)*100,")"))
+View(temp3 %>% select(CLUST, nperc))
+
+# All:
+temp3 <- CV %>% select(gupi, CLUST, contains("CSFDrainageTherapy")) %>% group_by(gupi) %>%
+  summarise(MAX=max(CSFDrainageTherapy, na.rm=TRUE))
+temp3[temp3==-Inf] <- NA
+temp3 <- temp3 %>% group_by(MAX) %>% summarise(N=n())
+temp3 <- temp3 %>% mutate(nperc=paste0(N, " (", round(N/1728,3)*100,")"))
+View(temp3)
+
+#Daily TIL:
+temp3 <- CV %>% select(gupi, CLUST, TotalTIL)
+temp3 <- temp3 %>% group_by(gupi, CLUST) %>% mutate(TotalTIL=max(TotalTIL, na.rm=TRUE))
+# By clust:
+temp4 <- temp3 %>% group_by(CLUST) %>% summarise(MEDIAN=median(TotalTIL, na.rm=TRUE),
+                                                 LB25=quantile(TotalTIL, 0.25, na.rm=TRUE),
+                                                 UB75=quantile(TotalTIL, 0.75, na.rm=TRUE))
+temp4 <- temp4 %>% mutate(MEDIAN_IQR=paste0(MEDIAN, " (",LB25, ", ", UB75, ")"))
+View(temp4 %>% select(CLUST, MEDIAN_IQR))
+
+# Total: 
+temp3 <- temp3 %>% ungroup() %>% summarise(MEDIAN=median(TotalTIL, na.rm=TRUE),
+                                           LB25=quantile(TotalTIL, 0.25, na.rm=TRUE),
+                                           UB75=quantile(TotalTIL, 0.75, na.rm=TRUE))
+
+
+# ----------------------
+# Mortality and unfav: 
+#CV <- CV %>% rename(CLUST=clustnumber)
+
+temp <- count(CV %>% filter(Timepoint==0) %>% group_by(CLUST), MORT)
+temp <- left_join(temp, Npts, by="CLUST")
+temp <- temp %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")")) %>%
+  filter (MORT==1) %>% select(-n.x, -n.y, -MORT) %>% rename(MORT=nperc)
+
+temp2 <- count(CV %>% filter(Timepoint==0) %>% group_by(CLUST), UNFAV)
+temp2 <- left_join(temp2, Npts, by="CLUST")
+temp2 <- temp2 %>% mutate(nperc=paste0(n.x, " (", round(n.x/n.y,3)*100,")")) %>%
+  filter(UNFAV==1) %>% select(-n.x, -n.y, -UNFAV) %>% rename(UNFAV=nperc)
+
+temp3 <- CV %>% filter(Timepoint==0) %>% group_by(CLUST) %>% summarise(IMMed=round(median(IMPACT.imp.tot.MORT, na.rm=TRUE),3)*100,
+                                            IMLB25=round(quantile(IMPACT.imp.tot.MORT, 0.25, na.rm=TRUE),3)*100,
+                                            IMUB75=round(quantile(IMPACT.imp.tot.MORT, 0.75, na.rm=TRUE),3)*100,
+                                            IUMed=round(median(IMPACT.imp.tot.UNFAV, na.rm=TRUE),3)*100,
+                                            IULB25=round(quantile(IMPACT.imp.tot.UNFAV, 0.25, na.rm=TRUE),3)*100,
+                                            IUUB75=round(quantile(IMPACT.imp.tot.UNFAV, 0.75, na.rm=TRUE),3)*100)
+temp3 <- temp3 %>% mutate(IM=paste0(IMMed, " (", IMLB25, ", ", IMUB75, ")"),
+                          IU=paste0(IUMed, " (", IULB25, ", ", IUUB75, ")")) %>% select(CLUST, IM, IU)
+
+temp <- left_join(temp, temp2, by="CLUST")
+temp <- left_join(temp, temp3, by="CLUST")
+View(t(temp))
+
+# OVERALL MORT, UNFAV, IMPACT...
+temp <- CV %>% filter(Timepoint==0) %>% count(MORT) %>% filter(MORT==1) %>% mutate(N=1728, perc=round(n/N,3)*100,
+                                                      nperc=paste0(n, " (", perc,")")) %>%
+  select(MORT, nperc)  %>% mutate(MORT="MORT")%>% rename(Outcome=MORT)
+
+temp2 <- CV %>% filter(Timepoint==0) %>% count(UNFAV) %>% filter(UNFAV==1) %>% mutate(N=1728, perc=round(n/N,3)*100,
+                                                         nperc=paste0(n, " (", perc,")")) %>%
+  select(UNFAV, nperc)  %>% mutate(UNFAV="UNFAV")%>% rename(Outcome=UNFAV)
+
+temp <- rbind(temp, temp2)
+
+temp3 <- CV %>% filter(Timepoint==0) %>% summarise(IMMed=round(median(IMPACT.imp.tot.MORT, na.rm=TRUE),3)*100,
+                         IMLB25=round(quantile(IMPACT.imp.tot.MORT, 0.25, na.rm=TRUE),3)*100,
+                         IMUB75=round(quantile(IMPACT.imp.tot.MORT, 0.75, na.rm=TRUE),3)*100,
+                         IUMed=round(median(IMPACT.imp.tot.UNFAV, na.rm=TRUE),3)*100,
+                         IULB25=round(quantile(IMPACT.imp.tot.UNFAV, 0.25, na.rm=TRUE),3)*100,
+                         IUUB75=round(quantile(IMPACT.imp.tot.UNFAV, 0.75, na.rm=TRUE),3)*100)
+temp3 <- temp3 %>% mutate(IM=paste0(IMMed, " (", IMLB25, ", ", IMUB75, ")"),
+                          IU=paste0(IUMed, " (", IULB25, ", ", IUUB75, ")")) %>% select(IM, IU)
+
+temp3 <- data.frame(nperc=t(temp3))
+temp3 <- temp3 %>% mutate(Outcome=row.names(temp3)) %>% select(Outcome, nperc)
+temp <- rbind(temp, temp3)
+
+View(temp)
+
+
+
+
+
+# -------------------------------------
+# Mutual information: ----------------
+# Average MI per clusters:
+MInf <- read.csv(file=paste0(f4,"DailyMILong.csv"), sep=";")
+MInf <- MInf %>% mutate(Name=gsub("\\..*","",Feature))
+MInf <- MInf %>% group_by(nclusters, Name) %>% summarise(MEAN=round(mean(MI, na.rm=TRUE),4),
+                                              SD=round(sd(MI, na.rm=TRUE),4))
+MIMEAN <- MInf %>% select(-SD) %>% pivot_wider(names_from = nclusters, values_from = MEAN)
+MISD <- MInf %>% select(-MEAN) %>% pivot_wider(names_from = nclusters, values_from = SD)