Commit aa0d7bad authored by Kevin's avatar Kevin

fixed msm fit, introduced target rules and added propr imputation report

parent 210b1842
...@@ -39,6 +39,12 @@ rule prepare_data: ...@@ -39,6 +39,12 @@ rule prepare_data:
mv reports/figures.zip {output.figures} mv reports/figures.zip {output.figures}
""" """
# define corresponding target rule for ease of use
rule data_report_v1_1:
input:
pdf = "output/v1.1/prepare_data.pdf",
figures = "output/v1.1/prepare_data_figures.zip"
...@@ -75,6 +81,9 @@ rule generate_validation_data: ...@@ -75,6 +81,9 @@ rule generate_validation_data:
""" """
# adjust threads by model type # adjust threads by model type
def get_rule_threads(wildcards): def get_rule_threads(wildcards):
if wildcards.model in ("locf", "msm"): if wildcards.model in ("locf", "msm"):
...@@ -123,10 +132,17 @@ rule model_assessment: ...@@ -123,10 +132,17 @@ rule model_assessment:
mv reports/figures.zip {output.figures} mv reports/figures.zip {output.figures}
""" """
# define corresponding target rule for ease of use
rule cv_model_comparison_report_v1_1:
input:
pdf = "output/v1.1/model_assessment.pdf",
figures = "output/v1.1/model_assessment_figures.zip"
# rules for imputing on entire dataset
rule generate_imputation_data: rule generate_imputation_data:
input: input:
rules.prepare_data.output, rules.prepare_data.output,
...@@ -145,7 +161,6 @@ rule generate_imputation_data: ...@@ -145,7 +161,6 @@ rule generate_imputation_data:
# rules for imputing on entire dataset
rule model_impute: rule model_impute:
input: input:
"config.yml", "config.yml",
...@@ -175,3 +190,24 @@ rule post_process_imputations: ...@@ -175,3 +190,24 @@ rule post_process_imputations:
mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model} mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
Rscript scripts/post_process_imputations.R output/{wildcards.version}/data/imputation/{wildcards.model} output/{wildcards.version}/data/df_gose.rds {output} Rscript scripts/post_process_imputations.R output/{wildcards.version}/data/imputation/{wildcards.model} output/{wildcards.version}/data/df_gose.rds {output}
""" """
rule imputation_report:
input:
"config.yml",
rules.post_process_imputations.output,
markdown = "reports/imputations.Rmd"
output:
pdf = "output/{version}/gose_imputations_{model}.pdf",
figures = "output/{version}/gose_imputations_{model}_figures.zip"
shell:
"""
mkdir -p output/{wildcards.version}
Rscript -e "rmarkdown::render(\\"{input.markdown}\\", output_dir = \\"output/{wildcards.version}\\", params = list(data_dir = \\"../output/{wildcards.version}/data\\", imputations = "../output/v1.1/data/imputation/{wildcards.model}/df_gose_imputed.csv"))"
mv reports/figures.zip {output.figures}
"""
# define corresponding target rule for ease of use
rule impute_msm_v1_1:
input:
pdf = "output/v1.1/gose_imputations_msm.pdf",
figures = "output/v1.1/gose_imputations_msm_figures.zip"
...@@ -93,7 +93,7 @@ df_posteriors <- tibble( ...@@ -93,7 +93,7 @@ df_posteriors <- tibble(
fitted$pstate %>% {colnames(.) <- c("1", as.character(3:8)); .} fitted$pstate %>% {colnames(.) <- c("1", as.character(3:8)); .}
) %>% ) %>%
mutate("2" = 0) %>% mutate("2" = 0) %>%
filter(t == config$t_out_msm) %>% filter(t == config$t_out_msm + .5) %>%
gather(GOSE, probability, as.character(1:8)) %>% gather(GOSE, probability, as.character(1:8)) %>%
arrange(gupi, t, GOSE) arrange(gupi, t, GOSE)
......
--- ---
title: "Imputing GOSE scores in CENTER-TBI" title: "Imputing GOSE scores in CENTER-TBI"
subtitle: "assessing final imputations"
date: "`r Sys.time()`" date: "`r Sys.time()`"
statistician: "Kevin Kunzmann (kevin.kunzmann@mrc-bsu.cam.ac.uk)" statistician: "Kevin Kunzmann (kevin.kunzmann@mrc-bsu.cam.ac.uk)"
...@@ -15,6 +17,7 @@ git-wd-clean: "`r ifelse(system('git diff-index --quiet HEAD') == 0, 'clean', 'f ...@@ -15,6 +17,7 @@ git-wd-clean: "`r ifelse(system('git diff-index --quiet HEAD') == 0, 'clean', 'f
params: params:
data_dir: "../output/v1.1/data" data_dir: "../output/v1.1/data"
imputations: "../output/v1.1/data/imputation/msm/df_gose_imputed.csv"
config_file: "../config.yml" config_file: "../config.yml"
--- ---
...@@ -25,19 +28,110 @@ require(tidyverse, quietly = TRUE) ...@@ -25,19 +28,110 @@ require(tidyverse, quietly = TRUE)
config <- yaml::read_yaml(params$config_file) config <- yaml::read_yaml(params$config_file)
set.seed(config$seed) set.seed(config$seed)
df_imputations <- read_csv(params$imputations)
``` ```
# Model descriptions Since we do not use the raw imputed values but give preference to the per
protocol values (when a derived composite GOSE is availabel within 5-8 months
after injury), we start by comparing the final combined version with the raw
imputations.
```{r, fig.height=7, fig.width=7}
caret::confusionMatrix(
df_imputations$Subject.DerivedImputed180DaysGOSE %>% factor,
df_imputations$Subject.GOSE6monthEndpointDerived %>% factor
) %>%
as.matrix %>% as_tibble %>%
mutate(
Subject.DerivedImputed180DaysGOSE = row_number() %>% as.character
) %>%
gather(Subject.GOSE6monthEndpointDerived, n, `1`:`8`) %>%
ggplot(aes(Subject.GOSE6monthEndpointDerived, Subject.DerivedImputed180DaysGOSE, fill = n)) +
geom_raster() +
geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") +
geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
geom_text(aes(label = sprintf("%i", n)), vjust = 1) +
scale_fill_gradient(low = "white", high = "black") +
coord_fixed(expand = FALSE) +
theme_bw() +
theme(
panel.grid = element_blank()
) +
ggtitle("Confusion matrix (absolute counts)")
ggsave(filename = "confusion_matrix.pdf", width = 7, height = 7)
ggsave(filename = "confusion_matrix.png", width = 7, height = 7)
```
```{r} ```{r}
df_imputations <- read_csv(sprintf("%s/imputation/msm/df_gose_imputed.csv", params$data_dir))
df_gose <- readRDS(sprintf("%s/df_gose.rds", params$data_dir)) df_gose <- readRDS(sprintf("%s/df_gose.rds", params$data_dir))
```
df_per_protocol_gose <- df_gose %>%
filter(
Outcomes.DerivedCompositeGOSEDaysPostInjury >= 5*30,
Outcomes.DerivedCompositeGOSEDaysPostInjury <= 8*30
) %>%
# pick closest to 180
group_by(gupi) %>%
summarize(
closest_per_protocol_GOSE = Outcomes.DerivedCompositeGOSE[
which.min(abs(Outcomes.DerivedCompositeGOSEDaysPostInjury - 180))
]
)
```
Overall, only `r nrow(df_per_protocol_gose)` six-months GOSE are observed,
i.e., `r nrow(df_imputations) - nrow(df_per_protocol_gose)` model-based values
are used.
```{r, fig.height=8}
df_posteriors <- df_imputations %>%
select(-Subject.GOSE6monthEndpointDerived) %>%
gather(GOSE, probability, 3:10) %>%
arrange(gupi, GOSE) %>%
mutate(
GOSE = factor(GOSE) %>% as.numeric,
t = 180
)
idx <- unique(df_gose$gupi)[1:40]
df_gose %>%
filter(gupi %in% idx) %>%
transmute(
gupi,
t = Outcomes.DerivedCompositeGOSEDaysPostInjury,
GOSE = Outcomes.DerivedCompositeGOSE %>% as.numeric
) %>%
ggplot(aes(t, GOSE)) +
geom_rect(
aes(
xmin = t - 14,
xmax = t + 14,
ymin = GOSE -.5,
ymax = GOSE + .5,
fill = probability
),
data = df_posteriors %>% filter(gupi %in% idx)
) +
facet_wrap(~gupi, ncol = 5) +
geom_point(size = .5, color = "red") +
theme_bw() +
theme(
panel.grid = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank()
) +
scale_fill_gradient2(low = "white", high = "black", limits = c(0, 1)) +
scale_y_continuous(breaks = 1:8, limits = c(.5, 8.5)) +
ggtitle("First 30 GOSE trajectories with 6-months predicted probabilities.")
ggsave(filename = "sample_probs.pdf", width = 7, height = 8)
ggsave(filename = "sample_probs.png", width = 7, height = 8)
```
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment