Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
GOSe-6mo-imputation-paper
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Kevin Kunzmann
GOSe-6mo-imputation-paper
Commits
aa0d7bad
Commit
aa0d7bad
authored
Feb 03, 2019
by
Kevin
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fixed msm fit, introduced target rules and added propr imputation report
parent
210b1842
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
136 additions
and
6 deletions
+136
-6
Snakefile
Snakefile
+37
-1
models/msm/fit.R
models/msm/fit.R
+1
-1
reports/imputations.Rmd
reports/imputations.Rmd
+98
-4
No files found.
Snakefile
View file @
aa0d7bad
...
...
@@ -39,6 +39,12 @@ rule prepare_data:
mv reports/figures.zip {output.figures}
"""
# define corresponding target rule for ease of use
rule data_report_v1_1:
input:
pdf = "output/v1.1/prepare_data.pdf",
figures = "output/v1.1/prepare_data_figures.zip"
...
...
@@ -75,6 +81,9 @@ rule generate_validation_data:
"""
# adjust threads by model type
def get_rule_threads(wildcards):
if wildcards.model in ("locf", "msm"):
...
...
@@ -123,10 +132,17 @@ rule model_assessment:
mv reports/figures.zip {output.figures}
"""
# define corresponding target rule for ease of use
rule cv_model_comparison_report_v1_1:
input:
pdf = "output/v1.1/model_assessment.pdf",
figures = "output/v1.1/model_assessment_figures.zip"
# rules for imputing on entire dataset
rule generate_imputation_data:
input:
rules.prepare_data.output,
...
...
@@ -145,7 +161,6 @@ rule generate_imputation_data:
# rules for imputing on entire dataset
rule model_impute:
input:
"config.yml",
...
...
@@ -175,3 +190,24 @@ rule post_process_imputations:
mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
Rscript scripts/post_process_imputations.R output/{wildcards.version}/data/imputation/{wildcards.model} output/{wildcards.version}/data/df_gose.rds {output}
"""
rule imputation_report:
input:
"config.yml",
rules.post_process_imputations.output,
markdown = "reports/imputations.Rmd"
output:
pdf = "output/{version}/gose_imputations_{model}.pdf",
figures = "output/{version}/gose_imputations_{model}_figures.zip"
shell:
"""
mkdir -p output/{wildcards.version}
Rscript -e "rmarkdown::render(\\"{input.markdown}\\", output_dir = \\"output/{wildcards.version}\\", params = list(data_dir = \\"../output/{wildcards.version}/data\\", imputations = "../output/v1.1/data/imputation/{wildcards.model}/df_gose_imputed.csv"))"
mv reports/figures.zip {output.figures}
"""
# define corresponding target rule for ease of use
rule impute_msm_v1_1:
input:
pdf = "output/v1.1/gose_imputations_msm.pdf",
figures = "output/v1.1/gose_imputations_msm_figures.zip"
models/msm/fit.R
View file @
aa0d7bad
...
...
@@ -93,7 +93,7 @@ df_posteriors <- tibble(
fitted
$
pstate
%>%
{
colnames
(
.
)
<-
c
(
"1"
,
as.character
(
3
:
8
));
.
}
)
%>%
mutate
(
"2"
=
0
)
%>%
filter
(
t
==
config
$
t_out_msm
)
%>%
filter
(
t
==
config
$
t_out_msm
+
.5
)
%>%
gather
(
GOSE
,
probability
,
as.character
(
1
:
8
))
%>%
arrange
(
gupi
,
t
,
GOSE
)
...
...
reports/imputations.Rmd
View file @
aa0d7bad
---
title: "Imputing GOSE scores in CENTER-TBI"
subtitle: "assessing final imputations"
date: "`r Sys.time()`"
statistician: "Kevin Kunzmann (kevin.kunzmann@mrc-bsu.cam.ac.uk)"
...
...
@@ -15,6 +17,7 @@ git-wd-clean: "`r ifelse(system('git diff-index --quiet HEAD') == 0, 'clean', 'f
params:
data_dir: "../output/v1.1/data"
imputations: "../output/v1.1/data/imputation/msm/df_gose_imputed.csv"
config_file: "../config.yml"
---
...
...
@@ -25,19 +28,110 @@ require(tidyverse, quietly = TRUE)
config <- yaml::read_yaml(params$config_file)
set.seed(config$seed)
df_imputations <- read_csv(params$imputations)
```
# Model descriptions
Since we do not use the raw imputed values but give preference to the per
protocol values (when a derived composite GOSE is availabel within 5-8 months
after injury), we start by comparing the final combined version with the raw
imputations.
```{r, fig.height=7, fig.width=7}
caret::confusionMatrix(
df_imputations$Subject.DerivedImputed180DaysGOSE %>% factor,
df_imputations$Subject.GOSE6monthEndpointDerived %>% factor
) %>%
as.matrix %>% as_tibble %>%
mutate(
Subject.DerivedImputed180DaysGOSE = row_number() %>% as.character
) %>%
gather(Subject.GOSE6monthEndpointDerived, n, `1`:`8`) %>%
ggplot(aes(Subject.GOSE6monthEndpointDerived, Subject.DerivedImputed180DaysGOSE, fill = n)) +
geom_raster() +
geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") +
geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
geom_text(aes(label = sprintf("%i", n)), vjust = 1) +
scale_fill_gradient(low = "white", high = "black") +
coord_fixed(expand = FALSE) +
theme_bw() +
theme(
panel.grid = element_blank()
) +
ggtitle("Confusion matrix (absolute counts)")
ggsave(filename = "confusion_matrix.pdf", width = 7, height = 7)
ggsave(filename = "confusion_matrix.png", width = 7, height = 7)
```
```{r}
df_imputations <- read_csv(sprintf("%s/imputation/msm/df_gose_imputed.csv", params$data_dir))
df_gose <- readRDS(sprintf("%s/df_gose.rds", params$data_dir))
```
df_per_protocol_gose <- df_gose %>%
filter(
Outcomes.DerivedCompositeGOSEDaysPostInjury >= 5*30,
Outcomes.DerivedCompositeGOSEDaysPostInjury <= 8*30
) %>%
# pick closest to 180
group_by(gupi) %>%
summarize(
closest_per_protocol_GOSE = Outcomes.DerivedCompositeGOSE[
which.min(abs(Outcomes.DerivedCompositeGOSEDaysPostInjury - 180))
]
)
```
Overall, only `r nrow(df_per_protocol_gose)` six-months GOSE are observed,
i.e., `r nrow(df_imputations) - nrow(df_per_protocol_gose)` model-based values
are used.
```{r, fig.height=8}
df_posteriors <- df_imputations %>%
select(-Subject.GOSE6monthEndpointDerived) %>%
gather(GOSE, probability, 3:10) %>%
arrange(gupi, GOSE) %>%
mutate(
GOSE = factor(GOSE) %>% as.numeric,
t = 180
)
idx <- unique(df_gose$gupi)[1:40]
df_gose %>%
filter(gupi %in% idx) %>%
transmute(
gupi,
t = Outcomes.DerivedCompositeGOSEDaysPostInjury,
GOSE = Outcomes.DerivedCompositeGOSE %>% as.numeric
) %>%
ggplot(aes(t, GOSE)) +
geom_rect(
aes(
xmin = t - 14,
xmax = t + 14,
ymin = GOSE -.5,
ymax = GOSE + .5,
fill = probability
),
data = df_posteriors %>% filter(gupi %in% idx)
) +
facet_wrap(~gupi, ncol = 5) +
geom_point(size = .5, color = "red") +
theme_bw() +
theme(
panel.grid = element_blank(),
strip.background = element_blank(),
strip.text.x = element_blank()
) +
scale_fill_gradient2(low = "white", high = "black", limits = c(0, 1)) +
scale_y_continuous(breaks = 1:8, limits = c(.5, 8.5)) +
ggtitle("First 30 GOSE trajectories with 6-months predicted probabilities.")
ggsave(filename = "sample_probs.pdf", width = 7, height = 8)
ggsave(filename = "sample_probs.png", width = 7, height = 8)
```
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment