Commit 6b448f2e authored by Kevin Kunzmann's avatar Kevin Kunzmann

revision of manuscript

parent 0badf884
...@@ -553,7 +553,7 @@ alternative measure of bias which does not require this tacit assumption. ...@@ -553,7 +553,7 @@ alternative measure of bias which does not require this tacit assumption.
Note that the scale is not directly comparable to the one of the Note that the scale is not directly comparable to the one of the
other three quantities! other three quantities!
All measures are considered both conditional on the ground-truth All measures are considered both conditional on the ground-truth
(unobserved true GOSe) as well as averaged over the entire test set. (left-out observed GOSe) as well as averaged over the entire test set.
LOCF, by design, cannot provide imputed values when there are no LOCF, by design, cannot provide imputed values when there are no
...@@ -671,7 +671,7 @@ baseline covariates. ...@@ -671,7 +671,7 @@ baseline covariates.
We first consider results for the set of test cases which allow LOCF imputation We first consider results for the set of test cases which allow LOCF imputation
(n = `r df_predictions %>% filter(model == "LOCF") %>% nrow - length(idx)`). (n = `r df_predictions %>% filter(model == "LOCF") %>% nrow - length(idx)`).
Both the raw count as well as the relative (by left-out true GOSe) confusion matrices Both the raw count as well as the relative (by left-out observed GOSe) confusion matrices
are presented in Figure ???. are presented in Figure ???.
```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset."} ```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset."}
...@@ -687,16 +687,16 @@ plot_confusion_matrices <- function(df_predictions, models) { ...@@ -687,16 +687,16 @@ plot_confusion_matrices <- function(df_predictions, models) {
) %>% ) %>%
as.matrix %>% as_tibble %>% as.matrix %>% as_tibble %>%
mutate(`Predicted GOSE` = row_number() %>% as.character) %>% mutate(`Predicted GOSE` = row_number() %>% as.character) %>%
gather(`True GOSE`, n, 1:8) gather(`Observed GOSE`, n, 1:8)
) %>% ) %>%
unnest %>% unnest %>%
group_by(model, `Predicted GOSE`, `True GOSE`) %>% group_by(model, `Predicted GOSE`, `Observed GOSE`) %>%
summarize(n = mean(n)) %>% summarize(n = mean(n)) %>%
ungroup %>% ungroup %>%
mutate(model = factor(model, models)) mutate(model = factor(model, models))
p_cnf_mtrx_raw <- df_average_confusion_matrices %>% p_cnf_mtrx_raw <- df_average_confusion_matrices %>%
ggplot(aes(`True GOSE`, `Predicted GOSE`, fill = n)) + ggplot(aes(`Observed GOSE`, `Predicted GOSE`, fill = n)) +
geom_raster() + geom_raster() +
geom_text(aes( geom_text(aes(
label = sprintf("%.1f", n) %>% label = sprintf("%.1f", n) %>%
...@@ -708,7 +708,7 @@ plot_confusion_matrices <- function(df_predictions, models) { ...@@ -708,7 +708,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") + geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
scale_fill_gradient(low = "white", high = "#555555") + scale_fill_gradient(low = "white", high = "#555555") +
coord_fixed(expand = FALSE) + coord_fixed(expand = FALSE) +
labs(x = "true GOSe", y = "imputed GOSe", fill = "") + labs(x = "observed GOSe", y = "imputed GOSe", fill = "") +
theme_bw() + theme_bw() +
theme( theme(
panel.grid = element_blank() panel.grid = element_blank()
...@@ -717,18 +717,18 @@ plot_confusion_matrices <- function(df_predictions, models) { ...@@ -717,18 +717,18 @@ plot_confusion_matrices <- function(df_predictions, models) {
ggtitle("Average confusion matrix accross folds (absolute counts)") ggtitle("Average confusion matrix accross folds (absolute counts)")
p_cnf_mtrx_colnrm <- df_average_confusion_matrices %>% p_cnf_mtrx_colnrm <- df_average_confusion_matrices %>%
group_by(model, `True GOSE`) %>% group_by(model, `Observed GOSE`) %>%
mutate( mutate(
`fraction (column)` = n / sum(n), `fraction (column)` = n / sum(n),
`fraction (column)` = ifelse(is.nan(`fraction (column)`), 0, `fraction (column)`) `fraction (column)` = ifelse(is.nan(`fraction (column)`), 0, `fraction (column)`)
) %>% ) %>%
ggplot(aes(`True GOSE`, `Predicted GOSE`, fill = `fraction (column)`)) + ggplot(aes(`Observed GOSE`, `Predicted GOSE`, fill = `fraction (column)`)) +
geom_raster() + geom_raster() +
geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") + geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") +
geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") + geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
scale_fill_gradient("", low = "white", high = "black", limits = c(0, 1)) + scale_fill_gradient("", low = "white", high = "black", limits = c(0, 1)) +
coord_fixed(expand = FALSE) + coord_fixed(expand = FALSE) +
labs(x = "true GOSe", y = "imputed GOSe", fill = "") + labs(x = "observed GOSe", y = "imputed GOSe", fill = "") +
theme_bw() + theme_bw() +
theme( theme(
panel.grid = element_blank() panel.grid = element_blank()
...@@ -786,10 +786,10 @@ df_average_confusion_matrices <- df_predictions %>% ...@@ -786,10 +786,10 @@ df_average_confusion_matrices <- df_predictions %>%
) %>% ) %>%
as.matrix %>% as_tibble %>% as.matrix %>% as_tibble %>%
mutate(`Predicted GOSE` = row_number() %>% as.character) %>% mutate(`Predicted GOSE` = row_number() %>% as.character) %>%
gather(`True GOSE`, n, 1:8) gather(`Observed GOSE`, n, 1:8)
) %>% ) %>%
unnest %>% unnest %>%
group_by(model, `Predicted GOSE`, `True GOSE`) %>% group_by(model, `Predicted GOSE`, `Observed GOSE`) %>%
summarize(n = mean(n)) %>% summarize(n = mean(n)) %>%
ungroup %>% ungroup %>%
mutate(model = factor(model, models)) mutate(model = factor(model, models))
...@@ -797,7 +797,7 @@ rbind( ...@@ -797,7 +797,7 @@ rbind(
df_average_confusion_matrices %>% df_average_confusion_matrices %>%
filter(model %in% c("LOCF", "MM", "GP + cov", "MSM")) %>% filter(model %in% c("LOCF", "MM", "GP + cov", "MSM")) %>%
group_by(model) %>% group_by(model) %>%
filter(`True GOSE` <= 3) %>% filter(`Observed GOSE` <= 3) %>%
mutate(n_total = sum(n)) %>% mutate(n_total = sum(n)) %>%
filter(`Predicted GOSE` > 3) %>% filter(`Predicted GOSE` > 3) %>%
summarize(fraction = sum(n / n_total)) %>% summarize(fraction = sum(n / n_total)) %>%
...@@ -806,7 +806,7 @@ df_average_confusion_matrices %>% ...@@ -806,7 +806,7 @@ df_average_confusion_matrices %>%
df_average_confusion_matrices %>% df_average_confusion_matrices %>%
filter(model %in% c("LOCF", "MM", "GP + cov", "MSM")) %>% filter(model %in% c("LOCF", "MM", "GP + cov", "MSM")) %>%
group_by(model) %>% group_by(model) %>%
filter(`True GOSE` == 4) %>% filter(`Observed GOSE` == 4) %>%
mutate(n_total = sum(n)) %>% mutate(n_total = sum(n)) %>%
filter(`Predicted GOSE` > 4) %>% filter(`Predicted GOSE` > 4) %>%
summarize(fraction = sum(n / n_total)) %>% summarize(fraction = sum(n / n_total)) %>%
...@@ -815,7 +815,7 @@ df_average_confusion_matrices %>% ...@@ -815,7 +815,7 @@ df_average_confusion_matrices %>%
df_average_confusion_matrices %>% df_average_confusion_matrices %>%
filter(model %in% c("LOCF", "MM", "GP + cov", "MSM")) %>% filter(model %in% c("LOCF", "MM", "GP + cov", "MSM")) %>%
group_by(model) %>% group_by(model) %>%
filter(`True GOSE` < 8) %>% filter(`Observed GOSE` < 8) %>%
mutate(n_total = sum(n)) %>% mutate(n_total = sum(n)) %>%
filter(`Predicted GOSE` == 8) %>% filter(`Predicted GOSE` == 8) %>%
summarize(fraction = sum(n / n_total)) %>% summarize(fraction = sum(n / n_total)) %>%
...@@ -895,13 +895,17 @@ ggsave(filename = "errors_stratified_locf.png", width = 9, height = 3) ...@@ -895,13 +895,17 @@ ggsave(filename = "errors_stratified_locf.png", width = 9, height = 3)
Just as with the overall performance, differences are most pronounced in terms Just as with the overall performance, differences are most pronounced in terms
of bias. of bias.
Interestingly, the conditional perspective reveals differences between bias
defined as the difference between mean imputed and mean observed values (tacitly
assuming at least an interval scale) and bias defined as the difference in the
probability to over- or undershoot the observed value.
Again, the category imbalance in the GOSe distribution explains the fact that Again, the category imbalance in the GOSe distribution explains the fact that
all model-based approaches tend to perform better for the most frequent all model-based approaches tend to perform better for the most frequent
categories 6, 7, and 8 while sacrificing performance for the less frequent categories 6, 7, and 8 while sacrificing performance for the less frequent
categories 4 and 5 as compared to LOCF. categories 4 and 5 as compared to LOCF.
Bias-wise all methods exhibit a certain regression to the mean effect since low Bias-wise all methods exhibit a certain regression to the mean effect since low
categories tend to be confused with better (higher) GOSe on average while categories tend to be confused with better (higher) GOSe on average while
high true GOSe values are dampened (negative bias at 7, 8). high observed GOSe values are dampened (negative bias at 7, 8).
Since LOCF does not take the category imbalance into account and since it exhibits Since LOCF does not take the category imbalance into account and since it exhibits
a relatively large negative bias at the most frequent GOSe values, it is a relatively large negative bias at the most frequent GOSe values, it is
overall negatively biased. overall negatively biased.
...@@ -909,14 +913,14 @@ Interestingly, the conditional assessment of the GP regressions bias profile ...@@ -909,14 +913,14 @@ Interestingly, the conditional assessment of the GP regressions bias profile
reveals that the overall unbiasedness can be explained by the relatively high reveals that the overall unbiasedness can be explained by the relatively high
positive and negative biases conditional on low/high GOSe values canceling out positive and negative biases conditional on low/high GOSe values canceling out
in the overall population. in the overall population.
Since the accuracy results mirror this effect, the GP regression model seems The MSM and MM models are fairly similar with respect to accuracy but MSM
to suffer from an overly strong regression to the mean effect. clearly dominates with respect to bias.
Note that irrespective of the exact definition of bias used, MSM dominates the other
The MSM and MM models are fairly similar with respect to accuracy with a slight model-based approaches.
advantage for MSM. Comparing LOCF and MSM, there is a slight advantage of MSM in terms of accuracy for
Interestingly, though, the MSM approach is consistently less positively biased the majority classes 3, 7, 8 which explain the overall difference shown in Figure ???.
across the rarer low GOSe categories while being only insignificantly more With respect to bias, MSM also performs better than LOCF for the most frequently
negatively biased for categories 7 and 8. observed categories, but the extent of this improvement depends on the performance measure.
...@@ -956,17 +960,17 @@ df_average_confusion_matrices <- df_predictions %>% ...@@ -956,17 +960,17 @@ df_average_confusion_matrices <- df_predictions %>%
) %>% ) %>%
as.matrix %>% as_tibble %>% as.matrix %>% as_tibble %>%
mutate(`Predicted GOSE` = row_number() %>% as.character) %>% mutate(`Predicted GOSE` = row_number() %>% as.character) %>%
gather(`True GOSE`, n, 1:8) gather(`Observed GOSE`, n, 1:8)
) %>% ) %>%
unnest %>% unnest %>%
group_by(model, `Predicted GOSE`, `True GOSE`) %>% group_by(model, `Predicted GOSE`, `Observed GOSE`) %>%
summarize(n = mean(n)) %>% summarize(n = mean(n)) %>%
ungroup %>% ungroup %>%
mutate(model = factor(model, models)) mutate(model = factor(model, models))
rbind( rbind(
df_average_confusion_matrices %>% df_average_confusion_matrices %>%
group_by(model) %>% group_by(model) %>%
filter(`True GOSE` <= 3) %>% filter(`Observed GOSE` <= 3) %>%
mutate(n_total = sum(n)) %>% mutate(n_total = sum(n)) %>%
filter(`Predicted GOSE` > 3) %>% filter(`Predicted GOSE` > 3) %>%
summarize(fraction = sum(n / n_total)) %>% summarize(fraction = sum(n / n_total)) %>%
...@@ -974,7 +978,7 @@ df_average_confusion_matrices %>% ...@@ -974,7 +978,7 @@ df_average_confusion_matrices %>%
df_average_confusion_matrices %>% df_average_confusion_matrices %>%
group_by(model) %>% group_by(model) %>%
filter(`True GOSE` == 4) %>% filter(`Observed GOSE` == 4) %>%
mutate(n_total = sum(n)) %>% mutate(n_total = sum(n)) %>%
filter(`Predicted GOSE` > 4) %>% filter(`Predicted GOSE` > 4) %>%
summarize(fraction = sum(n / n_total)) %>% summarize(fraction = sum(n / n_total)) %>%
...@@ -982,7 +986,7 @@ df_average_confusion_matrices %>% ...@@ -982,7 +986,7 @@ df_average_confusion_matrices %>%
df_average_confusion_matrices %>% df_average_confusion_matrices %>%
group_by(model) %>% group_by(model) %>%
filter(`True GOSE` < 8) %>% filter(`Observed GOSE` < 8) %>%
mutate(n_total = sum(n)) %>% mutate(n_total = sum(n)) %>%
filter(`Predicted GOSE` == 8) %>% filter(`Predicted GOSE` == 8) %>%
summarize(fraction = sum(n / n_total)) %>% summarize(fraction = sum(n / n_total)) %>%
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment