Commit 943d2c27 authored by Kevin Kunzmann

updated figures

parent 476dbbdd
@@ -548,9 +548,9 @@ MAE and RMSE are both measures of average precision, where
RMSE puts more weight on large deviations as compared to MAE.
Comparisons in terms of bias, MAE, and RMSE tacitly assume that
GOSe values can be sensibly interpreted on an interval scale.
-We therefore also consider $Pr[est > true] - Pr[est < true]$ as an
+We therefore also consider $Pr[imp. > true] - Pr[imp. < true]$ as an
alternative measure of bias which does not require this tacit assumption.
-Note that the scale is not directlz comparable to the one of the
+Note that the scale is not directly comparable to the one of the
other three quantities!
All measures are considered both conditional on the ground truth
(the left-out observed GOSe) and averaged over the entire test set.
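
For concreteness, a minimal sketch of how the four summary measures can be computed; the function and vector names (`summary_measures`, `true_gose`, `imputed`) are illustrative and not taken from the analysis code.

```r
# Minimal sketch, assuming numeric vectors of left-out observed GOSe
# (`true_gose`) and the corresponding imputed values (`imputed`).
summary_measures <- function(true_gose, imputed) {
  c(
    Bias      = mean(imputed) - mean(true_gose),
    # sign-based bias: uses only the ordering, not interval-scale distances
    sign_bias = mean(imputed > true_gose) - mean(imputed < true_gose),
    MAE       = mean(abs(imputed - true_gose)),
    RMSE      = sqrt(mean((imputed - true_gose)^2))
  )
}

summary_measures(true_gose = c(3, 5, 5, 7), imputed = c(4, 5, 6, 6))
```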
@@ -581,33 +581,41 @@ The overall performance of all fitted models in terms of bias, MAE, and RMSE
is depicted in Figure ??? both conditional on LOCF being applicable and,
excluding LOCF, on the entire test set.
-```{r overall-comparison-all-methods, echo=FALSE, fig.height=7}
+```{r overall-comparison-all-methods, echo=FALSE, fig.height=9, fig.width=6}
plot_summary_measures <- function(df, label) {
-df_predictions %>%
-filter(!(gupi %in% idx)) %>%
+df %>%
group_by(model, fold) %>%
summarize(
RMSE = mean((GOSE - prediction)^2, na.rm = TRUE) %>% sqrt,
MAE = mean(abs(GOSE - prediction), na.rm = TRUE),
Bias = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
-`Pr[est. > true] - Pr[est. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
+`Pr[imp. > true] - Pr[imp. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
) %>%
ungroup %>%
gather(error, value, -model, -fold) %>%
mutate(
error = factor(error, c(
"Bias",
"Pr[est. > true] - Pr[est. < true]",
"Pr[imp. > true] - Pr[imp. < true]",
"MAE",
"RMSE"
))
) %>%
-ggplot(aes(model, value)) +
+group_by(model, error) %>%
+summarize(
+mean_error = mean(value),
+se_error = sd(value) / sqrt(n())
+) %>%
+ggplot(aes(x = model, y = mean_error)) +
geom_hline(yintercept = 0, color = "black") +
-geom_boxplot() +
-facet_wrap(~error, nrow = 1) +
-scale_y_continuous(name = "", breaks = seq(-2, 8, .25), limits = c(-.5, 1.5)) +
+geom_point(size = .8) +
+geom_errorbar(
+aes(ymin = mean_error - 1.96*se_error, ymax = mean_error + 1.96*se_error),
+width = .25
+) +
+facet_wrap(~error, nrow = 2) +
+scale_y_continuous(name = "", breaks = seq(-2, 8, .25), limits = c(-.5, 1.25)) +
scale_x_discrete("") +
theme_bw() +
theme(
@@ -621,7 +629,7 @@ plot_summary_measures <- function(df, label) {
cowplot::plot_grid(
plot_summary_measures(
-df_predictions %>% filter(gupi %in% idx),
+df_predictions %>% filter(!(gupi %in% idx)),
"Summary measures, LOCF subset"
),
plot_summary_measures(
@@ -674,7 +682,7 @@ We first consider results for the set of test cases which allow LOCF imputation
Both the raw count as well as the relative (by left-out observed GOSe) confusion matrices
are presented in Figure ???.
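
As a toy illustration of the two displays, raw counts can be column-normalized with `prop.table(, margin = 2)` so that each column (observed GOSe level) sums to one; the vectors below are made-up stand-ins for the test-set data.

```r
# Illustrative observed (left-out) and imputed GOSe values on the 8-point scale.
observed <- factor(c(3, 3, 5, 5, 7, 7, 8), levels = 1:8)
imputed  <- factor(c(3, 4, 5, 5, 6, 7, 8), levels = 1:8)

cnf <- table(imputed = imputed, observed = observed)  # absolute counts
prop.table(cnf, margin = 2)                           # fractions per observed GOSe
```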
-```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset."}
+```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset.", fig.height=9, fig.width=6}
plot_confusion_matrices <- function(df_predictions, models) {
df_average_confusion_matrices <- df_predictions %>%
@@ -702,7 +710,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
label = sprintf("%.1f", n) %>%
ifelse(. == "0.0", "", .)
),
-size = 1.5
+size = 2
) +
geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") +
geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
@@ -713,7 +721,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
theme(
panel.grid = element_blank()
) +
-facet_wrap(~model, nrow = 1) +
+facet_wrap(~model, nrow = 2) +
ggtitle("Average confusion matrix across folds (absolute counts)")
p_cnf_mtrx_colnrm <- df_average_confusion_matrices %>%
@@ -733,10 +741,10 @@ plot_confusion_matrices <- function(df_predictions, models) {
theme(
panel.grid = element_blank()
) +
-facet_wrap(~model, nrow = 1) +
+facet_wrap(~model, nrow = 2) +
ggtitle("Average confusion matrix across folds (column fraction)")
-cowplot::plot_grid(p_cnf_mtrx_raw, p_cnf_mtrx_colnrm, ncol = 1, align = "v")
+cowplot::plot_grid(p_cnf_mtrx_raw, p_cnf_mtrx_colnrm, ncol = 1, align = "v")
}
@@ -745,8 +753,8 @@ plot_confusion_matrices(
c("MSM", "GP + cov", "MM", "LOCF")
)
ggsave(filename = "confusion_matrices_locf.pdf", width = 7, height = 6)
ggsave(filename = "confusion_matrices_locf.png", width = 7, height = 6)
ggsave(filename = "confusion_matrices_locf.pdf", width = 6, height = 9)
ggsave(filename = "confusion_matrices_locf.png", width = 6, height = 9)
```
The absolute-count confusion matrices show that most imputed values are
@@ -831,7 +839,7 @@ we also consider the performance conditional on the respective ground-truth
(i.e. the observed GOSe values in the test sets).
The results are shown in Figure ??? (vertical bars are +/- one standard error of the mean).
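
A compact sketch of how such stratified error bars can be derived; the `df_errors` tibble is an illustrative stand-in, while the actual chunk below operates on the per-fold prediction errors.

```r
library(dplyr)

# Illustrative input: one error value per cross-validation fold and
# ground-truth GOSe level; the values are made up.
df_errors <- tibble(
  GOSE  = rep(c(3, 5, 7), each = 5),
  fold  = rep(1:5, times = 3),
  value = c(.4, .5, .6, .5, .4, .3, .2, .4, .3, .3, .5, .6, .5, .4, .5)
)

df_errors %>%
  group_by(GOSE) %>%
  summarize(
    mean = mean(value),
    se   = sd(value) / sqrt(n())  # standard error of the mean across folds
  ) %>%
  mutate(ymin = mean - se, ymax = mean + se)  # +/- one SE, as in the figure
```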
-```{r error-scores-locf, echo=FALSE, fig.height=3, fig.width=9}
+```{r error-scores-locf, echo=FALSE, fig.height=5, fig.width=9}
plot_summary_measures_cond <- function(df_predictions, models, label) {
df_predictions %>%
@@ -841,7 +849,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
RMSE = mean((GOSE - prediction)^2, na.rm = TRUE) %>% sqrt,
MAE = mean(abs(GOSE - prediction), na.rm = TRUE),
Bias = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
-`Pr[est. > true] - Pr[est. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
+`Pr[imp. > true] - Pr[imp. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
) %>%
gather(error, value, -model, -GOSE, -fold) %>%
group_by(GOSE, model, error, fold) %>%
@@ -858,7 +866,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
model = factor(model, models),
error = factor(error, c(
"Bias",
"Pr[est. > true] - Pr[est. < true]",
"Pr[imp. > true] - Pr[imp. < true]",
"MAE",
"RMSE"
))
@@ -869,7 +877,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
width = .2,
position = position_dodge(.33)
) +
-geom_line(aes(y = mean), alpha = .5) +
+geom_line(aes(y = mean), alpha = .66) +
xlab("GOSe") +
facet_wrap(~error, nrow = 1) +
scale_y_continuous(name = "", breaks = seq(-2, 8, .25)) +
@@ -889,8 +897,8 @@ plot_summary_measures_cond(
"Summary measures by observed GOSe, LOCF subset"
)
ggsave(filename = "errors_stratified_locf.pdf", width = 9, height = 3)
ggsave(filename = "errors_stratified_locf.png", width = 9, height = 3)
ggsave(filename = "errors_stratified_locf.pdf", width = 9, height = 5)
ggsave(filename = "errors_stratified_locf.png", width = 9, height = 5)
```
Just as with the overall performance, differences are most pronounced in terms
@@ -938,14 +946,14 @@ to the LOCF subset.
* decide whether figures go in appendix - David and I agree on them actually being the
primary analysis. We just need to convince people of the fact that LOCF should be dropped *first*. As always, I am open to debate this, but we should just make a decision: figurexit or figuremain?
-```{r confusion-matrix, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices, full training set without LOCF."}
+```{r confusion-matrix, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices, full training set without LOCF.", fig.height=9, fig.width=6}
plot_confusion_matrices(
df_predictions,
c("MSM", "GP + cov", "MM")
)
ggsave(filename = "confusion_matrices_all.pdf", width = 7, height = 6)
ggsave(filename = "confusion_matrices_all.png", width = 7, height = 6)
ggsave(filename = "confusion_matrices_all.pdf", width = 6, height = 9)
ggsave(filename = "confusion_matrices_all.png", width = 6, height = 9)
```
```{r crossing-table-full, echo=FALSE, warning=FALSE, results='asis'}
@@ -997,15 +1005,15 @@ df_average_confusion_matrices %>%
pander::pandoc.table("Some specific confusion percentages, full data set.", digits = 3)
```
-```{r error-scores-all, echo=FALSE, fig.height=3, fig.width=9}
+```{r error-scores-all, echo=FALSE, fig.height=5, fig.width=9}
plot_summary_measures_cond(
df_predictions %>% filter(!(gupi %in% idx)),
c("MSM", "GP + cov", "MM"),
"Summary measures by observed GOSe, full test set"
)
ggsave(filename = "imputation_error.pdf", width = 9, height = 3)
ggsave(filename = "imputation_error.png", width = 9, height = 3)
ggsave(filename = "imputation_error.pdf", width = 9, height = 5)
ggsave(filename = "imputation_error.png", width = 9, height = 5)
```