Commit 943d2c27 authored by Kevin Kunzmann

updated figures

parent 476dbbdd
@@ -548,9 +548,9 @@ MAE and RMSE are both measures of average precision, where
RMSE puts more weight on large deviations as compared to MAE.
Comparisons in terms of bias, MAE, and RMSE tacitly assume that
GOSe values can be sensibly interpreted on an interval scale.
-We therefore also consider $Pr[est > true] - Pr[est < true]$ as an
+We therefore also consider $Pr[imp. > true] - Pr[imp. < true]$ as an
alternative measure of bias which does not require this tacit assumption.
-Note that the scale is not directlz comparable to the one of the
+Note that the scale is not directly comparable to the one of the
other three quantities!
All measures are considered both conditional on the ground truth
(the left-out observed GOSe) and averaged over the entire test set.
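
For concreteness, a minimal sketch of how the four summary measures can be computed; the function and vector names (`summary_measures`, `true_gose`, `imputed`) are illustrative and not taken from the analysis code.

```r
# Minimal sketch, assuming numeric vectors of left-out observed GOSe
# (`true_gose`) and the corresponding imputed values (`imputed`).
summary_measures <- function(true_gose, imputed) {
  c(
    Bias      = mean(imputed) - mean(true_gose),
    # sign-based bias: uses only the ordering, not interval-scale distances
    sign_bias = mean(imputed > true_gose) - mean(imputed < true_gose),
    MAE       = mean(abs(imputed - true_gose)),
    RMSE      = sqrt(mean((imputed - true_gose)^2))
  )
}

summary_measures(true_gose = c(3, 5, 5, 7), imputed = c(4, 5, 6, 6))
```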
@@ -581,33 +581,41 @@ The overall performance of all fitted models in terms of bias, MAE, and RMSE
is depicted in Figure ??? both conditional on LOCF being applicable and,
excluding LOCF, on the entire test set.
-```{r overall-comparison-all-methods, echo=FALSE, fig.height=7}
+```{r overall-comparison-all-methods, echo=FALSE, fig.height=9, fig.width=6}
plot_summary_measures <- function(df, label) {
-df_predictions %>%
-filter(!(gupi %in% idx)) %>%
+df %>%
group_by(model, fold) %>%
summarize(
RMSE = mean((GOSE - prediction)^2, na.rm = TRUE) %>% sqrt,
MAE = mean(abs(GOSE - prediction), na.rm = TRUE),
Bias = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
-`Pr[est. > true] - Pr[est. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
+`Pr[imp. > true] - Pr[imp. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
) %>%
ungroup %>%
gather(error, value, -model, -fold) %>%
mutate(
error = factor(error, c(
"Bias",
"Pr[est. > true] - Pr[est. < true]",
"Pr[imp. > true] - Pr[imp. < true]",
"MAE",
"RMSE"
))
) %>%
-ggplot(aes(model, value)) +
+group_by(model, error) %>%
+summarize(
+mean_error = mean(value),
+se_error = sd(value) / sqrt(n())
+) %>%
+ggplot(aes(x = model, y = mean_error)) +
geom_hline(yintercept = 0, color = "black") +
-geom_boxplot() +
-facet_wrap(~error, nrow = 1) +
-scale_y_continuous(name = "", breaks = seq(-2, 8, .25), limits = c(-.5, 1.5)) +
+geom_point(size = .8) +
+geom_errorbar(
+aes(ymin = mean_error - 1.96*se_error, ymax = mean_error + 1.96*se_error),
+width = .25
+) +
+facet_wrap(~error, nrow = 2) +
+scale_y_continuous(name = "", breaks = seq(-2, 8, .25), limits = c(-.5, 1.25)) +
scale_x_discrete("") +
theme_bw() +
theme(
@@ -621,7 +629,7 @@ plot_summary_measures <- function(df, label) {
cowplot::plot_grid(
plot_summary_measures(
-df_predictions %>% filter(gupi %in% idx),
+df_predictions %>% filter(!(gupi %in% idx)),
"Summary measures, LOCF subset"
),
plot_summary_measures(
@@ -674,7 +682,7 @@ We first consider results for the set of test cases which allow LOCF imputation
Both the raw count as well as the relative (by left-out observed GOSe) confusion matrices
are presented in Figure ???.
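
As a toy illustration of the two displays, raw counts can be column-normalized with `prop.table(, margin = 2)` so that each column (observed GOSe level) sums to one; the vectors below are made-up stand-ins for the test-set data.

```r
# Illustrative observed (left-out) and imputed GOSe values on the 8-point scale.
observed <- factor(c(3, 3, 5, 5, 7, 7, 8), levels = 1:8)
imputed  <- factor(c(3, 4, 5, 5, 6, 7, 8), levels = 1:8)

cnf <- table(imputed = imputed, observed = observed)  # absolute counts
prop.table(cnf, margin = 2)                           # fractions per observed GOSe
```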
-```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset."}
+```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset.", fig.height=9, fig.width=6}
plot_confusion_matrices <- function(df_predictions, models) {
df_average_confusion_matrices <- df_predictions %>%
@@ -702,7 +710,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
label = sprintf("%.1f", n) %>%
ifelse(. == "0.0", "", .)
),
-size = 1.5
+size = 2
) +
geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") +
geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
@@ -713,7 +721,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
theme(
panel.grid = element_blank()
) +
-facet_wrap(~model, nrow = 1) +
+facet_wrap(~model, nrow = 2) +
ggtitle("Average confusion matrix across folds (absolute counts)")
p_cnf_mtrx_colnrm <- df_average_confusion_matrices %>%
@@ -733,10 +741,10 @@ plot_confusion_matrices <- function(df_predictions, models) {
theme(
panel.grid = element_blank()
) +
-facet_wrap(~model, nrow = 1) +
+facet_wrap(~model, nrow = 2) +
ggtitle("Average confusion matrix across folds (column fraction)")
-cowplot::plot_grid(p_cnf_mtrx_raw, p_cnf_mtrx_colnrm, ncol = 1, align = "v")
+cowplot::plot_grid(p_cnf_mtrx_raw, p_cnf_mtrx_colnrm, ncol = 1, align = "v")
}
@@ -745,8 +753,8 @@ plot_confusion_matrices(
c("MSM", "GP + cov", "MM", "LOCF")
)
ggsave(filename = "confusion_matrices_locf.pdf", width = 7, height = 6)
ggsave(filename = "confusion_matrices_locf.png", width = 7, height = 6)
ggsave(filename = "confusion_matrices_locf.pdf", width = 6, height = 9)
ggsave(filename = "confusion_matrices_locf.png", width = 6, height = 9)
```
The absolute-count confusion matrices show that most imputed values are
@@ -831,7 +839,7 @@ we also consider the performance conditional on the respective ground-truth
(i.e. the observed GOSe values in the test sets).
The results are shown in Figure ??? (vertical bars are +/- one standard error of the mean).
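
A compact sketch of how such stratified error bars can be derived; the `df_errors` tibble is an illustrative stand-in, while the actual chunk below operates on the per-fold prediction errors.

```r
library(dplyr)

# Illustrative input: one error value per cross-validation fold and
# ground-truth GOSe level; the values are made up.
df_errors <- tibble(
  GOSE  = rep(c(3, 5, 7), each = 5),
  fold  = rep(1:5, times = 3),
  value = c(.4, .5, .6, .5, .4, .3, .2, .4, .3, .3, .5, .6, .5, .4, .5)
)

df_errors %>%
  group_by(GOSE) %>%
  summarize(
    mean = mean(value),
    se   = sd(value) / sqrt(n())  # standard error of the mean across folds
  ) %>%
  mutate(ymin = mean - se, ymax = mean + se)  # +/- one SE, as in the figure
```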
-```{r error-scores-locf, echo=FALSE, fig.height=3, fig.width=9}
+```{r error-scores-locf, echo=FALSE, fig.height=5, fig.width=9}
plot_summary_measures_cond <- function(df_predictions, models, label) {
df_predictions %>%
@@ -841,7 +849,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
RMSE = mean((GOSE - prediction)^2, na.rm = TRUE) %>% sqrt,
MAE = mean(abs(GOSE - prediction), na.rm = TRUE),
Bias = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
-`Pr[est. > true] - Pr[est. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
+`Pr[imp. > true] - Pr[imp. < true]` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
) %>%
gather(error, value, -model, -GOSE, -fold) %>%
group_by(GOSE, model, error, fold) %>%
@@ -858,7 +866,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
model = factor(model, models),
error = factor(error, c(
"Bias",
"Pr[est. > true] - Pr[est. < true]",
"Pr[imp. > true] - Pr[imp. < true]",
"MAE",
"RMSE"
))
@@ -869,7 +877,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
width = .2,
position = position_dodge(.33)
) +
-geom_line(aes(y = mean), alpha = .5) +
+geom_line(aes(y = mean), alpha = .66) +
xlab("GOSe") +
facet_wrap(~error, nrow = 1) +
scale_y_continuous(name = "", breaks = seq(-2, 8, .25)) +
@@ -889,8 +897,8 @@ plot_summary_measures_cond(
"Summary measures by observed GOSe, LOCF subset"
)
ggsave(filename = "errors_stratified_locf.pdf", width = 9, height = 3)
ggsave(filename = "errors_stratified_locf.png", width = 9, height = 3)
ggsave(filename = "errors_stratified_locf.pdf", width = 9, height = 5)
ggsave(filename = "errors_stratified_locf.png", width = 9, height = 5)
```
Just as with the overall performance, differences are most pronounced in terms
@@ -938,14 +946,14 @@ to the LOCF subset.
* decide whether figures go in appendix - David and I agree on them actually being the
primary analysis. We just need to convince people of the fact that LOCF should be dropped *first*. As always, I am open to debate this, but we should just make a decision: figurexit or figuremain?
-```{r confusion-matrix, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices, full training set without LOCF."}
+```{r confusion-matrix, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices, full training set without LOCF.", fig.height=9, fig.width=6}
plot_confusion_matrices(
df_predictions,
c("MSM", "GP + cov", "MM")
)
ggsave(filename = "confusion_matrices_all.pdf", width = 7, height = 6)
ggsave(filename = "confusion_matrices_all.png", width = 7, height = 6)
ggsave(filename = "confusion_matrices_all.pdf", width = 6, height = 9)
ggsave(filename = "confusion_matrices_all.png", width = 6, height = 9)
```
```{r crossing-table-full, echo=FALSE, warning=FALSE, results='asis'}
@@ -997,15 +1005,15 @@ df_average_confusion_matrices %>%
pander::pandoc.table("Some specific confusion percentages, full data set.", digits = 3)
```
-```{r error-scores-all, echo=FALSE, fig.height=3, fig.width=9}
+```{r error-scores-all, echo=FALSE, fig.height=5, fig.width=9}
plot_summary_measures_cond(
df_predictions %>% filter(!(gupi %in% idx)),
c("MSM", "GP + cov", "MM"),
"Summary measures by observed GOSe, full test set"
)
ggsave(filename = "imputation_error.pdf", width = 9, height = 3)
ggsave(filename = "imputation_error.png", width = 9, height = 3)
ggsave(filename = "imputation_error.pdf", width = 9, height = 5)
ggsave(filename = "imputation_error.png", width = 9, height = 5)
```