figures update

bdafd5db · kevin · b0dd68da · bdafd5db
Commit bdafd5db authored Aug 15, 2019 by kevin
Hide whitespace changes
Inline Side-by-side

Showing with 46 additions and 41 deletions

manuscript/manuscript.Rmd manuscript/manuscript.Rmd +46 -41

No files found.
--- a/manuscript/manuscript.Rmd
+++ b/manuscript/manuscript.Rmd
@@ -564,15 +564,15 @@ compute_summary_measures <- function(df) {
    summarize(
      RMSE    = mean((GOSE - prediction)^2, na.rm = TRUE) %>% sqrt,
      MAE     = mean(abs(GOSE - prediction), na.rm = TRUE),
-      Bias    = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
-      `Bias'` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
+      bias    = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
+      `d-bias` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
    ) %>% 
    ungroup %>% 
    gather(error, value, -model, -fold) %>% 
    mutate(
      error = factor(error, c(
-        "Bias",
-        "Bias'",
+        "bias",
+        "d-bias",
        "MAE",
        "RMSE"
      ))
@@ -663,7 +663,7 @@ The GOSe scale is restricted to 3+ since the imputation is conditional on
 an observed GOSe larger than 1 (deaths are known, so no imputation is necessary)
 and no GOSe 2 was observed.

-```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset.", fig.height=6, fig.width=6}
+```{r confusion-matrix-locf, warning=FALSE, message=FALSE, echo=FALSE, fig.cap="Confusion matrices on LOCF subset.", fig.height=3, fig.width=6}

 plot_confusion_matrices <- function(df_predictions, models, nrow = 2, legendpos, scriptsize) {
  
@@ -686,7 +686,8 @@ plot_confusion_matrices <- function(df_predictions, models, nrow = 2, legendpos,
    ungroup %>% 
    mutate(model = factor(model, models))
  
-  p_cnf_mtrx_raw <- df_average_confusion_matrices %>%
+  # p_cnf_mtrx_raw <- 
+  df_average_confusion_matrices %>%
    ggplot(aes(`Observed GOSE`, `Predicted GOSE`, fill = n)) +
      geom_raster(fill = "white") +
      geom_text(aes(
@@ -710,28 +711,28 @@ plot_confusion_matrices <- function(df_predictions, models, nrow = 2, legendpos,
      facet_wrap(~model, nrow = nrow) +
      ggtitle("Average confusion matrix across folds (absolute counts)")
  
-  p_cnf_mtrx_colnrm <- df_average_confusion_matrices %>%
-    group_by(model, `Observed GOSE`) %>%
-    mutate(
-      `fraction (column)` = n / sum(n),
-      `fraction (column)` = ifelse(is.nan(`fraction (column)`), 0, `fraction (column)`)
-    ) %>% 
-    ggplot(aes(`Observed GOSE`, `Predicted GOSE`, fill = `fraction (column)`)) +
-      geom_raster() +
-      geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") +
-      geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
-      scale_fill_gradient("", low = "white", high = "black", limits = c(0, 1)) +
-      coord_fixed(expand = FALSE) + 
-      labs(x = "observed GOSe", y = "imputed GOSe", fill = "") +
-      theme_bw() +
-      theme(
-        panel.grid = element_blank(),
-        legend.position = legendpos
-      ) + 
-      facet_wrap(~model, nrow = nrow) +
-      ggtitle("Average confusion matrix across folds (column fraction)")
+  # p_cnf_mtrx_colnrm <- df_average_confusion_matrices %>%
+  #   group_by(model, `Observed GOSE`) %>%
+  #   mutate(
+  #     `fraction (column)` = n / sum(n),
+  #     `fraction (column)` = ifelse(is.nan(`fraction (column)`), 0, `fraction (column)`)
+  #   ) %>% 
+  #   ggplot(aes(`Observed GOSE`, `Predicted GOSE`, fill = `fraction (column)`)) +
+  #     geom_raster() +
+  #     geom_hline(yintercept = c(2, 4, 6) + .5, color = "black") +
+  #     geom_vline(xintercept = c(2, 4, 6) + .5, color = "black") +
+  #     scale_fill_gradient("", low = "white", high = "black", limits = c(0, 1)) +
+  #     coord_fixed(expand = FALSE) + 
+  #     labs(x = "observed GOSe", y = "imputed GOSe", fill = "") +
+  #     theme_bw() +
+  #     theme(
+  #       panel.grid = element_blank(),
+  #       legend.position = legendpos
+  #     ) + 
+  #     facet_wrap(~model, nrow = nrow) +
+  #     ggtitle("Average confusion matrix across folds (column fraction)")
  
-  cowplot::plot_grid(p_cnf_mtrx_raw, p_cnf_mtrx_colnrm, ncol = 1, align = "h")      
+  # cowplot::plot_grid(p_cnf_mtrx_raw, p_cnf_mtrx_colnrm, ncol = 1, align = "h")      
 }

 plot_confusion_matrices(
@@ -743,8 +744,8 @@ plot_confusion_matrices(
  scriptsize = 2.5
 )

-ggsave(filename = "confusion_matrices_locf.pdf", width = 6, height = 6)
-ggsave(filename = "confusion_matrices_locf.png", width = 6, height = 6)
+ggsave(filename = "confusion_matrices_locf.pdf", width = 6, height = 3)
+ggsave(filename = "confusion_matrices_locf.png", width = 6, height = 3)
 ```

 The absolute-count confusion matrices show that most imputed values are 
@@ -772,8 +773,8 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
    summarize(
      RMSE = mean((GOSE - prediction)^2, na.rm = TRUE) %>% sqrt,
      MAE  = mean(abs(GOSE - prediction), na.rm = TRUE),
-      Bias = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
-      `Bias'` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
+      bias = mean(prediction, na.rm = TRUE) - mean(GOSE, na.rm = TRUE),
+      `d-bias` = mean(prediction > GOSE, na.rm = TRUE) - mean(prediction < GOSE, na.rm = TRUE)
    ) %>% 
    gather(error, value, -model, -GOSE, -fold) %>% 
    group_by(GOSE, model, error, fold) %>% 
@@ -789,20 +790,24 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
    mutate(
      model = factor(model, models),
      error = factor(error, c(
-        "Bias",
-        "Bias'",
+        "bias",
+        "d-bias",
        "MAE",
        "RMSE"
      ))   
    ) %>% 
  ggplot(aes(GOSE, color = model)) + 
      geom_hline(yintercept = 0, color = "black") +
-      geom_line(aes(y = mean)) + 
+      geom_line(aes(y = mean), alpha = .5) + 
+      geom_point(aes(y = mean), size = .3,
+        position = position_dodge(.2)) + 
      geom_errorbar(aes(ymin = mean - 1.96*se, ymax = mean + 1.96*se), 
-        width = .2,
-        position = position_dodge(.33),
-        size = 1
+        width = .33,
+        size = .5,
+        position = position_dodge(.2)
      ) +
+      geom_point(aes(y = mean),
+        position = position_dodge(.2)) + 
      xlab("observed GOSe") + 
      facet_wrap(~error, nrow = 1) +
      scale_y_continuous(name = "", breaks = seq(-2, 8, .5)) +
@@ -824,7 +829,7 @@ plot_summary_measures_cond(
 )

 ggsave(filename = "errors_stratified_locf.pdf", width = 6, height = 3.5)
-ggsave(filename = "errors_stratified_locf.png", width = 6, height = 3.5)
+ggsave(filename = "errors_stratified_locf.png", width = 6, height = 3.5, scale = 1.25)
 ```

 Just as with the overall performance, differences are most pronounced in terms
@@ -879,8 +884,8 @@ plot_confusion_matrices(
  scriptsize = 3
 )

-ggsave(filename = "confusion_matrices_all.pdf", width = 6, height = 6)
-ggsave(filename = "confusion_matrices_all.png", width = 6, height = 6)
+ggsave(filename = "confusion_matrices_all.pdf", width = 6, height = 3)
+ggsave(filename = "confusion_matrices_all.png", width = 6, height = 3)
 ```


@@ -893,7 +898,7 @@ plot_summary_measures_cond(
 )

 ggsave(filename = "imputation_error.pdf", width = 6, height = 3.5)
-ggsave(filename = "imputation_error.png", width = 6, height = 3.5)
+ggsave(filename = "imputation_error.png", width = 6, height = 3.5, scale = 1.25)
 ```