Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
GOSe-6mo-imputation-paper
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Kevin Kunzmann
GOSe-6mo-imputation-paper
Commits
943d2c27
Commit
943d2c27
authored
Apr 01, 2019
by
Kevin Kunzmann
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
updated figures
parent
476dbbdd
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
39 additions
and
31 deletions
+39
-31
manuscript/manuscript.Rmd
manuscript/manuscript.Rmd
+39
-31
No files found.
manuscript/manuscript.Rmd
View file @
943d2c27
...
@@ -548,9 +548,9 @@ MAE and RMSE are both a measures of average precision where
...
@@ -548,9 +548,9 @@ MAE and RMSE are both a measures of average precision where
RMSE
puts
more
weight
on
large
deviations
as
compared
to
RMSE
.
RMSE
puts
more
weight
on
large
deviations
as
compared
to
RMSE
.
Comparisons
in
terms
of
bias
,
MAE
,
and
RMSE
tacitly
assume
that
Comparisons
in
terms
of
bias
,
MAE
,
and
RMSE
tacitly
assume
that
GOSe
values
can
be
sensibly
interpreted
on
an
interval
scale
.
GOSe
values
can
be
sensibly
interpreted
on
an
interval
scale
.
We
therefore
also
consider
$
Pr
[
est
>
true
]
-
Pr
[
est
<
true
]$
as
an
We
therefore
also
consider
$
Pr
[
imp
.
>
true
]
-
Pr
[
imp
.
<
true
]$
as
an
alternative
measure
of
bias
which
does
not
require
this
tacit
assumption
.
alternative
measure
of
bias
which
does
not
require
this
tacit
assumption
.
Note
that
the
scale
is
not
directl
z
comparable
to
the
one
of
the
Note
that
the
scale
is
not
directl
y
comparable
to
the
one
of
the
other
three
quantities
!
other
three
quantities
!
All
measures
are
considered
both
conditional
on
the
ground
-
truth
All
measures
are
considered
both
conditional
on
the
ground
-
truth
(
unobserved
observed
GOSe
)
as
well
as
averaged
over
the
entire
test
set
.
(
unobserved
observed
GOSe
)
as
well
as
averaged
over
the
entire
test
set
.
...
@@ -581,33 +581,41 @@ The overall performance of all fitted models in terms of bias, MAE, and RMSE
...
@@ -581,33 +581,41 @@ The overall performance of all fitted models in terms of bias, MAE, and RMSE
is
depicted
in
Figure
???
both
conditional
on
LOCF
being
applicable
and
,
is
depicted
in
Figure
???
both
conditional
on
LOCF
being
applicable
and
,
excluding
LOCF
,
on
the
entire
test
set
.
excluding
LOCF
,
on
the
entire
test
set
.
```{
r
overall
-
comparison
-
all
-
methods
,
echo
=
FALSE
,
fig
.
height
=
7
}
```{
r
overall
-
comparison
-
all
-
methods
,
echo
=
FALSE
,
fig
.
height
=
9
,
fig
.
width
=
6
}
plot_summary_measures
<-
function
(
df
,
label
)
{
plot_summary_measures
<-
function
(
df
,
label
)
{
df_predictions
%>%
df
%>%
filter
(
!(gupi %in% idx)) %>%
group_by
(
model
,
fold
)
%>%
group_by
(
model
,
fold
)
%>%
summarize
(
summarize
(
RMSE
=
mean
((
GOSE
-
prediction
)^
2
,
na
.
rm
=
TRUE
)
%>%
sqrt
,
RMSE
=
mean
((
GOSE
-
prediction
)^
2
,
na
.
rm
=
TRUE
)
%>%
sqrt
,
MAE
=
mean
(
abs
(
GOSE
-
prediction
),
na
.
rm
=
TRUE
),
MAE
=
mean
(
abs
(
GOSE
-
prediction
),
na
.
rm
=
TRUE
),
Bias
=
mean
(
prediction
,
na
.
rm
=
TRUE
)
-
mean
(
GOSE
,
na
.
rm
=
TRUE
),
Bias
=
mean
(
prediction
,
na
.
rm
=
TRUE
)
-
mean
(
GOSE
,
na
.
rm
=
TRUE
),
`
Pr
[
est
.
>
true
]
-
Pr
[
est
.
<
true
]`
=
mean
(
prediction
>
GOSE
,
na
.
rm
=
TRUE
)
-
mean
(
prediction
<
GOSE
,
na
.
rm
=
TRUE
)
`
Pr
[
imp
.
>
true
]
-
Pr
[
imp
.
<
true
]`
=
mean
(
prediction
>
GOSE
,
na
.
rm
=
TRUE
)
-
mean
(
prediction
<
GOSE
,
na
.
rm
=
TRUE
)
)
%>%
)
%>%
ungroup
%>%
ungroup
%>%
gather
(
error
,
value
,
-
model
,
-
fold
)
%>%
gather
(
error
,
value
,
-
model
,
-
fold
)
%>%
mutate
(
mutate
(
error
=
factor
(
error
,
c
(
error
=
factor
(
error
,
c
(
"Bias"
,
"Bias"
,
"Pr[
est. > true] - Pr[est
. < true]"
,
"Pr[
imp. > true] - Pr[imp
. < true]"
,
"MAE"
,
"MAE"
,
"RMSE"
"RMSE"
))
))
)
%>%
)
%>%
ggplot
(
aes
(
model
,
value
))
+
group_by
(
model
,
error
)
%>%
summarize
(
mean_error
=
mean
(
value
),
se_error
=
sd
(
value
)
/
sqrt
(
n
())
)
%>%
ggplot
(
aes
(
x
=
model
,
y
=
mean_error
))
+
geom_hline
(
yintercept
=
0
,
color
=
"black"
)
+
geom_hline
(
yintercept
=
0
,
color
=
"black"
)
+
geom_boxplot
()
+
geom_point
(
size
=
.8
)
+
facet_wrap
(~
error
,
nrow
=
1
)
+
geom_errorbar
(
scale_y_continuous
(
name
=
""
,
breaks
=
seq
(-
2
,
8
,
.25
),
limits
=
c
(-
.5
,
1.5
))
+
aes
(
ymin
=
mean_error
-
1.96
*
se_error
,
ymax
=
mean_error
+
1.96
*
se_error
),
width
=
.25
)
+
facet_wrap
(~
error
,
nrow
=
2
)
+
scale_y_continuous
(
name
=
""
,
breaks
=
seq
(-
2
,
8
,
.25
),
limits
=
c
(-
.5
,
1.25
))
+
scale_x_discrete
(
""
)
+
scale_x_discrete
(
""
)
+
theme_bw
()
+
theme_bw
()
+
theme
(
theme
(
...
@@ -621,7 +629,7 @@ plot_summary_measures <- function(df, label) {
...
@@ -621,7 +629,7 @@ plot_summary_measures <- function(df, label) {
cowplot
::
plot_grid
(
cowplot
::
plot_grid
(
plot_summary_measures
(
plot_summary_measures
(
df_predictions
%>%
filter
(
gupi
%
in
%
idx
),
df_predictions
%>%
filter
(
!(gupi %in% idx)
),
"Summary measures, LOCF subset"
"Summary measures, LOCF subset"
),
),
plot_summary_measures
(
plot_summary_measures
(
...
@@ -674,7 +682,7 @@ We first consider results for the set of test cases which allow LOCF imputation
...
@@ -674,7 +682,7 @@ We first consider results for the set of test cases which allow LOCF imputation
Both
the
raw
count
as
well
as
the
relative
(
by
left
-
out
observed
GOSe
)
confusion
matrices
Both
the
raw
count
as
well
as
the
relative
(
by
left
-
out
observed
GOSe
)
confusion
matrices
are
presented
in
Figure
???.
are
presented
in
Figure
???.
```{
r
confusion
-
matrix
-
locf
,
warning
=
FALSE
,
message
=
FALSE
,
echo
=
FALSE
,
fig
.
cap
=
"Confusion matrices on LOCF subset."
}
```{
r
confusion
-
matrix
-
locf
,
warning
=
FALSE
,
message
=
FALSE
,
echo
=
FALSE
,
fig
.
cap
=
"Confusion matrices on LOCF subset."
,
fig
.
height
=
9
,
fig
.
width
=
6
}
plot_confusion_matrices
<-
function
(
df_predictions
,
models
)
{
plot_confusion_matrices
<-
function
(
df_predictions
,
models
)
{
df_average_confusion_matrices
<-
df_predictions
%>%
df_average_confusion_matrices
<-
df_predictions
%>%
...
@@ -702,7 +710,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
...
@@ -702,7 +710,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
label
=
sprintf
(
"%.1f"
,
n
)
%>%
label
=
sprintf
(
"%.1f"
,
n
)
%>%
ifelse
(.
==
"0.0"
,
""
,
.)
ifelse
(.
==
"0.0"
,
""
,
.)
),
),
size
=
1.5
size
=
2
)
+
)
+
geom_hline
(
yintercept
=
c
(
2
,
4
,
6
)
+
.5
,
color
=
"black"
)
+
geom_hline
(
yintercept
=
c
(
2
,
4
,
6
)
+
.5
,
color
=
"black"
)
+
geom_vline
(
xintercept
=
c
(
2
,
4
,
6
)
+
.5
,
color
=
"black"
)
+
geom_vline
(
xintercept
=
c
(
2
,
4
,
6
)
+
.5
,
color
=
"black"
)
+
...
@@ -713,7 +721,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
...
@@ -713,7 +721,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
theme
(
theme
(
panel
.
grid
=
element_blank
()
panel
.
grid
=
element_blank
()
)
+
)
+
facet_wrap
(~
model
,
nrow
=
1
)
+
facet_wrap
(~
model
,
nrow
=
2
)
+
ggtitle
(
"Average confusion matrix accross folds (absolute counts)"
)
ggtitle
(
"Average confusion matrix accross folds (absolute counts)"
)
p_cnf_mtrx_colnrm
<-
df_average_confusion_matrices
%>%
p_cnf_mtrx_colnrm
<-
df_average_confusion_matrices
%>%
...
@@ -733,7 +741,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
...
@@ -733,7 +741,7 @@ plot_confusion_matrices <- function(df_predictions, models) {
theme
(
theme
(
panel
.
grid
=
element_blank
()
panel
.
grid
=
element_blank
()
)
+
)
+
facet_wrap
(~
model
,
nrow
=
1
)
+
facet_wrap
(~
model
,
nrow
=
2
)
+
ggtitle
(
"Average confusion matrix accross folds (column fraction)"
)
ggtitle
(
"Average confusion matrix accross folds (column fraction)"
)
cowplot
::
plot_grid
(
p_cnf_mtrx_raw
,
p_cnf_mtrx_colnrm
,
ncol
=
1
,
align
=
"v"
)
cowplot
::
plot_grid
(
p_cnf_mtrx_raw
,
p_cnf_mtrx_colnrm
,
ncol
=
1
,
align
=
"v"
)
...
@@ -745,8 +753,8 @@ plot_confusion_matrices(
...
@@ -745,8 +753,8 @@ plot_confusion_matrices(
c
(
"MSM"
,
"GP + cov"
,
"MM"
,
"LOCF"
)
c
(
"MSM"
,
"GP + cov"
,
"MM"
,
"LOCF"
)
)
)
ggsave
(
filename
=
"confusion_matrices_locf.pdf"
,
width
=
7
,
height
=
6
)
ggsave
(
filename
=
"confusion_matrices_locf.pdf"
,
width
=
6
,
height
=
9
)
ggsave
(
filename
=
"confusion_matrices_locf.png"
,
width
=
7
,
height
=
6
)
ggsave
(
filename
=
"confusion_matrices_locf.png"
,
width
=
6
,
height
=
9
)
```
```
The
absolute
-
count
confusion
matrices
show
that
most
imputed
values
are
The
absolute
-
count
confusion
matrices
show
that
most
imputed
values
are
...
@@ -831,7 +839,7 @@ we also consider the performance conditional on the respective ground-truth
...
@@ -831,7 +839,7 @@ we also consider the performance conditional on the respective ground-truth
(
i
.
e
.
the
observed
GOSe
values
in
the
test
sets
).
(
i
.
e
.
the
observed
GOSe
values
in
the
test
sets
).
The
results
are
shown
in
Figure
???
(
vertical
bars
are
=/-
one
standard
error
of
the
mean
).
The
results
are
shown
in
Figure
???
(
vertical
bars
are
=/-
one
standard
error
of
the
mean
).
```{
r
error
-
scores
-
locf
,
echo
=
FALSE
,
fig
.
height
=
3
,
fig
.
width
=
9
}
```{
r
error
-
scores
-
locf
,
echo
=
FALSE
,
fig
.
height
=
5
,
fig
.
width
=
9
}
plot_summary_measures_cond
<-
function
(
df_predictions
,
models
,
label
)
{
plot_summary_measures_cond
<-
function
(
df_predictions
,
models
,
label
)
{
df_predictions
%>%
df_predictions
%>%
...
@@ -841,7 +849,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
...
@@ -841,7 +849,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
RMSE
=
mean
((
GOSE
-
prediction
)^
2
,
na
.
rm
=
TRUE
)
%>%
sqrt
,
RMSE
=
mean
((
GOSE
-
prediction
)^
2
,
na
.
rm
=
TRUE
)
%>%
sqrt
,
MAE
=
mean
(
abs
(
GOSE
-
prediction
),
na
.
rm
=
TRUE
),
MAE
=
mean
(
abs
(
GOSE
-
prediction
),
na
.
rm
=
TRUE
),
Bias
=
mean
(
prediction
,
na
.
rm
=
TRUE
)
-
mean
(
GOSE
,
na
.
rm
=
TRUE
),
Bias
=
mean
(
prediction
,
na
.
rm
=
TRUE
)
-
mean
(
GOSE
,
na
.
rm
=
TRUE
),
`
Pr
[
est
.
>
true
]
-
Pr
[
est
.
<
true
]`
=
mean
(
prediction
>
GOSE
,
na
.
rm
=
TRUE
)
-
mean
(
prediction
<
GOSE
,
na
.
rm
=
TRUE
)
`
Pr
[
imp
.
>
true
]
-
Pr
[
imp
.
<
true
]`
=
mean
(
prediction
>
GOSE
,
na
.
rm
=
TRUE
)
-
mean
(
prediction
<
GOSE
,
na
.
rm
=
TRUE
)
)
%>%
)
%>%
gather
(
error
,
value
,
-
model
,
-
GOSE
,
-
fold
)
%>%
gather
(
error
,
value
,
-
model
,
-
GOSE
,
-
fold
)
%>%
group_by
(
GOSE
,
model
,
error
,
fold
)
%>%
group_by
(
GOSE
,
model
,
error
,
fold
)
%>%
...
@@ -858,7 +866,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
...
@@ -858,7 +866,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
model
=
factor
(
model
,
models
),
model
=
factor
(
model
,
models
),
error
=
factor
(
error
,
c
(
error
=
factor
(
error
,
c
(
"Bias"
,
"Bias"
,
"Pr[
est. > true] - Pr[est
. < true]"
,
"Pr[
imp. > true] - Pr[imp
. < true]"
,
"MAE"
,
"MAE"
,
"RMSE"
"RMSE"
))
))
...
@@ -869,7 +877,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
...
@@ -869,7 +877,7 @@ plot_summary_measures_cond <- function(df_predictions, models, label) {
width
=
.2
,
width
=
.2
,
position
=
position_dodge
(
.33
)
position
=
position_dodge
(
.33
)
)
+
)
+
geom_line
(
aes
(
y
=
mean
),
alpha
=
.
5
)
+
geom_line
(
aes
(
y
=
mean
),
alpha
=
.
66
)
+
xlab
(
"GOSe"
)
+
xlab
(
"GOSe"
)
+
facet_wrap
(~
error
,
nrow
=
1
)
+
facet_wrap
(~
error
,
nrow
=
1
)
+
scale_y_continuous
(
name
=
""
,
breaks
=
seq
(-
2
,
8
,
.25
))
+
scale_y_continuous
(
name
=
""
,
breaks
=
seq
(-
2
,
8
,
.25
))
+
...
@@ -889,8 +897,8 @@ plot_summary_measures_cond(
...
@@ -889,8 +897,8 @@ plot_summary_measures_cond(
"Summary measures by observed GOSe, LOCF subset"
"Summary measures by observed GOSe, LOCF subset"
)
)
ggsave
(
filename
=
"errors_stratified_locf.pdf"
,
width
=
9
,
height
=
3
)
ggsave
(
filename
=
"errors_stratified_locf.pdf"
,
width
=
9
,
height
=
5
)
ggsave
(
filename
=
"errors_stratified_locf.png"
,
width
=
9
,
height
=
3
)
ggsave
(
filename
=
"errors_stratified_locf.png"
,
width
=
9
,
height
=
5
)
```
```
Just
as
with
the
overall
performance
,
differences
are
most
pronounced
in
terms
Just
as
with
the
overall
performance
,
differences
are
most
pronounced
in
terms
...
@@ -938,14 +946,14 @@ to the LOCF subset.
...
@@ -938,14 +946,14 @@ to the LOCF subset.
*
decide
whether
figures
go
in
appendix
-
David
and
I
agree
on
them
being
actually
the
*
decide
whether
figures
go
in
appendix
-
David
and
I
agree
on
them
being
actually
the
primary
analysis
.
we
just
needto
convince
people
of
the
fact
that
LOCF
should
be
dropped
*
first
*.
As
always
,
I
am
open
to
debate
this
but
we
should
just
make
a
decision
,
figurexit
or
figuremain
?
primary
analysis
.
we
just
needto
convince
people
of
the
fact
that
LOCF
should
be
dropped
*
first
*.
As
always
,
I
am
open
to
debate
this
but
we
should
just
make
a
decision
,
figurexit
or
figuremain
?
```{
r
confusion
-
matrix
,
warning
=
FALSE
,
message
=
FALSE
,
echo
=
FALSE
,
fig
.
cap
=
"Confusion matrices, full training set without LOCF."
}
```{
r
confusion
-
matrix
,
warning
=
FALSE
,
message
=
FALSE
,
echo
=
FALSE
,
fig
.
cap
=
"Confusion matrices, full training set without LOCF."
,
fig
.
height
=
9
,
fig
.
width
=
6
}
plot_confusion_matrices
(
plot_confusion_matrices
(
df_predictions
,
df_predictions
,
c
(
"MSM"
,
"GP + cov"
,
"MM"
)
c
(
"MSM"
,
"GP + cov"
,
"MM"
)
)
)
ggsave
(
filename
=
"confusion_matrices_all.pdf"
,
width
=
7
,
height
=
6
)
ggsave
(
filename
=
"confusion_matrices_all.pdf"
,
width
=
6
,
height
=
9
)
ggsave
(
filename
=
"confusion_matrices_all.png"
,
width
=
7
,
height
=
6
)
ggsave
(
filename
=
"confusion_matrices_all.png"
,
width
=
6
,
height
=
9
)
```
```
```{
r
crossing
-
table
-
full
,
echo
=
FALSE
,
warning
=
FALSE
,
results
=
'asis'
}
```{
r
crossing
-
table
-
full
,
echo
=
FALSE
,
warning
=
FALSE
,
results
=
'asis'
}
...
@@ -997,15 +1005,15 @@ df_average_confusion_matrices %>%
...
@@ -997,15 +1005,15 @@ df_average_confusion_matrices %>%
pander
::
pandoc
.
table
(
"Some specific confusion percentages, full data set."
,
digits
=
3
)
pander
::
pandoc
.
table
(
"Some specific confusion percentages, full data set."
,
digits
=
3
)
```
```
```{
r
error
-
scores
-
all
,
echo
=
FALSE
,
fig
.
height
=
3
,
fig
.
width
=
9
}
```{
r
error
-
scores
-
all
,
echo
=
FALSE
,
fig
.
height
=
5
,
fig
.
width
=
9
9
}
plot_summary_measures_cond
(
plot_summary_measures_cond
(
df_predictions
%>%
filter
(
!(gupi %in% idx)),
df_predictions
%>%
filter
(
!(gupi %in% idx)),
c
(
"MSM"
,
"GP + cov"
,
"MM"
),
c
(
"MSM"
,
"GP + cov"
,
"MM"
),
"Summary measures by observed GOSe, full test set"
"Summary measures by observed GOSe, full test set"
)
)
ggsave
(
filename
=
"imputation_error.pdf"
,
width
=
9
,
height
=
3
)
ggsave
(
filename
=
"imputation_error.pdf"
,
width
=
9
,
height
=
5
)
ggsave
(
filename
=
"imputation_error.png"
,
width
=
9
,
height
=
3
)
ggsave
(
filename
=
"imputation_error.png"
,
width
=
9
,
height
=
5
)
```
```
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment