Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
GOSe-6mo-imputation-paper
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Kevin Kunzmann
GOSe-6mo-imputation-paper
Commits
1a64a6af
Commit
1a64a6af
authored
Mar 18, 2019
by
Kevin Kunzmann
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Rework curl data download
parent
fa7750c2
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
54 additions
and
61 deletions
+54
-61
README.md
README.md
+4
-0
Snakefile
Snakefile
+3
-37
download.R
download.R
+6
-0
scripts/download_v1.1.sh
scripts/download_v1.1.sh
+41
-0
scripts/import_neurobot_data.R
scripts/import_neurobot_data.R
+0
-24
No files found.
README.md
View file @
1a64a6af
...
...
@@ -3,10 +3,14 @@
# Prerequisites
We assume a Unix command line workflow. The following software is required to take advantage of the pre-defined workflow:
*
curl for downloading the data (in case you do not have curl installed, it is also available from within the container)
*
[
python
](
https://www.python.org/download/releases/3.5.1/
)
3.5.1 (higher versions might work as well)
*
[
snakemake
](
https://snakemake.readthedocs.io/en/stable/getting_started/installation.html
)
version 5.2.1 (higher versions will work as well)
*
[
singularity
](
https://www.sylabs.io/guides/2.6/user-guide/index.html
)
2.6.0 (higher versions might work as well)
*
a CENTER-TBI account and API key; store the user name and the API key in the NEUROBOT_USR and NEUROBOT_API
environment variables, respectively.
The entire analysis is containerized using a
[
docker container
](
https://cloud.docker.com/u/kkmann/repository/docker/kkmann/gose-6mo-imputation
)
.
The container can either be used to execute scripts individually inside the container, or it can be used to run the entire
pre-defined snakemake workflow using the container via singularity (recommended).
...
...
Snakefile
View file @
1a64a6af
...
...
@@ -6,7 +6,7 @@ configfile: "config.yml"
rule
import_neurobot_csv
:
rule
download_data
:
output:
"data/{version}/df_baseline.rds",
"data/{version}/df_ctmri.rds",
...
...
@@ -15,7 +15,7 @@ rule import_neurobot_csv:
"data/{version}/df_gose.rds"
shell:
"""
Rscript scripts/import_neurobot_data.R data/{wildcards.version} data/{wildcards.version}
bash scripts/download_{wildcards.version}.sh
"""
...
...
@@ -24,7 +24,7 @@ rule import_neurobot_csv:
rule prepare_data:
input:
rules.
import_neurobot_csv
.output,
rules.
download_data
.output,
markdown = "reports/prepare_data.Rmd"
output:
"output/{version}/data/df_gose.rds",
...
...
@@ -39,12 +39,6 @@ rule prepare_data:
mv reports/figures.zip {output.figures}
"""
# define corresponding target rule for ease of use
rule data_report_v1_1:
input:
pdf = "output/v1.1/prepare_data.pdf",
figures = "output/v1.1/prepare_data_figures.zip"
...
...
@@ -80,14 +74,6 @@ rule generate_validation_data:
Rscript scripts/generate_validation_data.R output/{wildcards.version}/data {config[mi_m]} {config[folds]} {config[seed]}
"""
rule generate_validation_data_v1_1:
input:
["output/v1.1/data/validation/df_%s_mi_%i_fold_%i.rds" % (s, i, j)
for s in ("train", "test")
for i in range(1, config["mi_m"] + 1)
for j in range(1, config["folds"] + 1)
]
...
...
@@ -125,26 +111,6 @@ rule model_posteriors:
for j in range(1, config["folds"] + 1)
]
rule model_assessment:
input:
pop_report = rules.prepare_data.output,
posteriors = rules.model_posteriors.input,
markdown = "reports/model_assessment.Rmd"
output:
pdf = "output/{version}/model_assessment.pdf",
figures = "output/{version}/model_assessment_figures.zip"
shell:
"""
mkdir -p output/{wildcards.version}
Rscript -e "rmarkdown::render(\\"{input.markdown}\\", output_dir = \\"output/{wildcards.version}\\", params = list(data_dir = \\"../output/{wildcards.version}/data\\", config_file = \\"../config.yml\\"))"
mv reports/figures.zip {output.figures}
"""
# define corresponding target rule for ease of use
rule cv_model_comparison_report_v1_1:
input:
pdf = "output/v1.1/model_assessment.pdf",
figures = "output/v1.1/model_assessment_figures.zip"
...
...
download.R
0 → 100644
View file @
1a64a6af
#!/usr/bin/env bash
#
# Fetch a single CSV export from neurobot-stage.incf.org and write it to
# standard output.
#
# The NEUROBOT_USR and NEUROBOT_API environment variables are handed to curl
# as user name and password for HTTP digest authentication.
#
# NOTE(review): this file carries an .R extension but contains a shell
# script - confirm the intended file name.

# --fail turns an HTTP error status into a non-zero exit code instead of
# printing the server's error page as if it were CSV data.
curl \
    --fail \
    --user "$NEUROBOT_USR:$NEUROBOT_API" \
    --digest https://neurobot-stage.incf.org/api/data/_5c8a757252dc3879e3b7cc35.csv
scripts/download_v1.1.sh
0 → 100644
View file @
1a64a6af
#!/usr/bin/env bash
#
# Download the five v1.1 CSV exports from center-tbi.incf.org and store each
# one as data/v1.1/df_<name>.csv together with a tibble copy saved as
# data/v1.1/df_<name>.rds.
#
# The NEUROBOT_USR and NEUROBOT_API environment variables are handed to curl
# as user name and password for HTTP digest authentication.

# Stop at the first failing command, unset variable or broken pipeline so a
# failed download is never converted into an .rds file.
set -euo pipefail

: "${NEUROBOT_USR:?NEUROBOT_USR must be set}"
: "${NEUROBOT_API:?NEUROBOT_API must be set}"

VERSION=v1.1
OUT=data/$VERSION
API=https://center-tbi.incf.org/api/data

mkdir -p "$OUT"

# download NAME ID
#   Fetch export _ID.csv into $OUT/df_NAME.csv and convert it to
#   $OUT/df_NAME.rds.
download() {
    local name=$1
    local id=$2
    local csv=$OUT/df_$name.csv
    local rds=$OUT/df_$name.rds

    # --fail makes curl exit non-zero on an HTTP error status instead of
    # writing the error page into the CSV file.
    curl \
        --fail \
        --user "$NEUROBOT_USR:$NEUROBOT_API" \
        --digest "$API/_$id.csv" > "$csv"

    Rscript -e "library(tidyverse); saveRDS(as_tibble(read_csv('$csv')), file = '$rds')"
}

download ctmri    5c5485306b3f2f22e14d209a
download imaging  5c5488246b3f2f22e14d209d
download labs     5c5489696b3f2f22e14d209f
download gose     5c548a056b3f2f22e14d20a0
download baseline 5c548a5b6b3f2f22e14d20a2
scripts/import_neurobot_data.R
deleted
100644 → 0
View file @
fa7750c2
# Convert the CSV exports found in a folder into .rds files.
#
# Usage: Rscript scripts/import_neurobot_data.R <in_folder> <out_folder>
#
# Every CSV file in <in_folder> is read and, for each marker column it
# contains, saved as a tibble under the matching df_*.rds name in
# <out_folder>. Exactly five CSV files are expected.
library(tidyverse)

args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("usage: import_neurobot_data.R <in_folder> <out_folder>", call. = FALSE)
}
in_folder <- args[[1]]
out_folder <- args[[2]]

# `pattern` is a regular expression, not a glob: anchor on the extension so
# that only names ending in ".csv" are selected (not e.g. "x.csv.bak").
in_files <- sprintf(
  "%s/%s",
  in_folder,
  list.files(path = in_folder, pattern = "\\.csv$")
)

if (length(in_files) != 5) {
  stop("must have exactly 5 input files")
}

# Marker column -> output file. A CSV containing the marker column is written
# to the corresponding file; presumably each marker is unique to one export
# (TODO confirm against the data dictionary).
markers <- c(
  "Subject.Age" = "df_baseline.rds",
  "Outcomes.DerivedCompositeGOSE" = "df_gose.rds",
  "Labs.DLDate" = "df_labs.rds",
  "Imaging.MarshallCTClassification" = "df_imaging.rds",
  "CTMRI.CTSubarachnoidHem" = "df_ctmri.rds"
)

for (f in in_files) {
  tmp <- read_csv(f)
  matched <- names(markers)[names(markers) %in% names(tmp)]

  # Make unrecognised files visible instead of dropping them silently.
  if (length(matched) == 0) {
    warning("no known marker column found in ", f, call. = FALSE)
  }

  for (m in matched) {
    saveRDS(
      as_tibble(tmp),
      file = sprintf("%s/%s", out_folder, markers[[m]])
    )
  }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment