# Container image (pinned by digest) in which every rule is executed.
singularity: "docker://kkmann/gose-6mo-imputation@sha256:62540d4bc41b228639bce7e4fe764acfaaeef76e467b92d9c55b26f8ea4f4c5f"

# Workflow parameters (mi_m, mi_maxiter, folds, seed, age limits, ...) are
# read from this file and exposed through the global `config` dictionary.
configfile: "config.yml"


# Fetch the raw data tables for one data version by running the
# version-specific download script.
rule download_data:
    output:
        "data/{version}/df_baseline.rds",
        "data/{version}/df_ctmri.rds",
        "data/{version}/df_imaging.rds",
        "data/{version}/df_labs.rds",
        "data/{version}/df_gose.rds"
    shell:
        """
        bash scripts/download_{wildcards.version}.sh
        """


# Render the data-preparation report. The report is rendered into
# output/{version}; the .rds files and figures.zip it leaves in reports/
# are moved into the output tree afterwards.
rule prepare_data:
    input:
        rules.download_data.output,
        markdown = "reports/prepare_data.Rmd"
    output:
        "output/{version}/data/df_gose.rds",
        "output/{version}/data/df_baseline.rds",
        "output/{version}/prepare_data.html",
        figures = "output/{version}/prepare_data_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}/data
        Rscript -e "rmarkdown::render(\\"{input.markdown}\\", output_dir = \\"output/{wildcards.version}\\", params = list(datapath = \\"../data/{wildcards.version}\\", max_lab_days = {config[max_lab_days]}, seed = {config[seed]}, age_min = {config[age_min]}, age_max = {config[age_max]}))"
        mv reports/*.rds output/{wildcards.version}/data
        mv reports/figures.zip {output.figures}
        """


# Multiply impute the baseline table: one output file per imputation
# index 1..config["mi_m"].
rule impute_baseline:
    input:
        rules.prepare_data.output
    output:
        [
            "output/{version}/data/mi_baseline/df_baseline_mi_%i.rds" % mi_idx
            for mi_idx in range(1, config["mi_m"] + 1)
        ]
    shell:
        """
        mkdir -p output/{wildcards.version}/data/mi_baseline
        Rscript scripts/impute_baseline.R output/{wildcards.version}/data/df_baseline.rds output/{wildcards.version}/data/mi_baseline {config[mi_m]} {config[mi_maxiter]} {config[seed]}
        """


# Build the train/test datasets used for validation: one file per
# (split, imputation index, fold) combination.
rule generate_validation_data:
    input:
        rules.prepare_data.output,
        rules.impute_baseline.output
    output:
        [
            "output/{version}/data/validation/df_%s_mi_%i_fold_%i.rds" % (split, mi_idx, fold)
            for split in ("train", "test")
            for mi_idx in range(1, config["mi_m"] + 1)
            for fold in range(1, config["folds"] + 1)
        ]
    shell:
        """
        mkdir -p output/{wildcards.version}/data/validation
        Rscript scripts/generate_validation_data.R output/{wildcards.version}/data {config[mi_m]} {config[folds]} {config[seed]}
        """


# adjust threads by model type
def get_rule_threads(wildcards):
    """Return the number of threads to request for a model-fitting job.

    The "locf" and "msm" models are given a single thread; every other
    model is given config["stan"]["chains"] threads.
    """
    if wildcards.model in ("locf", "msm"):
        return 1
    return config["stan"]["chains"]


# Fit one model on one training set (imputation index i, fold j).
# config.yml is listed as an input so that a changed configuration triggers
# a refit; the training data is therefore input[1].
rule fit_model_validation_set:
    input:
        "config.yml",
        "output/{version}/data/validation/df_train_mi_{i}_fold_{j}.rds"
    output:
        "output/{version}/data/validation/posteriors/{model}/df_posterior_mi_{i}_fold_{j}.rds"
    threads: get_rule_threads
    shell:
        """
        mkdir -p output/{wildcards.version}/data/validation/posteriors/{wildcards.model}
        Rscript models/{wildcards.model}/fit.R {input[1]} {output}
        """


# helper rule to just build all posterior datasets
# (target rule without wildcards; the data version is fixed to v1.1)
rule model_posteriors:
    input:
        [
            "output/v1.1/data/validation/posteriors/%s/df_posterior_mi_%i_fold_%i.rds" % (m, i, j)
            for m in ("locf", "msm", "msm_age", "gp", "gp_nb", "mm", "mm_nb")
            for i in range(1, config["mi_m"] + 1)
            for j in range(1, config["folds"] + 1)
        ]


# rules for imputing on entire dataset
rule generate_imputation_data:
    input:
        rules.prepare_data.output,
        rules.impute_baseline.output
    output:
        [
            "output/{version}/data/imputation/df_mi_%i.rds" % i
            for i in range(1, config["mi_m"] + 1)
        ]
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation
        Rscript scripts/generate_imputation_data.R output/{wildcards.version}/data {config[mi_m]}
        """


# Fit one model on one full imputation dataset (imputation index i).
# As above, input[0] is config.yml and input[1] is the data file.
rule model_impute:
    input:
        "config.yml",
        "output/{version}/data/imputation/df_mi_{i}.rds"
    output:
        "output/{version}/data/imputation/{model}/df_gose_imputed_mi_{i}.rds"
    threads: get_rule_threads
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
        Rscript models/{wildcards.model}/fit.R {input[1]} {output}
        """


# final reported values are a combination of the imputed and per-protocol
# observed ones
rule post_process_imputations:
    input:
        "config.yml",
        [
            "output/{version}/data/imputation/{model}/df_gose_imputed_mi_%i.rds" % i
            for i in range(1, config["mi_m"] + 1)
        ],
        "data/{version}/df_baseline.rds",
        # read by the script below, so declared here to be tracked in the DAG
        "output/{version}/data/df_gose.rds"
    output:
        "output/{version}/data/imputation/{model}/df_gose_imputed.csv"
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
        Rscript scripts/post_process_imputations.R output/{wildcards.version}/data/imputation/{wildcards.model} output/{wildcards.version}/data/df_gose.rds data/{wildcards.version}/df_baseline.rds {output}
        """


# Render the imputation report for one model and data version. The report
# is rendered inside reports/ (hence the "../" prefixes on the parameters)
# and the resulting html and figures archive are moved to their targets.
rule imputation_report:
    input:
        "config.yml",
        rules.post_process_imputations.output,
        markdown = "reports/imputations.Rmd"
    output:
        html = "output/{version}/gose_imputations_{model}.html",
        figures = "output/{version}/gose_imputations_{model}_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}
        Rscript -e "rmarkdown::render(\\"{input.markdown}\\", params = list(data_dir = \\"../output/{wildcards.version}/data\\", imputations = \\"../output/{wildcards.version}/data/imputation/{wildcards.model}/df_gose_imputed.csv\\"))"
        mv reports/imputations.html {output.html}
        mv reports/figures.zip {output.figures}
        """


# define corresponding target rule for ease of use
rule impute_msm_v1_1:
    input:
        html = "output/v1.1/gose_imputations_msm.html",
        figures = "output/v1.1/gose_imputations_msm_figures.zip"


# Render the manuscript into output/{version} and collect its figures.
# NOTE(review): `posteriors` comes from model_posteriors, whose inputs are
# pinned to v1.1 regardless of {version}; the output named `pdf` is a .docx
# file. Both are left unchanged here - confirm whether that is intended.
rule manuscript:
    input:
        pop_report = rules.prepare_data.output,
        posteriors = rules.model_posteriors.input,
        markdown = "manuscript/manuscript.Rmd"
    output:
        pdf = "output/{version}/manuscript.docx",
        figures = "output/{version}/manuscript_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}
        Rscript -e "rmarkdown::render(\\"{input.markdown}\\", output_dir = \\"output/{wildcards.version}\\", params = list(data_dir = \\"../output/{wildcards.version}/data\\", config_file = \\"../config.yml\\"))"
        mv manuscript/figures.zip {output.figures}
        """


# Convenience target: build the v1.1 manuscript.
rule manuscript_v1_1:
    input:
        "output/v1.1/manuscript.docx"