# Snakemake workflow: import the raw data, prepare and multiply impute the
# baseline covariates, fit the candidate models on cross-validation folds,
# render the assessment report, and finally impute on the full data set.

# All rules run inside this container image, pinned by digest.
singularity:
    "docker://kkmann/gose-6mo-imputation@sha256:02a8f55c53d6f4917193abda823938e1ad7e8891fd6c392bd94f509e058eb34b"

configfile:
    "config.yml"


# 1-based index ranges used in the file names below. They are evaluated once,
# at parse time, from the values loaded by `configfile` above.
MI_INDICES = range(1, config["mi_m"] + 1)
FOLD_INDICES = range(1, config["folds"] + 1)

# Model directories under models/ for which validation posteriors are built.
MODELS = ("locf", "msm", "gp", "gp_nb", "mm", "mm_nb")

# Data version built by the wildcard-free helper target `model_posteriors`.
DEFAULT_VERSION = "v1.1"


def posterior_files(version):
    """Return every validation posterior file path for one data version.

    One path per (model, imputed data set, fold) combination, ordered by
    model first, then imputation index, then fold index.
    """
    return [
        "output/%s/data/validation/posteriors/%s/df_posterior_mi_%i_fold_%i.rds" % (version, m, i, j)
        for m in MODELS
        for i in MI_INDICES
        for j in FOLD_INDICES
    ]


# adjust threads by model type
def get_rule_threads(wildcards):
    """Return the thread count for a model-fitting job.

    "locf" and "msm" run single-threaded; every other model gets
    config["stan"]["chains"] threads (presumably one per Stan chain --
    TODO confirm against the fit.R scripts).
    """
    if wildcards.model in ("locf", "msm"):
        return 1
    return config["stan"]["chains"]


# Run the import script with data/{version} as both of its arguments; it is
# expected to leave the five .rds tables listed as output in that directory.
rule import_neurobot_csv:
    output:
        "data/{version}/df_baseline.rds",
        "data/{version}/df_ctmri.rds",
        "data/{version}/df_imaging.rds",
        "data/{version}/df_labs.rds",
        "data/{version}/df_gose.rds"
    shell:
        """
        Rscript scripts/import_neurobot_data.R data/{wildcards.version} data/{wildcards.version}
        """


# Render reports/prepare_data.Rmd, then move the .rds files and figures.zip
# that the report leaves in reports/ to their declared output locations.
rule prepare_data:
    input:
        rules.import_neurobot_csv.output,
        markdown = "reports/prepare_data.Rmd"
    output:
        "output/{version}/data/df_gose.rds",
        "output/{version}/data/df_baseline.rds",
        "output/{version}/prepare_data.pdf",
        figures = "output/{version}/prepare_data_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}/data
        Rscript -e "rmarkdown::render('{input.markdown}', output_dir = 'output/{wildcards.version}', params = list(datapath = '../data/{wildcards.version}', max_lab_days = {config[max_lab_days]}, seed = {config[seed]}))"
        mv reports/*.rds output/{wildcards.version}/data
        mv reports/figures.zip {output.figures}
        """


# Create config["mi_m"] imputed copies of the baseline data.
rule impute_baseline:
    input:
        rules.prepare_data.output
    output:
        ["output/{version}/data/mi_baseline/df_baseline_mi_%i.rds" % i
            for i in MI_INDICES]
    shell:
        """
        mkdir -p output/{wildcards.version}/data/mi_baseline
        Rscript scripts/impute_baseline.R output/{wildcards.version}/data/df_baseline.rds output/{wildcards.version}/data/mi_baseline {config[mi_m]} {config[mi_maxiter]} {config[seed]}
        """


# Produce one train and one test file per (imputed data set, fold) pair.
rule generate_validation_data:
    input:
        rules.prepare_data.output,
        rules.impute_baseline.output
    output:
        ["output/{version}/data/validation/df_%s_mi_%i_fold_%i.rds" % (s, i, j)
            for s in ("train", "test")
            for i in MI_INDICES
            for j in FOLD_INDICES]
    shell:
        """
        mkdir -p output/{wildcards.version}/data/validation
        Rscript scripts/generate_validation_data.R output/{wildcards.version}/data {config[mi_m]} {config[folds]} {config[seed]}
        """


# Fit one model on one training fold. config.yml is declared as an input so
# that editing the configuration marks the fits as out of date; it is not
# passed on the command line.
rule fit_model_validation_set:
    input:
        config_file = "config.yml",
        train = "output/{version}/data/validation/df_train_mi_{i}_fold_{j}.rds"
    output:
        "output/{version}/data/validation/posteriors/{model}/df_posterior_mi_{i}_fold_{j}.rds"
    threads:
        get_rule_threads
    shell:
        """
        mkdir -p output/{wildcards.version}/data/validation/posteriors/{wildcards.model}
        Rscript models/{wildcards.model}/fit.R {input.train} {output}
        """


# helper rule to just build all posterior datasets
# (wildcard-free so it can be requested by name; builds DEFAULT_VERSION)
rule model_posteriors:
    input:
        posterior_files(DEFAULT_VERSION)


# Render the model assessment report for one data version. The posteriors are
# resolved through an input function so that they follow the requested
# {version}; the report itself reads from ../output/{version}/data.
rule model_assessment:
    input:
        pop_report = rules.prepare_data.output,
        posteriors = lambda wildcards: posterior_files(wildcards.version),
        markdown = "reports/model_assessment.Rmd"
    output:
        pdf = "output/{version}/model_assessment.pdf",
        figures = "output/{version}/model_assessment_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}
        Rscript -e "rmarkdown::render('{input.markdown}', output_dir = 'output/{wildcards.version}', params = list(data_dir = '../output/{wildcards.version}/data', config_file = '../config.yml'))"
        mv reports/figures.zip {output.figures}
        """


# Build one full (non cross-validated) data set per baseline imputation.
rule generate_imputation_data:
    input:
        rules.prepare_data.output,
        rules.impute_baseline.output
    output:
        ["output/{version}/data/imputation/df_mi_%i.rds" % i
            for i in MI_INDICES]
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation
        Rscript scripts/generate_imputation_data.R output/{wildcards.version}/data {config[mi_m]}
        """


# rules for imputing on entire dataset
rule model_impute:
    input:
        config_file = "config.yml",
        dataset = "output/{version}/data/imputation/df_mi_{i}.rds"
    output:
        "output/{version}/data/imputation/{model}/df_gose_imputed_mi_{i}.rds"
    threads:
        get_rule_threads
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
        Rscript models/{wildcards.model}/fit.R {input.dataset} {output}
        """


# final reported values are a combination of the imputed and per-protocol
# observed ones
rule post_process_imputations:
    input:
        config_file = "config.yml",
        # read by the script below, so declared explicitly as a dependency
        gose = "output/{version}/data/df_gose.rds",
        imputations = ["output/{version}/data/imputation/{model}/df_gose_imputed_mi_%i.rds" % i
            for i in MI_INDICES]
    output:
        "output/{version}/data/imputation/{model}/df_gose_imputed.csv"
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
        Rscript scripts/post_process_imputations.R output/{wildcards.version}/data/imputation/{wildcards.model} {input.gose} {output}
        """