# Snakefile (6.67 KB) -- last commit e9ba7f09, authored by Kevin
# Container image used for rule jobs when Snakemake is run with
# --use-singularity. The image is pinned by its sha256 digest rather than a tag.
singularity: "docker://kkmann/gose-6mo-imputation@sha256:85724229d8f4243aaebd6228e5cc7833474577ac107f9719b00016765f9ee342"

# Load workflow settings into the global `config` dict. Keys referenced by the
# rules below include: max_lab_days, seed, age_min, age_max, mi_m, mi_maxiter,
# folds and stan -> chains.
configfile: "config.yml"



# Produce the five raw data tables of one data version by running the
# version-specific script scripts/download_<version>.sh.
rule download_data:
    output:
        expand(
            "data/{{version}}/df_{table}.rds",
            table = ["baseline", "ctmri", "imaging", "labs", "gose"]
        )
    shell:
        """
        bash scripts/download_{wildcards.version}.sh
        """

# Convenience target: request every download_data output for version v1.1.
rule download_data_v1_1:
    input:
        expand(rules.download_data.output, version = "v1.1")


# Render reports/prepare_data.Rmd for one data version.
#
# The report is rendered into output/<version>/ with the parameters
# max_lab_days, seed, age_min and age_max taken from the config, and with
# datapath pointing at ../data/<version> (NOTE(review): the relative path
# presumably resolves from the reports/ directory in which the Rmd is knitted
# -- confirm). Afterwards every reports/*.rds file is moved into
# output/<version>/data and reports/figures.zip is moved to the declared
# `figures` output; presumably the Rmd writes df_gose.rds, df_baseline.rds and
# figures.zip next to itself -- confirm against the Rmd.
#
# The \\" sequences in the shell block reach the shell as \" and therefore end
# up as literal double quotes inside the R expression passed to Rscript -e.
rule prepare_data:
    input:
        rules.download_data.output,
        markdown = "reports/prepare_data.Rmd"
    output:
        "output/{version}/data/df_gose.rds",
        "output/{version}/data/df_baseline.rds",
        "output/{version}/prepare_data.html",
        figures = "output/{version}/prepare_data_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}/data
        Rscript -e "rmarkdown::render(\\"{input.markdown}\\", output_dir = \\"output/{wildcards.version}\\", params = list(datapath = \\"../data/{wildcards.version}\\", max_lab_days = {config[max_lab_days]}, seed = {config[seed]}, age_min = {config[age_min]}, age_max = {config[age_max]}))"
        mv reports/*.rds output/{wildcards.version}/data
        mv reports/figures.zip {output.figures}
        """



# Run scripts/impute_baseline.R on the prepared baseline table. One output file
# df_baseline_mi_<i>.rds is declared per imputation, i = 1..config["mi_m"].
rule impute_baseline:
    input:
        rules.prepare_data.output
    output:
        expand(
            "output/{{version}}/data/mi_baseline/df_baseline_mi_{i}.rds",
            i = range(1, config["mi_m"] + 1)
        )
    shell:
        """
        mkdir -p output/{wildcards.version}/data/mi_baseline
        Rscript scripts/impute_baseline.R output/{wildcards.version}/data/df_baseline.rds output/{wildcards.version}/data/mi_baseline {config[mi_m]} {config[mi_maxiter]} {config[seed]}
        """



# Run scripts/generate_validation_data.R. A train and a test file are declared
# for every combination of imputation (1..config["mi_m"]) and fold
# (1..config["folds"]).
rule generate_validation_data:
    input:
        rules.prepare_data.output,
        rules.impute_baseline.output
    output:
        expand(
            "output/{{version}}/data/validation/df_{s}_mi_{i}_fold_{j}.rds",
            s = ("train", "test"),
            i = range(1, config["mi_m"] + 1),
            j = range(1, config["folds"] + 1)
        )
    shell:
        """
        mkdir -p output/{wildcards.version}/data/validation
        Rscript scripts/generate_validation_data.R output/{wildcards.version}/data {config[mi_m]} {config[folds]} {config[seed]}
        """



# choose the number of threads from the model wildcard
def get_rule_threads(wildcards):
    """Return the thread count for a rule instance.

    The "locf" and "msm" models get a single thread; every other model gets
    config["stan"]["chains"] threads.
    """
    single_threaded = ("locf", "msm")
    return 1 if wildcards.model in single_threaded else config["stan"]["chains"]

# Run models/<model>/fit.R on the training file of one (imputation, fold) pair;
# the script receives the training file and the output path as arguments.
# config.yml is declared as an input but is not passed to the script.
rule fit_model_validation_set:
    input:
        "config.yml",
        train = "output/{version}/data/validation/df_train_mi_{i}_fold_{j}.rds"
    output:
        "output/{version}/data/validation/posteriors/{model}/df_posterior_mi_{i}_fold_{j}.rds"
    threads:
        get_rule_threads
    shell:
        """
        mkdir -p output/{wildcards.version}/data/validation/posteriors/{wildcards.model}
        Rscript models/{wildcards.model}/fit.R {input.train} {output}
        """



# helper target: request the v1.1 posterior file of every model for each
# imputation / fold combination
rule model_posteriors:
    input:
        expand(
            "output/v1.1/data/validation/posteriors/{m}/df_posterior_mi_{i}_fold_{j}.rds",
            m = ("locf", "msm", "msm_age", "gp", "gp_nb", "mm", "mm_nb"),
            i = range(1, config["mi_m"] + 1),
            j = range(1, config["folds"] + 1)
        )



# rules for imputing on the entire dataset
#
# Run scripts/generate_imputation_data.R; one df_mi_<i>.rds file is declared
# per imputation, i = 1..config["mi_m"].
rule generate_imputation_data:
    input:
        rules.prepare_data.output,
        rules.impute_baseline.output
    output:
        expand(
            "output/{{version}}/data/imputation/df_mi_{i}.rds",
            i = range(1, config["mi_m"] + 1)
        )
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation
        Rscript scripts/generate_imputation_data.R output/{wildcards.version}/data {config[mi_m]}
        """



# Run models/<model>/fit.R on one imputed full dataset; the script receives the
# data file and the output path as arguments. config.yml is declared as an
# input but is not passed to the script.
rule model_impute:
    input:
        "config.yml",
        data = "output/{version}/data/imputation/df_mi_{i}.rds"
    output:
        "output/{version}/data/imputation/{model}/df_gose_imputed_mi_{i}.rds"
    threads:
        get_rule_threads
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
        Rscript models/{wildcards.model}/fit.R {input.data} {output}
        """

# final reported values are a combination of the imputed and per-protocol
# observed ones
#
# Runs scripts/post_process_imputations.R with, in order: the directory holding
# the per-imputation files of the model, the prepared df_gose.rds, the raw
# df_baseline.rds and the path of the CSV to write.
#
# The prepared df_gose.rds is declared as the `gose` input because the script
# reads it; without the declaration Snakemake only tracked it indirectly
# through upstream rules. The resulting shell command is unchanged.
rule post_process_imputations:
    input:
        "config.yml",
        imputations = [
            "output/{version}/data/imputation/{model}/df_gose_imputed_mi_%i.rds" % i
            for i in range(1, config["mi_m"] + 1)
        ],
        baseline = "data/{version}/df_baseline.rds",
        gose = "output/{version}/data/df_gose.rds"
    output:
        "output/{version}/data/imputation/{model}/df_gose_imputed.csv"
    shell:
        """
        mkdir -p output/{wildcards.version}/data/imputation/{wildcards.model}
        Rscript scripts/post_process_imputations.R output/{wildcards.version}/data/imputation/{wildcards.model} {input.gose} {input.baseline} {output}
        """

# Render reports/imputations.Rmd for one data version and model.
#
# The Rmd receives data_dir and the path of the post-processed imputation CSV
# as parameters. Both paths are built from the version wildcard, so the report
# reads the CSV of the version being built; the CSV path was previously fixed
# to v1.1 regardless of the wildcard. No output_dir is passed to render(), so
# the HTML is picked up from reports/ and moved to its declared location
# together with reports/figures.zip.
rule imputation_report:
    input:
        "config.yml",
        rules.post_process_imputations.output,
        markdown = "reports/imputations.Rmd"
    output:
        html    = "output/{version}/gose_imputations_{model}.html",
        figures = "output/{version}/gose_imputations_{model}_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}
        Rscript -e "rmarkdown::render(\\"{input.markdown}\\", params = list(data_dir = \\"../output/{wildcards.version}/data\\", imputations = \\"../output/{wildcards.version}/data/imputation/{wildcards.model}/df_gose_imputed.csv\\"))"
        mv reports/imputations.html {output.html}
        mv reports/figures.zip {output.figures}
        """

# define corresponding target rule for ease of use
# Requests the imputation report (HTML and figures archive) for the "msm"
# model on data version v1.1; both paths match the outputs of
# rule imputation_report.
rule impute_msm_v1_1:
    input:
        html    = "output/v1.1/gose_imputations_msm.html",
        figures = "output/v1.1/gose_imputations_msm_figures.zip"




# Render manuscript/manuscript.Rmd for one data version.
#
# The posterior inputs are built with the {version} wildcard so that the
# manuscript depends on the posteriors of the version being rendered; they were
# previously taken from rule model_posteriors, which is fixed to v1.1. For
# v1.1 the resulting file list is identical. The model tuple must be kept in
# sync with rule model_posteriors.
#
# config.yml is declared as an input because its path is handed to the Rmd as
# the config_file parameter.
#
# NOTE: the output is named `pdf` although the file is a .docx; the name is
# kept so existing references to it keep working.
rule manuscript:
    input:
        pop_report  = rules.prepare_data.output,
        posteriors  = [
            "output/{version}/data/validation/posteriors/%s/df_posterior_mi_%i_fold_%i.rds" % (m, i, j)
            for m in ("locf", "msm", "msm_age", "gp", "gp_nb", "mm", "mm_nb")
            for i in range(1, config["mi_m"] + 1)
            for j in range(1, config["folds"] + 1)
        ],
        config_file = "config.yml",
        markdown    = "manuscript/manuscript.Rmd"
    output:
        pdf     = "output/{version}/manuscript.docx",
        figures = "output/{version}/manuscript_figures.zip"
    shell:
        """
        mkdir -p output/{wildcards.version}
        Rscript -e "rmarkdown::render(\\"{input.markdown}\\", output_dir = \\"output/{wildcards.version}\\", params = list(data_dir = \\"../output/{wildcards.version}/data\\", config_file = \\"../config.yml\\"))"
        mv manuscript/figures.zip {output.figures}
        """

# Convenience target: request the manuscript document for data version v1.1
# (an output of rule manuscript).
rule manuscript_v1_1:
    input:
        "output/v1.1/manuscript.docx"