# edit configuration here (see cluster.json for running on slurm scheduler) configfile: "config.yml" localrules: impute, clean, download_container, generate_samples_file # this rule runs the entire pipeline and creates a R data set file (rds) # containing all imputed expression values rule impute: input: "container.sif", expand("{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz", region = config['brain_regions'], output_dir = config['output_dir']) # delete output and logs (if run on slurm cluster) rule clean: shell: """ rm -rf {config[output_dir]} rm -rf logs rm -rf nohup.out """ # download vcf files using gsutil (needs to be set up and configured! rule download_imputed_genotype_chromosome: output: "output/imputed-genotypes/CENTER_TBI_imputed_genotype_chr{i}.vcf.gz" singularity: "container.sif" shell: """ mkdir -p output/imputed-genotypes gsutil cp gs://fimm-horizon-outgoing-data/CENTER_TBI_data_freeze_191014/Imputed_data/CENTER.TBI_imputed_N3741_hg19_HRC_MAC1_freeze_191014_chr1.vcf.gz \ output/imputed-genotypes/CENTER_TBI_imputed_genotype_chr{wildcards.i}.vcf.gz """ rule download_imputed_genotypes: input: expand("output/imputed-genotypes/CENTER_TBI_imputed_genotype_chr{i}.vcf.gz", i = list(map(str, range(1, 23))) + ['X'] ) # download the required singularity container from zenodo.org rule download_container: output: "container.sif" shell: """ bash scripts/download_container.sh """ # extract dosages in custom PrediXcan format from vcf file rule vcf_to_dosages: input: "container.sif", vcf_gz_file = "output/imputed-genotypes/CENTER_TBI_imputed_genotype_chr{i}.vcf.gz" output: "{output_dir}/dosages/chr{i}.dosage.txt.gz" singularity: "container.sif" shell: """ export prefix={wildcards.output_dir}/dosages mkdir -p $prefix echo "decompress and filter out: 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' ..." bcftools filter -e 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' {input.vcf_gz_file} > $prefix/chr{wildcards.i}_.vcf echo "add MAFs ..." bcftools +fill-tags $prefix/chr{wildcards.i}_.vcf > $prefix/chr{wildcards.i}.vcf rm $prefix/chr{wildcards.i}_.vcf echo 'querying dosages ...' bcftools query -f '%CHROM %ID %POS %REF %ALT %INFO/MAF [%DS ]\n' $prefix/chr{wildcards.i}.vcf > $prefix/chr{wildcards.i}.dosage.txt rm $prefix/chr{wildcards.i}.vcf echo 'compressing ...' gzip $prefix/chr{wildcards.i}.dosage.txt printf "done.\n\r\n\r" """ # extract sample file for PrediXcan rule generate_samples_file: input: "container.sif", vcf_gz_file = "output/imputed-genotypes/CENTER_TBI_imputed_genotype_chr1.vcf.gz" output: "{output_dir}/dosages/samples.txt" singularity: "container.sif" params: format = "\'{ printf(\"%s %s\\n\", $1, $1); }\'" shell: """ export prefix={wildcards.output_dir}/dosages mkdir -p $prefix bcftools query -l {input.vcf_gz_file} >> $prefix/samples_.txt # family ID = individual ID awk {params.format} < $prefix/samples_.txt > $prefix/samples.txt rm $prefix/samples_.txt """ # run PrediXcan to impute gene expression for individual tissue type rule impute_gene_expressions: input: "container.sif", samples_file = expand("{output_dir}/dosages/samples.txt", output_dir = config['output_dir']), dosage_files = expand("{output_dir}/dosages/chr{i}.dosage.txt.gz", i = list(map(str, range(1, 23))) + ['X'], output_dir = config['output_dir'] ) output: "{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz" singularity: "container.sif" shell: """ mkdir -p {wildcards.output_dir}/imputed-gene-expressions predixcan \ --predict \ --dosages {config[output_dir]}/dosages \ --dosages_prefix chr \ --samples samples.txt \ --weights /usr/predixcan/GTEx-V7_HapMap-2017-11-29/gtex_v7_Brain_{wildcards.region}_imputed_europeans_tw_0.5_signif.db \ --output_prefix {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region} mv {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}_predicted_expression.txt \ {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}.expression.txt gzip {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}.expression.txt """