Commit 3ba0a87b authored by Kevin Kunzmann

...

parent 494f49f5
configfile: "config/parameters.yml" configfile: "config/parameters.yml"
singularity: "container.sif" singularity: "container.sif"
localrules: impute, generate_samples_file localrules: data
# download vcf files using gsutil (needs to be set up and configured!) and
rule impute: # variant id <-> position mapping for GTEx v8 (based on hg38p7 assembly)
input: rule data:
expand("{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz", region = config['brain_regions'], output_dir = config['output_dir'])
# download vcf files using gsutil (needs to be set up and configured!)
rule download_imputed_genotype_chromosome:
output: output:
"output/imputed-genotypes/chromosome-{i}.vcf.gz" expand(
"{output_dir}/imputed-genotypes/chromosome-{i}.vcf.gz",
output_dir=config["output_dir"],
i=range(1,24)
),
expand(
"{output_dir}/GTEx_v8_hg38p7_variant_lookup_table.txt.gz",
output_dir=config["output_dir"]
)
shell: shell:
""" """
setr -ex set -ex
mkdir -p output/imputed-genotypes mkdir -p {config[output_dir]}/imputed-genotypes
FILEDIR=gs://fimm-horizon-outgoing-data/20201002-center-tbi-genetic-data/genome-wide-imputation-data FILEDIR=gs://fimm-horizon-outgoing-data/genetic-data/imputed-genomes/20200306/all-acgm-filtered
gsutil cp $FILEDIR/chromosome-{wildcards.i}.vcf.gz {output} for i in {{1..23}}
do
gsutil cp $FILEDIR/chromosome-$i.vcf.gz \
{config[output_dir]}/imputed-genotypes/chromosome-$i.vcf.gz
done
# download official GTEx version 8 mapping of variants to hg38 positions
# from https://gtexportal.org/home/datasets, 'reference', 2020-03-06
wget https://storage.googleapis.com/gtex_analysis_v8/reference/GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.lookup_table.txt.gz \
-O {config[output_dir]}/GTEx_v8_hg38p7_variant_lookup_table.txt.gz
""" """
rule dosages: rule dosage:
input: input:
expand("{out}/imputed-genotypes/chromosome-22.vcf.gz", out=config["output_dir"]) "{out}/imputed-genotypes/chromosome-{i}.vcf.gz",
"{out}/GTEx_v8_hg38p7_variant_lookup_table.txt.gz"
output: output:
expand("{out}/imputed-genotypes/chromosome-22.dosage.txt.gz", out=config["output_dir"]) "{out}/dosages/chromosome-{i}.dosage.txt.gz"
shell: shell:
""" """
set -ex set -ex
mkdir -p {config[output_dir]}/dosages mkdir -p {config[output_dir]}/dosages
# filter for SNPs of defined quality and extract dosage
pv {input} | \ pv {input} | \
bcftools filter -e 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' | \ bcftools filter -e 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' | \
bcftools +fill-tags | \ bcftools +fill-tags | \
bcftools query -f \ '%CHROM %ID %POS %REF %ALT %INFO/MAF [%DS ]\n' > \ bcftools query -f \ '%CHROM %ID %POS %REF %ALT %INFO/MAF [%DS ]\n' > \
{config[output_dir]}/dosages/chromosome-22.dosage.txt {config[output_dir]}/dosages/chromosome-{wildcards.i}.dosage.txt
gzip {config[output_dir]}/dosages/chromosome-22.dosage.txt # compress
# convert locations to GTEX v8 by hg38 position gzip {config[output_dir]}/dosages/chromosome-{wildcards.i}.dosage.txt
# convert locations to GTEx v8 by hg38 position
""" """
# compute dosage for all 23 chromosomes
rule dosages:
input:
rules.data.output,
expand(
"{out}/imputed-genotypes/chromosome-{i}.vcf.gz",
out=config["output_dir"],
i=range(1,24)
)
rule samples_file: rule samples_file:
input: input:
...@@ -57,29 +82,30 @@ rule samples_file: ...@@ -57,29 +82,30 @@ rule samples_file:
""" """
rule impute_gene_expressions: rule grex:
input: input:
"container.sif", samples_file = expand(
samples_file = expand("{output_dir}/dosages/samples.txt", output_dir = config['output_dir']), "{output_dir}/dosages/samples.txt",
dosage_files = expand("{output_dir}/dosages/chr{i}.dosage.txt.gz",
i = list(map(str, range(1, 23))) + ['X'],
output_dir = config['output_dir'] output_dir = config['output_dir']
),
dosage_files = expand(
"{output_dir}/dosages/chromosome-{i}.dosage.txt.gz",
i=range(1,24) ,
output_dir=config["output_dir"]
) )
output: output:
"{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz" "{output_dir}/imputed-grex/{region}.igrex.txt.gz"
singularity:
"container.sif"
shell: shell:
""" """
mkdir -p {wildcards.output_dir}/imputed-gene-expressions mkdir -p {wildcards.output_dir}/imputed-grex
predixcan \ predixcan \
--predict \ --predict \
--dosages {config[output_dir]}/dosages \ --dosages {config[output_dir]}/dosages \
--dosages_prefix chr \ --dosages_prefix chromosome- \
--samples samples.txt \ --samples samples.txt \
--weights /usr/predixcan/GTEx-V7_HapMap-2017-11-29/gtex_v7_Brain_{wildcards.region}_imputed_europeans_tw_0.5_signif.db \ --weights /usr/predixcan/GTEx-V7_HapMap-2017-11-29/gtex_v7_Brain_{wildcards.region}_imputed_europeans_tw_0.5_signif.db \
--output_prefix {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region} --output_prefix {wildcards.output_dir}/imputed-grex/{wildcards.region}
mv {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}_predicted_expression.txt \ mv {wildcards.output_dir}/imputed-grex/{wildcards.region}_predicted_expression.txt \
{wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}.expression.txt {wildcards.output_dir}/imputed-grex/{wildcards.region}.igrex.txt
gzip {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}.expression.txt gzip {wildcards.output_dir}/imputed-grex/{wildcards.region}.igrex.txt
""" """
...@@ -28,7 +28,7 @@ From: rocker/verse:3.6.2 ...@@ -28,7 +28,7 @@ From: rocker/verse:3.6.2
export BCFVER=1.10.2 export BCFVER=1.10.2
apt-get -y install \ apt-get -y install \
gcc wget make zlib1g zlib1g-dev libbz2-dev liblzma-dev libcurl4-openssl-dev gcc wget make zlib1g zlib1g-dev libbz2-dev liblzma-dev libcurl4-openssl-dev
wget https://github.com/samtools/bcftools/releases/download/1.9/bcftools-$BCFVER.tar.bz2 wget https://github.com/samtools/bcftools/releases/download/$BCFVER/bcftools-$BCFVER.tar.bz2
tar -xvjf bcftools-$BCFVER.tar.bz2 tar -xvjf bcftools-$BCFVER.tar.bz2
cd bcftools-$BCFVER cd bcftools-$BCFVER
./configure --prefix=/usr/bcftools ./configure --prefix=/usr/bcftools
...@@ -36,21 +36,25 @@ From: rocker/verse:3.6.2 ...@@ -36,21 +36,25 @@ From: rocker/verse:3.6.2
make install make install
(cd /usr/bin; ln -s /usr/bcftools/bin/bcftools bcftools) (cd /usr/bin; ln -s /usr/bcftools/bin/bcftools bcftools)
# install PrediXcan and python dependencies (uses python 2.7) # install MetaXcan and python dependencies
apt-get -y install \ mkdir -p /usr/MetaXcan
wget python-pip (cd /usr/MetaXcan; git clone https://github.com/hakyimlab/MetaXcan; cd MetaXcan; git checkout b5a1741244c32a30cd16d5eca5da4d0f38bc90a7)
wget https://raw.githubusercontent.com/hakyimlab/PrediXcan/master/Software/PrediXcan.py -O /usr/bin/predixcan ln -s /usr/MetaXcan/software/PrediXcan.py /usr/bin/predixcan
chmod +x /usr/bin/predixcan chmod +x /usr/bin/predixcan
pip install \ pip3 install \
argparse datetime numpy numpy==1.18.1 scipy==1.4.1 pandas==1.0.1 mock==4.0.1 sqlalchemy==1.3.13 \
patsy==0.5.1 statsmodels==0.11.1 h5py==2.10.0 h5py-cache==1.0.1
# download, extract and store (brain) weights # download, extract and store weights
mkdir /usr/predixcan apt-get -y install wget
wget https://zenodo.org/record/3518299/files/mashr_eqtl.tar?download=1 -O /usr/predixcan/mashr_eqtl.tar.gz mkdir /usr/MetaXcan/weights
wget https://zenodo.org/record/3518299/files/mashr_eqtl.tar?download=1 \
-O /usr/MetaXcan/weights/mashr_eqtl.tar.gz
# TODO: untar the downloaded weights archive
# predixcan connects to the weights database with sql, needs write permission # predixcan connects to the weights database with sql, needs write permission
# even if the file system will be read only for the container # even if the file system will be read only for the container
chmod -R 777 /usr/predixcan chmod -R 777 /usr/MetaXcan
# install R packages # install R packages
Rscript /tmp/install.R Rscript /tmp/install.R
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment