Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
impute-gene-expression
Project overview
Project overview
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Commits
Issue Boards
Open sidebar
Kevin Kunzmann
impute-gene-expression
Commits
3ba0a87b
Commit
3ba0a87b
authored
Mar 06, 2020
by
Kevin Kunzmann
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
...
parent
494f49f5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
75 additions
and
45 deletions
+75
-45
Snakefile
Snakefile
+60
-34
scripts/container.def
scripts/container.def
+15
-11
No files found.
Snakefile
View file @
3ba0a87b
configfile: "config/parameters.yml"
singularity: "container.sif"
localrules:
impute, generate_samples_file
localrules:
data
rule impute:
input:
expand("{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz", region = config['brain_regions'], output_dir = config['output_dir'])
# download vcf files using gsutil (needs to be set up and configured!)
rule download_imputed_genotype_chromosome:
# download vcf files using gsutil (needs to be set up and configured!) and
# variant id <-> position mapping for GTEx v8 (based on hg38p7 assembly)
rule data:
output:
"output/imputed-genotypes/chromosome-{i}.vcf.gz"
expand(
"{output_dir}/imputed-genotypes/chromosome-{i}.vcf.gz",
output_dir=config["output_dir"],
i=range(1,24)
),
expand(
"{output_dir}/GTEx_v8_hg38p7_variant_lookup_table.txt.gz",
output_dir=config["output_dir"]
)
shell:
"""
setr -ex
mkdir -p output/imputed-genotypes
FILEDIR=gs://fimm-horizon-outgoing-data/20201002-center-tbi-genetic-data/genome-wide-imputation-data
gsutil cp $FILEDIR/chromosome-{wildcards.i}.vcf.gz {output}
set -ex
mkdir -p {config[output_dir]}/imputed-genotypes
FILEDIR=gs://fimm-horizon-outgoing-data/genetic-data/imputed-genomes/20200306/all-acgm-filtered
for i in {{1..23}}
do
gsutil cp $FILEDIR/chromosome-$i.vcf.gz \
{config[output_dir]}/imputed-genotypes/chromosome-$i.vcf.gz
done
# download official GTEx version 8 mapping of variants to hg38 positions
# from https://gtexportal.org/home/datasets, 'reference', 2020-03-06
wget https://storage.googleapis.com/gtex_analysis_v8/reference/GTEx_Analysis_2017-06-05_v8_WholeGenomeSeq_838Indiv_Analysis_Freeze.lookup_table.txt.gz \
-O {config[output_dir]}/GTEx_v8_hg38p7_variant_lookup_table.txt.gz
"""
rule dosage
s
:
rule dosage:
input:
expand("{out}/imputed-genotypes/chromosome-22.vcf.gz", out=config["output_dir"])
"{out}/imputed-genotypes/chromosome-{i}.vcf.gz",
"{out}/GTEx_v8_hg38p7_variant_lookup_table.txt.gz"
output:
expand("{out}/imputed-genotypes/chromosome-22.dosage.txt.gz", out=config["output_dir"])
"{out}/dosages/chromosome-{i}.dosage.txt.gz"
shell:
"""
set -ex
mkdir -p {config[output_dir]}/dosages
# filter for SNPs of defined quality and extract dosage
pv {input} | \
bcftools filter -e 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' | \
bcftools +fill-tags | \
bcftools query -f \ '%CHROM %ID %POS %REF %ALT %INFO/MAF [%DS ]\n' > \
{config[output_dir]}/dosages/chromosome-22.dosage.txt
gzip {config[output_dir]}/dosages/chromosome-22.dosage.txt
# convert locations to GTEX v8 by hg38 position
{config[output_dir]}/dosages/chromosome-{wildcards.i}.dosage.txt
# compress
gzip {config[output_dir]}/dosages/chromosome-{wildcards.i}.dosage.txt
# convert locations to GTEx v8 by hg38 position
"""
# compute dosage for all 23 chromosomes
rule dosages:
input:
rules.data.output,
expand(
"{out}/imputed-genotypes/chromosome-{i}.vcf.gz",
out=config["output_dir"],
i=range(1,24)
)
rule samples_file:
input:
...
...
@@ -57,29 +82,30 @@ rule samples_file:
"""
rule
impute_gene_expressions
:
rule
grex
:
input:
"container.sif",
samples_file = expand("{output_dir}/dosages/samples.txt", output_dir = config['output_dir']),
dosage_files = expand("{output_dir}/dosages/chr{i}.dosage.txt.gz",
i = list(map(str, range(1, 23))) + ['X'],
samples_file = expand(
"{output_dir}/dosages/samples.txt",
output_dir = config['output_dir']
),
dosage_files = expand(
"{output_dir}/dosages/chromosome-{i}.dosage.txt.gz",
i=range(1,24) ,
output_dir=config["output_dir"]
)
output:
"{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz"
singularity:
"container.sif"
"{output_dir}/imputed-grex/{region}.igrex.txt.gz"
shell:
"""
mkdir -p {wildcards.output_dir}/imputed-g
ene-expressions
mkdir -p {wildcards.output_dir}/imputed-g
rex
predixcan \
--predict \
--dosages {config[output_dir]}/dosages \
--dosages_prefix chr \
--dosages_prefix chr
omosome-
\
--samples samples.txt \
--weights /usr/predixcan/GTEx-V7_HapMap-2017-11-29/gtex_v7_Brain_{wildcards.region}_imputed_europeans_tw_0.5_signif.db \
--output_prefix {wildcards.output_dir}/imputed-g
ene-expressions
/{wildcards.region}
mv {wildcards.output_dir}/imputed-g
ene-expressions
/{wildcards.region}_predicted_expression.txt \
{wildcards.output_dir}/imputed-g
ene-expressions/{wildcards.region}.expression
.txt
gzip {wildcards.output_dir}/imputed-g
ene-expressions/{wildcards.region}.expression
.txt
--output_prefix {wildcards.output_dir}/imputed-g
rex
/{wildcards.region}
mv {wildcards.output_dir}/imputed-g
rex
/{wildcards.region}_predicted_expression.txt \
{wildcards.output_dir}/imputed-g
rex/{wildcards.region}.igrex
.txt
gzip {wildcards.output_dir}/imputed-g
rex/{wildcards.region}.igrex
.txt
"""
scripts/container.def
View file @
3ba0a87b
...
...
@@ -28,7 +28,7 @@ From: rocker/verse:3.6.2
export BCFVER=1.10.2
apt-get -y install \
gcc wget make zlib1g zlib1g-dev libbz2-dev liblzma-dev libcurl4-openssl-dev
wget https://github.com/samtools/bcftools/releases/download/
1.9
/bcftools-$BCFVER.tar.bz2
wget https://github.com/samtools/bcftools/releases/download/
$BCFVER
/bcftools-$BCFVER.tar.bz2
tar -xvjf bcftools-$BCFVER.tar.bz2
cd bcftools-$BCFVER
./configure --prefix=/usr/bcftools
...
...
@@ -36,21 +36,25 @@ From: rocker/verse:3.6.2
make install
(cd /usr/bin; ln -s /usr/bcftools/bin/bcftools bcftools)
# install
PrediXcan and python dependencies (uses python 2.7)
apt-get -y install \
wget python-pip
wget https://raw.githubusercontent.com/hakyimlab/PrediXcan/master/Software/PrediXcan.py -O
/usr/bin/predixcan
# install
MetaXcan and python dependencies
mkdir -p /usr/MetaXcan
(cd /usr/MetaXcan; git clone https://github.com/hakyimlab/MetaXcan; cd MetaXcan; git checkout b5a1741244c32a30cd16d5eca5da4d0f38bc90a7)
ln -s /usr/MetaXcan/software/PrediXcan.py
/usr/bin/predixcan
chmod +x /usr/bin/predixcan
pip install \
argparse datetime numpy
pip3 install \
numpy==1.18.1 scipy==1.4.1 pandas==1.0.1 mock==4.0.1 sqlalchemy==1.3.13 \
patsy==0.5.1 statsmodels==0.11.1 h5py==2.10.0 h5py-cache==1.0.1
# download, extract and store (brain) weights
mkdir /usr/predixcan
wget https://zenodo.org/record/3518299/files/mashr_eqtl.tar?download=1 -O /usr/predixcan/mashr_eqtl.tar.gz
# download, extract and store weights
apt-get -y install wget
mkdir /usr/MetaXcan/weights
wget https://zenodo.org/record/3518299/files/mashr_eqtl.tar?download=1 \
-O /usr/MetaXcan/weights/mashr_eqtl.tar.gz
# TODO: untar the downloaded weights archive
# predixcan connects to the weights database with sql, needs write permission
# even if the file system will be read only for the container
chmod -R 777 /usr/
predix
can
chmod -R 777 /usr/
MetaX
can
# install R packages
Rscript /tmp/install.R
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment