Commit 18832746 authored by Kevin Kunzmann

earlier filtering

parent 5b0d1751
...@@ -69,16 +69,21 @@ rule vcf_to_dosages: ...@@ -69,16 +69,21 @@ rule vcf_to_dosages:
""" """
export prefix={wildcards.output_dir}/dosages export prefix={wildcards.output_dir}/dosages
mkdir -p $prefix mkdir -p $prefix
echo "extracting and computing MAFs ..." echo "decompress and filter out: 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' ..."
bcftools +fill-tags {input.vcf_gz_file} > $prefix/chr{wildcards.i}.vcf bcftools filter -e 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' > $prefix/chr{wildcards.i}_.vcf
echo "add computing MAFs ..."
bcftools $prefix/chr{wildcards.i}.vcf_ > $prefix/chr{wildcards.i}.vcf
rm $prefix/chr{wildcards.i}_.vcf
echo 'querying dosages ...' echo 'querying dosages ...'
bcftools query -e 'MAF[0]<{config[min_MAF]} | INFO<{config[min_INFO]} | TYPE!="snp" | N_ALT!=1' -f '%CHROM %ID %POS %REF %ALT %INFO/MAF [%DS ]\n' $prefix/chr{wildcards.i}.vcf > $prefix/chr{wildcards.i}.dosage.txt bcftools query -f '%CHROM %ID %POS %REF %ALT %INFO/MAF [%DS ]\n' $prefix/chr{wildcards.i}.vcf > $prefix/chr{wildcards.i}.dosage.txt
rm $prefix/chr{wildcards.i}.vcf
echo 'compressing ...' echo 'compressing ...'
gzip $prefix/chr{wildcards.i}.dosage.txt gzip $prefix/chr{wildcards.i}.dosage.txt
rm $prefix/chr{wildcards.i}.vcf
printf "done.\n\r\n\r" printf "done.\n\r\n\r"
""" """
bcftools +fill-tags -e 'MAF[0]<0.01 | INFO<0.8 | TYPE!="snp" | N_ALT!=1' CENTER_TBI_imputed_3695_1K_MAC1_freeze_190829_chr1.vcf.gz > chr1.vcf
# extract sample file for PrediXcan # extract sample file for PrediXcan
rule generate_samples_file: rule generate_samples_file:
input: input:
......
mkdir logs mkdir logs
nohup snakemake $1 \ nohup snakemake $1 \
--jobs 5 \ --jobs 99 \
--use-singularity \ --use-singularity \
--cluster-config cluster.json \ --cluster-config cluster.json \
--cluster "sbatch -A {cluster.account} -p {cluster.partition} --ntasks {cluster.ntasks} --cpus-per-task {cluster.ncpu} --nodes {cluster.nodes} -t {cluster.time} --job-name {cluster.name} --output {cluster.output} --error {cluster.error}" & --cluster "sbatch -A {cluster.account} -p {cluster.partition} --ntasks {cluster.ntasks} --cpus-per-task {cluster.ncpu} --nodes {cluster.nodes} -t {cluster.time} --job-name {cluster.name} --output {cluster.output} --error {cluster.error}" &
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment