Commit e2c4cf01 authored by Kevin Kunzmann

keep everything separate

parent e211fe26
...@@ -10,15 +10,7 @@ localrules: impute, clean, download_container, generate_samples_file ...@@ -10,15 +10,7 @@ localrules: impute, clean, download_container, generate_samples_file
rule impute: rule impute:
input: input:
"container.sif", "container.sif",
expand("{output_dir}/imputed-gene-expressions/{region}.expression.txt", region = config['brain_regions'], output_dir = config['output_dir']) expand("{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz", region = config['brain_regions'], output_dir = config['output_dir'])
output:
expand("{output_dir}/imputed-gene-expressionss_combined.rds", output_dir = config['output_dir'])
singularity:
"container.sif"
shell:
"""
Rscript scripts/combine_expression_data.R
"""
# delete output and logs (if run on slurm cluster) # delete output and logs (if run on slurm cluster)
rule clean: rule clean:
...@@ -108,9 +100,12 @@ rule impute_gene_expressions: ...@@ -108,9 +100,12 @@ rule impute_gene_expressions:
input: input:
"container.sif", "container.sif",
samples_file = expand("{output_dir}/dosages/samples.txt", output_dir = config['output_dir']), samples_file = expand("{output_dir}/dosages/samples.txt", output_dir = config['output_dir']),
dosage_files = expand("{output_dir}/dosages/chr{i}.dosage.txt.gz", i = range(1, 23), output_dir = config['output_dir']) dosage_files = expand("{output_dir}/dosages/chr{i}.dosage.txt.gz",
i = list(map(str, range(1, 23))) + ['X'],
output_dir = config['output_dir']
)
output: output:
"{output_dir}/imputed-gene-expressions/{region}.expression.txt" "{output_dir}/imputed-gene-expressions/{region}.expression.txt.gz"
singularity: singularity:
"container.sif" "container.sif"
shell: shell:
...@@ -125,4 +120,5 @@ rule impute_gene_expressions: ...@@ -125,4 +120,5 @@ rule impute_gene_expressions:
--output_prefix {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region} --output_prefix {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}
mv {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}_predicted_expression.txt \ mv {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}_predicted_expression.txt \
{wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}.expression.txt {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}.expression.txt
gzip {wildcards.output_dir}/imputed-gene-expressions/{wildcards.region}.expression.txt
""" """
#!/usr/bin/env Rscript
# Combine the per-region imputed gene expression tables into one long-format
# table and store it as a gz-compressed RDS file in the configured output
# directory.
library(dplyr, warn.conflicts = FALSE)
library(readr, warn.conflicts = FALSE)

config <- yaml::read_yaml("config.yml")

# FID and IID are parsed as character, every other column as double
col_types <- cols(
  .default = col_double(),
  FID = col_character(),
  IID = col_character()
)

# Read the expression table of a single brain region and reshape it to long
# format: one row per (tissue, FID, IID, ensembl_gene_id).
read_region_long <- function(region) {
  region_path <- sprintf(
    "%s/imputed-gene-expressions/%s.expression.txt",
    config$output_dir,
    region
  )
  region_wide <- read_tsv(region_path, col_types = col_types, progress = FALSE)
  region_long <- tidyr::gather(
    region_wide, "ensembl_gene_id", "expression", -FID, -IID
  )
  region_long <- mutate(region_long, tissue = region)
  select(region_long, tissue, everything())
}

# read and combine individual expression data in long format
per_region <- purrr::map(config$brain_regions, read_region_long)
stacked <- do.call(rbind, per_region)

# make missing genes explicit: spreading to wide format fills every
# (tissue, FID, IID) x gene combination that is absent with NA, and gathering
# again turns those NAs into explicit rows
all_genes_wide <- tidyr::spread(
  stacked, ensembl_gene_id, expression, fill = NA_real_
)
all_genes_long <- tidyr::gather(
  all_genes_wide, "ensembl_gene_id", "expression", -FID, -IID, -tissue
)

write_rds(
  all_genes_long,
  sprintf("%s/imputed-gene-expressionss_combined.rds", config$output_dir),
  compress = "gz"
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment