Commit e7a546d4 authored by van den Berg
Browse files

Make the scatter output dynamic

`dynamic` is a special keyword in Snakemake that marks outputs whose
number of files is not known before execution.
This is the case for the scatterregions rule, since the number of outputs
depends on both the size of the reference genome and the size of the
scattered chunks.

See
https://snakemake.readthedocs.io/en/stable/snakefiles/rules.html#dynamic-files
for details
parent f0e48bfc
......@@ -325,7 +325,7 @@ rule scatterregions:
input:
ref = REFERENCE,
output:
regions = "scatter/scatter-{chunk}.bed"
regions = dynamic("scatter/scatter-{chunk}.bed")
singularity: containers["biopet-scatterregions"]
shell: "mkdir -p scatter && "
"biopet-scatterregions "
......@@ -354,7 +354,7 @@ rule gvcf_scatter:
rule gvcf_gather:
"""Gather all GVCF scatters"""
input:
gvcfs = "{sample}/vcf/{sample}.{chunk}.part.vcf.gz",
gvcfs = dynamic("{sample}/vcf/{sample}.{chunk}.part.vcf.gz"),
output:
gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
singularity: containers["bcftools"]
......
......@@ -60,3 +60,23 @@
- path: "micro/vcf/micro.13.part.vcf.gz"
- path: "scatter/scatter-1.bed"
- path: "scatter/scatter-13.bed"
# Integration test (tags: integration, scatter) that runs Snakemake with
# Singularity against the data under tests/data and requests the gathered
# GVCF micro/vcf/micro.g.vcf.gz as the target.
# NOTE(review): the `files` entry presumably makes the test harness check that
# the target exists after the run - confirm against the test framework's docs.
- name: test-new-scatter-gvcf
tags:
- integration
- scatter
command: >
snakemake
--use-singularity
--singularity-prefix /tmp/singularity
--singularity-args ' --cleanenv --bind /tmp'
--jobs 1 -w 120
-r -p -s Snakefile
micro/vcf/micro.g.vcf.gz
--config
REFERENCE=tests/data/ref.fa
DBSNP=tests/data/database.vcf.gz
KNOWN_SITES=tests/data/database.vcf.gz
SAMPLE_CONFIG=tests/data/sample_config.json
files:
- path: "micro/vcf/micro.g.vcf.gz"
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment