Commit 745aff96 authored by van den Berg
Browse files

Switch to using snakemake checkpoints

parent 57eaea50
......@@ -320,12 +320,12 @@ rule baserecal:
"-R {input.ref} -cov ReadGroupCovariate -cov QualityScoreCovariate "
"-cov CycleCovariate -cov ContextCovariate {params.known_sites}"
rule scatterregions:
checkpoint scatterregions:
"""Scatter the reference genome"""
input:
ref = REFERENCE,
output:
regions = dynamic("scatter/scatter-{chunk}.bed")
regions = "scatter/scatter-{chunk}.bed"
singularity: containers["biopet-scatterregions"]
shell: "mkdir -p scatter && "
"biopet-scatterregions "
......@@ -341,8 +341,8 @@ rule gvcf_scatter:
ref=REFERENCE,
region="scatter/scatter-{chunk}.bed"
output:
gvcf=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz"),
gvcf_tbi=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz.tbi")
gvcf=temp("{sample}/vcf/{sample}.{chunk}.g.vcf.gz"),
gvcf_tbi=temp("{sample}/vcf/{sample}.{chunk}.g.vcf.gz.tbi")
singularity: containers["gatk"]
shell: "java -jar -Xmx4G -XX:ParallelGCThreads=1 /usr/GenomeAnalysisTK.jar "
"-T HaplotypeCaller -ERC GVCF -I "
......@@ -351,48 +351,45 @@ rule gvcf_scatter:
"-variant_index_type LINEAR -variant_index_parameter 128000 "
"-BQSR {input.bqsr}"
# Gather step: merges all per-chunk GVCF parts of one sample into a single
# {sample}.g.vcf.gz file with bcftools.
rule gvcf_gather:
"""Gather all GVCF scatters"""
input:
# The part files are wrapped in dynamic(), i.e. the set of {chunk} values is
# not spelled out in this rule.
# NOTE(review): presumably the chunks are only known after the scatter step has
# run -- confirm against the Snakemake version in use.
gvcfs = dynamic("{sample}/vcf/{sample}.{chunk}.part.vcf.gz"),
output:
gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
singularity: containers["bcftools"]
# The concatenated result reaches the output file through shell redirection.
# NOTE(review): -n is presumably bcftools concat's "naive" mode -- confirm.
shell: "bcftools concat {input.gvcfs} -n > {output.gvcf}"
#rule gvcf_gather:
# """Gather all GVCF scatters"""
# input:
# gvcfs = "{sample}/vcf/{sample}.{chunk}.part.vcf.gz",
# output:
# gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
# singularity: containers["bcftools"]
# shell: "bcftools concat {input.gvcfs} -n > {output.gvcf}"
# Index step: runs tabix on the gathered per-sample GVCF.
rule gvcf_gather_tbi:
"""Index GVCF"""
input:
gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
output:
tbi = "{sample}/vcf/{sample}.g.vcf.gz.tbi"
singularity: containers["tabix"]
# The command only names the input file; the .tbi path is never passed to it.
# NOTE(review): presumably tabix writes <input>.tbi next to the input, which
# would match output.tbi -- confirm.
shell: "tabix -pvcf {input.gvcf}"
#rule gvcf_gather_tbi:
# """Index GVCF"""
# input:
# gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
# output:
# tbi = "{sample}/vcf/{sample}.g.vcf.gz.tbi"
# singularity: containers["tabix"]
# shell: "tabix -pvcf {input.gvcf}"
# Genotyping step: runs GATK GenotypeGVCFs and produces a VCF plus its index.
# NOTE(review): the keys gvcf/tbi/ref, the outputs vcf/vcf_tbi and the last
# shell line each appear twice below -- once using dynamic()/{params.li} and
# once without. This looks like the before and after versions of the rule
# shown together; confirm against the raw diff before treating it as runnable.
rule genotype_scatter:
"""Run GATK's GenotypeGVCFs by chunk"""
input:
# First variant: reads the gathered per-sample GVCF and a dynamic() region file.
gvcf = "{sample}/vcf/{sample}.g.vcf.gz",
tbi = "{sample}/vcf/{sample}.g.vcf.gz.tbi",
ref=REFERENCE,
region=dynamic("scatter/scatter-{chunk}.bed")
params:
# NOTE(review): str.join over a plain string iterates its characters, so this
# places " -V " between every character of the path instead of building one
# "-V <file>" argument -- verify the intent.
li=" -V ".join("{sample}/vcf/{sample}.g.vcf.gz")
# Second variant: reads the per-chunk GVCF directly; no region input, no params.
gvcf = "{sample}/vcf/{sample}.{chunk}.g.vcf.gz",
tbi = "{sample}/vcf/{sample}.{chunk}.g.vcf.gz.tbi",
ref=REFERENCE
output:
# First variant: dynamic() outputs with a ".part" infix in the file name.
vcf=dynamic("{sample}/vcf/{sample}.genotype.{chunk}.part.vcf.gz"),
vcf_tbi=dynamic("{sample}/vcf/{sample}.genotype.{chunk}.part.vcf.gz.tbi")
# Second variant: plain per-chunk outputs without the ".part" infix.
vcf="{sample}/vcf/{sample}.genotype.{chunk}.vcf.gz",
vcf_tbi="{sample}/vcf/{sample}.genotype.{chunk}.vcf.gz.tbi"
singularity: containers["gatk"]
# Two alternative final argument lines follow: the first passes {params.li}
# and restricts the run with -L {input.region}; the second passes {input.gvcf}
# and has no -L restriction.
shell: "java -jar -Xmx15G -XX:ParallelGCThreads=1 /usr/GenomeAnalysisTK.jar -T "
"GenotypeGVCFs -R {input.ref} "
"-V {params.li} -L '{input.region}' -o '{output.vcf}'"
"-V {input.gvcf} -o '{output.vcf}'"
rule genotype_gather:
"""Gather all genotyping VCFs"""
input:
vcfs = dynamic("{sample}/vcf/{sample}.genotype.{chunk}.part.vcf.gz"),
vcfs = "{sample}/vcf/{sample}.genotype.{chunk}.vcf.gz",
output:
vcf = "{sample}/vcf/{sample}.vcf.gz"
singularity: containers["bcftools"]
......
......@@ -37,7 +37,7 @@
- "chrM\t16023\t.\tG\tA\t1878.77\t."
- "GT:AD:DP:GQ:PL\t0/1:73,73:146:99:1907,0,1879"
- name: test-new-scatter
- name: test-new-scatter-gvcf
tags:
- integration
- scatter
......@@ -48,35 +48,13 @@
--singularity-args ' --cleanenv --bind /tmp'
--jobs 1 -w 120
-r -p -s Snakefile
micro/vcf/micro.1.part.vcf.gz
micro/vcf/micro.13.part.vcf.gz
micro/vcf/micro.1.g.vcf.gz
--config
REFERENCE=tests/data/ref.fa
DBSNP=tests/data/database.vcf.gz
KNOWN_SITES=tests/data/database.vcf.gz
SAMPLE_CONFIG=tests/data/sample_config.json
files:
- path: "micro/vcf/micro.1.part.vcf.gz"
- path: "micro/vcf/micro.13.part.vcf.gz"
- path: "micro/vcf/micro.1.g.vcf.gz"
- path: "scatter/scatter-1.bed"
- path: "scatter/scatter-13.bed"
# Integration test tagged "integration" and "scatter": invokes snakemake with
# singularity enabled, one job at a time, targeting micro/vcf/micro.g.vcf.gz,
# and passes the reference, dbSNP, known-sites and sample-config test data
# through --config.
# NOTE(review): `files` presumably lists paths that must exist after the run
# (pytest-workflow style) -- confirm against the test runner in use.
- name: test-new-scatter-gvcf
tags:
- integration
- scatter
command: >
snakemake
--use-singularity
--singularity-prefix /tmp/singularity
--singularity-args ' --cleanenv --bind /tmp'
--jobs 1 -w 120
-r -p -s Snakefile
micro/vcf/micro.g.vcf.gz
--config
REFERENCE=tests/data/ref.fa
DBSNP=tests/data/database.vcf.gz
KNOWN_SITES=tests/data/database.vcf.gz
SAMPLE_CONFIG=tests/data/sample_config.json
files:
- path: "micro/vcf/micro.g.vcf.gz"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment