Commit e0602c86 authored by Sander Bollen's avatar Sander Bollen
Browse files

gather gvcfs with bcftools

parent f424c2f9
......@@ -363,32 +363,56 @@ rule gvcf_scatter:
"-BQSR {input.bqsr}"
rule gvcf_chunkfile:
"""
Create simple text file with paths to chunks for GVCF.
This uses a run directive in stead of a shell directive because
the amount of chunks may be so large the shell would error out with
an "argument list too long" error.
See https://unix.stackexchange.com/a/120842 for more info
This also means this rule lives outside of singularity/conda and is
executed in snakemake's own environment.
"""
params:
chunkfiles = expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
chunk=CHUNKS)
output:
file = "{sample}/vcf/chunkfile.txt"
run:
with open(output.file) as ohandle:
for filename in params.chunkfiles:
ohandle.write(filename + "\n")
rule gvcf_gather:
"""Gather all gvcf scatters"""
"""Gather all GVCF scatters"""
input:
gvcfs=expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
chunk=CHUNKS),
tbis=expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz.tbi",
chunk=CHUNKS),
ref=REFERENCE,
gatk=GATK
params:
gvcfs="' -V '".join(expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
chunk=CHUNKS))
gvcfs = expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
chunk=CHUNKS),
chunkfile = "{sample}/vcf/chunkfile.txt"
output:
gvcf="{sample}/vcf/{sample}.g.vcf.gz"
singularity: "docker://quay.io/biocontainers/gatk:3.7--py36_1"
conda: "envs/gatk.yml"
shell: "java -Xmx4G -XX:ParallelGCThreads=1 -cp {input.gatk} "
"org.broadinstitute.gatk.tools.CatVariants "
"-R {input.ref} -V '{params.gvcfs}' -out {output.gvcf} "
"-assumeSorted"
gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
singularity: "docker://quay.io/biocontainers/bcftools:1.9--ha228f0b_4"
shell: "bcftools concat -f {input.chunkfile} -n > {output.gvcf}"
rule gvcf_gather_tbi:
"""Index GVCF"""
input:
gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
output:
tbi = "{sample}/vcf/{sample}.g.vcf.gz.tbi"
singularity: "docker://quay.io/biocontainers/tabix:0.2.6--ha92aebf_0"
shell: "tabix -pvcf {input.gvcf}"
rule genotype_scatter:
"""Run GATK's GenotypeGVCFs by chunk"""
input:
gvcfs = expand("{sample}/vcf/{sample}.g.vcf.gz", sample=SAMPLES),
tbis = expand("{sample}/vcf/{sample}.g.vcf.gz.tbi",
samples=SAMPLES),
ref=REFERENCE,
gatk=GATK
params:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment