Commit 7bc45284 authored by van den Berg's avatar van den Berg
Browse files

Add option to create a merged multi sample VCF

parent b1baa6de
Pipeline #4798 passed with stages
in 34 minutes and 12 seconds
......@@ -157,6 +157,7 @@ The following configuration options are **optional**:
| `scatter_size` | The size of chunks to divide the reference into for parallel execution. Default = 1000000000 |
| `coverage_threshold` | One or more threshold coverage values. For each value, a sample specific bed file will be created that contains the regions where the coverage is above the threshold |
| `restrict_BQSR` | Restrict GATK BaseRecalibration to a single chromosome. This is faster, but the recalibration is possibly less reliable |
| `merge_vcf` | Merge the per-sample VCF files into a single multi-sample VCF file (`merged_multisample.vcf.gz`). Default = False |
## Cluster configuration
......
......@@ -43,7 +43,8 @@ rule all:
gvcfs = expand("{s}/vcf/{s}.g.vcf.gz", s=config["samples"]),
gvcf_tbi = expand("{s}/vcf/{s}.g.vcf.gz.tbi", s=config["samples"]),
coverage_stats = coverage_stats,
coverage_files = coverage_files
coverage_files = coverage_files,
merged_vcf = "merged_multisample.vcf.gz" if config["merge_vcf"] else []
rule create_tmp:
"""
......@@ -520,3 +521,23 @@ rule gvcf2coverage:
containers["gvcf2coverage"]
shell:
"gvcf2coverage -t {wildcards.threshold} < {input} 2> {log} | cut -f 1,2,3 > {output}"
rule merge_vcf:
    """
    Merge the per-sample VCF files into a single multi-sample VCF.

    Input:  one `{sample}/vcf/{sample}.vcf.gz` for every sample in the config.
    Output: the bgzip-compressed merged VCF and its tabix (.tbi) index.
    Log:    stderr of both the merge and the index step.
    """
    input:
        vcfs = expand("{sample}/vcf/{sample}.vcf.gz", sample=config["samples"])
    output:
        vcf = "merged_multisample.vcf.gz",
        # Declare the index so Snakemake tracks it and removes it together
        # with the VCF when the job fails.
        tbi = "merged_multisample.vcf.gz.tbi"
    log:
        "log/merged_multisample.log"
    container:
        containers["bcftools"]
    threads:
        8
    # Use {threads} rather than a literal 8: Snakemake lowers the thread count
    # when fewer cores are available, and the tools should follow that.
    # NOTE(review): bcftools merge normally refuses to run on a single input
    # file -- confirm the behaviour when only one sample is configured.
    shell:
        "bcftools merge --merge both "
        "--output-type z "
        "--output {output.vcf} "
        "--threads {threads} "
        "{input.vcfs} 2> {log} && "
        "bcftools index --tbi --threads {threads} {output.vcf} 2>> {log}"
......@@ -59,6 +59,7 @@ def process_config():
# Set the default config values
set_default('scatter_size', 1000000000)
set_default('female_threshold', 0.6)
set_default('merge_vcf', False)
# Hide the absolute path so the snakemake linter doesn't cry about it
set_default('gatk_jar', os.path.join(os.path.sep,'usr','GenomeAnalysisTK.jar'))
......
......@@ -16,6 +16,7 @@
"coverage_threshold",
"restrict_BQSR",
"gatk_jar",
"merge_vcf",
"baitsfile"
],
"properties": {
......@@ -79,6 +80,10 @@
"description": "Restrict BQSR to the listed chromosome",
"type": "string"
},
"merge_vcf": {
"description": "Create a merged output vcf file containing all samples",
"type": "boolean"
},
"refflat": {
"description": "RefFlat file with transcripts",
"type": "string"
......
{
"samples": {
"micro1": {
"read_groups": {
"lib_01": {
"R1": "tests/data/fastq/micro_rg1_R1.fq.gz",
"R2": "tests/data/fastq/micro_rg1_R2.fq.gz"
}
}
},
"micro2": {
"read_groups": {
"lib_02": {
"R1": "tests/data/fastq/micro_rg2_R1.fq.gz",
"R2": "tests/data/fastq/micro_rg2_R2.fq.gz"
}
}
}
},
"reference":"tests/data/reference/ref.fa",
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"targetsfile": "tests/data/reference/full_chrM.bed",
"baitsfile": "tests/data/reference/target_baits.bed",
"merge_vcf": true
}
......@@ -17,6 +17,7 @@
- micro/bams/micro.insert_size_metrics
must_not_contain:
- rror
- rule merge_vcf
stderr:
must_not_contain:
- rror
......@@ -52,3 +53,15 @@
- 'output: micro/vcf/micro_60.bed'
- 'output: micro/vcf/micro_120.bed'
- 'output: micro/vcf/micro_196.bed'
# Dry-run (-n) using the merge config file: must exit 0 and list the merge_vcf rule among the planned jobs.
- name: dry-run-merge
tags:
- dry-run
command: >
snakemake -s Snakefile -n --configfile
tests/data/config/sample_config_merge_samples.json
exit_code: 0
stdout:
contains:
- Job counts
- rule merge_vcf
......@@ -306,3 +306,14 @@
files:
- path: 'micro/bams/micro.hs_metrics.txt'
- path: 'multiqc_report/multiqc_data/multiqc_picard_HsMetrics.json'
# Full run inside singularity using the merge config file; lists the merged VCF and its .tbi index as expected files.
- name: integration-merge
tags:
- integration
command: >
snakemake --use-singularity --singularity-args ' --containall --bind /tmp '
--jobs 1 -w 120 -r -p
--configfile tests/data/config/sample_config_merge_samples.json
files:
- path: 'merged_multisample.vcf.gz'
- path: 'merged_multisample.vcf.gz.tbi'
......@@ -68,6 +68,13 @@
contains:
- 'Invalid --configfile: sample names should not overlap ("micro1" is contained in "micro12")'
# Sanity check: dry-run (-n) with the merge config file; this entry only gives the command, no explicit output checks.
- name: sanity-merge
tags:
- sanity
command: >
snakemake -s Snakefile -n --configfile
tests/data/config/sample_config_merge_samples.json
- name: sanity-snakemake-lint
tags:
- sanity
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment