Commit e35be6a7 authored by van den Berg's avatar van den Berg
Browse files

Add picard CollectHsMetrics to the pipeline

If both a target and bait bedfiles have been specified, calculate the
hybrid-selection (HS) statistics using picard.
parent c9b4fd84
......@@ -454,6 +454,43 @@ rule multiple_metrics:
"PROGRAM=CollectAlignmentSummaryMetrics "
"PROGRAM=CollectInsertSizeMetrics "
rule bed_to_interval:
"""Run picard BedToIntervalList
This is needed to convert the bed files for the capture kit to a format
picard can read
"""
input:
targets = config.get("bedfile",""),
baits = config.get("baitsfile",""),
ref = config["reference"]
output:
target_interval = "target.interval",
baits_interval = "baits.interval"
output:
container: containers["picard"]
shell: "picard BedToIntervalList "
"I={input.targets} O={output.target_interval} "
"SD={input.ref} && "
"picard BedToIntervalList "
"I={input.baits} O={output.baits_interval} "
"SD={input.ref} "
rule hs_metrics:
"""Run picard CollectHsMetrics"""
input:
bam = rules.markdup.output.bam,
ref = config["reference"],
targets = rules.bed_to_interval.output.target_interval,
baits = rules.bed_to_interval.output.baits_interval,
output: "{sample}/bams/{sample}.hs_metrics.txt"
container: containers["picard"]
shell: "picard CollectHsMetrics "
"I={input.bam} O={output} "
"R={input.ref} "
"BAIT_INTERVALS={input.baits} "
"TARGET_INTERVALS={input.targets}"
rule multiqc:
"""
Create multiQC report
......@@ -475,7 +512,11 @@ rule multiqc:
for read_group, sample in get_readgroup_per_sample()),
fastqc_trim = (f"{sample}/pre_process/trimmed-{sample}-{read_group}/"
for read_group, sample in get_readgroup_per_sample())
for read_group, sample in get_readgroup_per_sample()),
hs_metric = expand("{sample}/bams/{sample}.hs_metrics.txt",
sample=config["samples"]) if "baitsfile" in config else []
output:
html = "multiqc_report/multiqc_report.html",
insert = "multiqc_report/multiqc_data/multiqc_picard_insertSize.json"
......
......@@ -12,7 +12,8 @@
"optional": [
"scatter_size",
"female_threshold",
"bedfile"
"bedfile",
"baitsfile"
],
"properties": {
"samples": {
......@@ -62,6 +63,10 @@
"description": "Bed file to calculate the coverage over",
"type": "string"
},
"baitsfile": {
"description": "Bed file of the baits of the capture kit",
"type": "string"
},
"refflat": {
"description": "RefFlat file with transcripts",
"type": "string"
......
{
"samples": {
"micro": {
"read_groups": {
"lib_01": {
"R1": "tests/data/fastq/micro_R1.fq.gz",
"R2": "tests/data/fastq/micro_R2.fq.gz"
}
}
}
},
"reference":"tests/data/reference/ref.fa",
"dbsnp": "tests/data/reference/database.vcf.gz",
"known_sites": ["tests/data/reference/database.vcf.gz"],
"bedfile": "tests/data/reference/target_genes.bed",
"baitsfile": "tests/data/reference/target_baits.bed"
}
chrM 7480 7490
chrM 7495 7500
......@@ -21,3 +21,23 @@
- "rror"
tags:
- dry-run
- name: test-dry-run-target-baits
tags:
- dry-run
command: >
snakemake
-n -r -p -s Snakefile
--configfile tests/data/config/sample_config_target_baits.json
exit_code: 0
stdout:
contains:
- "Job counts"
- "localrule all:"
- "hs_metrics"
- "micro.hs_metrics.txt"
must_not_contain:
- "rror"
stderr:
must_not_contain:
- "rror"
......@@ -288,3 +288,18 @@
- "sample_name\tpreqc_reads\tpreqc_bases\tpostqc_reads\tpostqc_bases"
- "micro1\t7720\t1137566\t7720\t1136416"
- "micro2\t7720\t1139177\t7720\t1137997"
- name: test-integration-target-baits
tags:
- integration
- new
command: >
snakemake
--use-singularity
--singularity-args ' --cleanenv --bind /tmp'
--jobs 1 -w 120
-r -p -s Snakefile
--configfile tests/data/config/sample_config_target_baits.json
files:
- path: "micro/bams/micro.hs_metrics.txt"
- path: "multiqc_report/multiqc_data/multiqc_picard_HsMetrics.json"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment