Commit 4f5695d8 authored by van den Berg
Browse files

Add test for input with multiple readgroups

Add test to make sure the markdup and baserecal rules receive the
correct inputs when a sample has multiple readgroups.
parent 1ce11c64
......@@ -111,7 +111,7 @@ rule align:
rule markdup:
"""Mark duplicates in BAM file"""
input:
bam = markdup_input_files,
bam = sample_bamfiles,
tmp = ancient("tmp")
output:
bam = "{sample}/bams/{sample}.bam",
......@@ -119,7 +119,7 @@ rule markdup:
metrics = "{sample}/bams/{sample}.metrics"
log: "log/{sample}/markdup.log"
params:
bams=markdup_input_string
bams = lambda wc: expand('INPUT={bam}', bam=sample_bamfiles(wc))
container: containers["picard"]
shell: "picard -Xmx4G -Djava.io.tmpdir={input.tmp} MarkDuplicates "
"CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
......@@ -136,21 +136,16 @@ def bqsr_bam_input(wildcards):
rule baserecal:
"""Base recalibrated BAM files"""
input:
bam = lambda wildcards:
("{sample}/bams/{sample}-{read_group}.sorted.bam".format(
sample=wildcards.sample, read_group=rg)
for rg in get_readgroup(wildcards)),
bam = sample_bamfiles,
ref = config["reference"],
vcfs = config["known_sites"]
output: "{sample}/bams/{sample}.baserecal.grp"
log: "log/{sample}/baserecal.log"
params:
known_sites = " ".join(
expand("-knownSites {vcf}", vcf=config["known_sites"])
),
known_sites = expand("-knownSites {vcf}", vcf=config["known_sites"]),
region = f"-L {config['restrict_BQSR']}" if "restrict_BQSR" in config else "",
gatk_jar = config["gatk_jar"],
bams = bqsr_bam_input
bams = lambda wc: expand("-I {bam}", bam=sample_bamfiles(wc))
container: containers["gatk"]
shell: "java -XX:ParallelGCThreads=1 -jar {params.gatk_jar} -T "
"BaseRecalibrator {params.bams} -o {output} -nct 8 "
......
......@@ -81,10 +81,8 @@ def coverage_files(wildcards):
files.append(f'{sample}/vcf/{sample}_{threshold}.bed')
return files
def markdup_input_files(wildcards):
""" Determine the input files for markduplicaates
This is the step where we merge the different per-readgroup bam files
def sample_bamfiles(wildcards):
""" Determine the bam files for a sample (one for each readgroup)
"""
files = list()
sample = config['samples'][wildcards.sample]
......@@ -92,7 +90,3 @@ def markdup_input_files(wildcards):
for readgroup in sample['read_groups']:
files.append(f'{sample_name}/bams/{sample_name}-{readgroup}.sorted.bam')
return files
def markdup_input_string(wildcards):
"""Generate the INPUT for each bam file """
return [f'INPUT={file} ' for file in markdup_input_files(wildcards)]
......@@ -177,6 +177,9 @@
- Job counts
- localrule all
- (100%) done
- BaseRecalibrator -I micro/bams/micro-lib_01.sorted.bam -I micro/bams/micro-lib_02.sorted.bam
contains_regex:
- MarkDuplicates.*INPUT=micro/bams/micro-lib_01.sorted.bam INPUT=micro/bams/micro-lib_02.sorted.bam
must_not_contain:
- rror
files:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment