Commit 1ce11c64 authored by van den Berg's avatar van den Berg
Browse files

Move markdup inputs to separate file

parent f16a49c1
Pipeline #4058 passed with stages
in 41 minutes and 59 seconds
......@@ -108,19 +108,10 @@ rule align:
"INPUT=/dev/stdin OUTPUT={output} SORT_ORDER=coordinate 2> {log}"
def markdup_bam_input(wildcards):
    """Return one ``INPUT=<bam>`` argument string per read group.

    For every item produced by ``get_readgroup(wildcards)`` a string of the
    form ``INPUT=<sample>/bams/<sample>-<read_group>.sorted.bam`` is built,
    where ``<sample>`` is taken from ``wildcards.sample``.  The result is a
    list, in the order ``get_readgroup`` yields the read groups.
    """
    template = "INPUT={sample}/bams/{sample}-{read_group}.sorted.bam"
    arguments = []
    for group in get_readgroup(wildcards):
        arguments.append(
            template.format(sample=wildcards.sample, read_group=group)
        )
    return arguments
rule markdup:
"""Mark duplicates in BAM file"""
bam = lambda wildcards:
sample=wildcards.sample, read_group=rg)
for rg in get_readgroup(wildcards)),
bam = markdup_input_files,
tmp = ancient("tmp")
bam = "{sample}/bams/{sample}.bam",
......@@ -128,7 +119,7 @@ rule markdup:
metrics = "{sample}/bams/{sample}.metrics"
log: "log/{sample}/markdup.log"
container: containers["picard"]
shell: "picard -Xmx4G{input.tmp} MarkDuplicates "
containers = {
"bcftools": "docker://",
"bedtools-2.26-python-2.7": "docker://",
"biopet-scatterregions": "docker://",
"bwa-0.7.17-picard-2.22.8": "docker://",
"cutadapt": "docker://",
"debian": "docker://debian:buster-slim",
"fastqc": "docker://",
"gatk": "docker://broadinstitute/gatk3:3.7-0",
"gvcf2coverage": "docker://lumc/gvcf2coverage:0.1-dirty-2",
"multiqc": "docker://",
"picard": "docker://",
"python3": "docker://python:3.6-slim",
"samtools-1.7-python-3.6": "docker://",
"vtools": "docker://"
'bcftools': 'docker://',
'bedtools-2.26-python-2.7': 'docker://',
'biopet-scatterregions': 'docker://',
'bwa-0.7.17-picard-2.22.8': 'docker://',
'cutadapt': 'docker://',
'debian': 'docker://debian:buster-slim',
'fastqc': 'docker://',
'gatk': 'docker://broadinstitute/gatk3:3.7-0',
'gvcf2coverage': 'docker://lumc/gvcf2coverage:0.1-dirty-2',
'multiqc': 'docker://',
'picard': 'docker://',
'python3': 'docker://python:3.6-slim',
'samtools-1.7-python-3.6': 'docker://',
'vtools': 'docker://'
def process_config():
......@@ -35,7 +35,7 @@ def process_config():
# If you specify a baitsfile, you also have to specify a targets file for
# picard
if "baitsfile" in config and "targetsfile" not in config:
if 'baitsfile' in config and 'targetsfile' not in config:
msg = 'Invalid --configfile: "baitsfile" specified without "targetsfile"'
raise jsonschema.ValidationError(msg)
......@@ -54,12 +54,12 @@ def process_config():
set_default('gatk_jar', os.path.join(os.path.sep,'usr','GenomeAnalysisTK.jar'))
# Set the script paths
set_default("covstats", srcdir("src/"))
set_default("collect_stats", srcdir("src/"))
set_default("merge_stats", srcdir("src/"))
set_default("stats_to_tsv", srcdir("src/"))
set_default("py_wordcount", srcdir("src/"))
set_default("cutadapt_summary", srcdir("src/"))
set_default('covstats', srcdir('src/'))
set_default('collect_stats', srcdir('src/'))
set_default('merge_stats', srcdir('src/'))
set_default('stats_to_tsv', srcdir('src/'))
set_default('py_wordcount', srcdir('src/'))
set_default('cutadapt_summary', srcdir('src/'))
def coverage_files(wildcards):
""" Return a list of all coverage files
......@@ -72,9 +72,27 @@ def coverage_files(wildcards):
if 'coverage_threshold' not in config:
return list()
# Fetch the values we need from the configuration
samples = config['samples']
thresholds = config['coverage_threshold']
files = list()
for sample, threshold in itertools.product(samples, thresholds):
return files
def markdup_input_files(wildcards):
""" Determine the input files for MarkDuplicates
This is the step where we merge the different per-readgroup bam files
files = list()
sample = config['samples'][wildcards.sample]
sample_name = wildcards.sample
for readgroup in sample['read_groups']:
return files
def markdup_input_string(wildcards):
    """Return an ``INPUT=<file> `` argument string for each markdup input.

    The files come from ``markdup_input_files(wildcards)``; each one is
    wrapped as ``INPUT=<file>`` followed by a single trailing space, and the
    resulting strings are returned as a list in the same order.
    """
    arguments = []
    for file in markdup_input_files(wildcards):
        # The trailing space inside the literal is part of the output.
        arguments.append(f'INPUT={file} ')
    return arguments
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment