Commit 1ce11c64 authored by van den Berg's avatar van den Berg
Browse files

Move markdup inputs to separate file

parent f16a49c1
Pipeline #4058 passed with stages
in 41 minutes and 59 seconds
......@@ -108,19 +108,10 @@ rule align:
"CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
"INPUT=/dev/stdin OUTPUT={output} SORT_ORDER=coordinate 2> {log}"
def markdup_bam_input(wildcards):
    """Build one picard INPUT= argument per read group of this sample."""
    template = "INPUT={sample}/bams/{sample}-{read_group}.sorted.bam"
    arguments = []
    for read_group in get_readgroup(wildcards):
        arguments.append(template.format(sample=wildcards.sample,
                                         read_group=read_group))
    return arguments
# NOTE(review): this span is commit-diff residue — it still shows BOTH the
# removed per-readgroup lambda (below) and its replacement
# `markdup_input_files`; only one `bam =` entry should survive in the real
# file. Confirm against the repository before relying on this text.
rule markdup:
    """Mark duplicates in BAM file"""
    input:
        bam = lambda wildcards:
        ("{sample}/bams/{sample}-{read_group}.sorted.bam".format(
            sample=wildcards.sample, read_group=rg)
            for rg in get_readgroup(wildcards)),
        bam = markdup_input_files,
        tmp = ancient("tmp")
    output:
        bam = "{sample}/bams/{sample}.bam",
......@@ -128,7 +119,7 @@ rule markdup:
        metrics = "{sample}/bams/{sample}.metrics"
    log: "log/{sample}/markdup.log"
    params:
        bams=markdup_input_string
    container: containers["picard"]
    shell: "picard -Xmx4G -Djava.io.tmpdir={input.tmp} MarkDuplicates "
        "CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
......
# Mapping from tool name to the container image used to run it.
# NOTE: the scraped diff showed the pre- and post-commit dict entries
# interleaved (duplicate keys, missing comma); this is the post-commit,
# single-quoted version only.
containers = {
    'bcftools': 'docker://quay.io/biocontainers/bcftools:1.9--ha228f0b_4',
    'bedtools-2.26-python-2.7': 'docker://quay.io/biocontainers/mulled-v2-3251e6c49d800268f0bc575f28045ab4e69475a6:4ce073b219b6dabb79d154762a9b67728c357edb-0',
    'biopet-scatterregions': 'docker://quay.io/biocontainers/biopet-scatterregions:0.2--0',
    'bwa-0.7.17-picard-2.22.8': 'docker://quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:76d16eabff506ac13338d7f14644a0ad301b9d7e-0',
    'cutadapt': 'docker://quay.io/biocontainers/cutadapt:2.9--py37h516909a_0',
    'debian': 'docker://debian:buster-slim',
    'fastqc': 'docker://quay.io/biocontainers/fastqc:0.11.7--4',
    'gatk': 'docker://broadinstitute/gatk3:3.7-0',
    'gvcf2coverage': 'docker://lumc/gvcf2coverage:0.1-dirty-2',
    'multiqc': 'docker://quay.io/biocontainers/multiqc:1.8--py_2',
    'picard': 'docker://quay.io/biocontainers/picard:2.22.8--0',
    'python3': 'docker://python:3.6-slim',
    'samtools-1.7-python-3.6': 'docker://quay.io/biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:1abf1824431ec057c7d41be6f0c40e24843acde4-0',
    'vtools': 'docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0',
}
# NOTE(review): this function is only partially visible — two diff hunk
# markers (`......@@`) cut through its body, and both the removed
# double-quoted and added single-quoted variants of several statements are
# present. Do not treat this span as runnable; consult the repository for
# the real definition.
def process_config():
......@@ -35,7 +35,7 @@ def process_config():
    # If you specify a baitsfile, you also have to specify a targets file for
    # picard
    if "baitsfile" in config and "targetsfile" not in config:
    if 'baitsfile' in config and 'targetsfile' not in config:
        msg = 'Invalid --configfile: "baitsfile" specified without "targetsfile"'
        raise jsonschema.ValidationError(msg)
......@@ -54,12 +54,12 @@ def process_config():
    set_default('gatk_jar', os.path.join(os.path.sep,'usr','GenomeAnalysisTK.jar'))
    # Set the script paths
    set_default("covstats", srcdir("src/covstats.py"))
    set_default("collect_stats", srcdir("src/collect_stats.py"))
    set_default("merge_stats", srcdir("src/merge_stats.py"))
    set_default("stats_to_tsv", srcdir("src/stats_to_tsv.py"))
    set_default("py_wordcount", srcdir("src/pywc.py"))
    set_default("cutadapt_summary", srcdir("src/cutadapt_summary.py"))
    set_default('covstats', srcdir('src/covstats.py'))
    set_default('collect_stats', srcdir('src/collect_stats.py'))
    set_default('merge_stats', srcdir('src/merge_stats.py'))
    set_default('stats_to_tsv', srcdir('src/stats_to_tsv.py'))
    set_default('py_wordcount', srcdir('src/pywc.py'))
    set_default('cutadapt_summary', srcdir('src/cutadapt_summary.py'))
def coverage_files(wildcards):
    """Return a list of all coverage bed files.

    One file is produced per (sample, coverage_threshold) pair, as
    '{sample}/vcf/{sample}_{threshold}.bed'. Returns an empty list when
    no 'coverage_threshold' is present in the configuration.

    NOTE(review): the scraped diff cut this docstring in half (it was left
    unterminated); the body below matches the visible statements.
    """
    if 'coverage_threshold' not in config:
        return list()

    # Fetch the values we need from the configuration
    samples = config['samples']
    thresholds = config['coverage_threshold']

    # One bed file for every combination of sample and threshold
    files = list()
    for sample, threshold in itertools.product(samples, thresholds):
        files.append(f'{sample}/vcf/{sample}_{threshold}.bed')
    return files
def markdup_input_files(wildcards):
    """Determine the input files for mark duplicates.

    This is the step where the per-readgroup bam files are merged: one
    sorted bam file is listed for every read group of the sample.
    """
    sample = config['samples'][wildcards.sample]
    sample_name = wildcards.sample
    # One sorted, per-readgroup bam file per read group of this sample
    return [f'{sample_name}/bams/{sample_name}-{readgroup}.sorted.bam'
            for readgroup in sample['read_groups']]
def markdup_input_string(wildcards):
    """Generate the INPUT for each bam file """
    arguments = list()
    for bam_file in markdup_input_files(wildcards):
        arguments.append(f'INPUT={bam_file} ')
    return arguments
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment