Snakefile 26.8 KB
Newer Older
Sander Bollen's avatar
Sander Bollen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#   hutspot - a DNAseq variant calling pipeline
#   Copyright (C) 2017-2019, Sander Bollen, Leiden University Medical Center
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""
17
Main Snakefile for the pipeline.
Sander Bollen's avatar
Sander Bollen committed
18
19
20
21
22
23

:copyright: (c) 2017-2019 Sander Bollen
:copyright: (c) 2017-2019 Leiden University Medical Center
:license: AGPL-3.0
"""

Sander Bollen's avatar
Sander Bollen committed
24
import json
Sander Bollen's avatar
Sander Bollen committed
25
from functools import partial
Sander Bollen's avatar
Sander Bollen committed
26
from os.path import join, basename
27
from pathlib import Path
28
import itertools
Sander Bollen's avatar
Sander Bollen committed
29
30

from pyfaidx import Fasta
Sander Bollen's avatar
Sander Bollen committed
31
32

REFERENCE = config.get("REFERENCE")
33
34
if REFERENCE is None:
    raise ValueError("You must set --config REFERENCE=<path>")
Sander Bollen's avatar
Sander Bollen committed
35
if not Path(REFERENCE).exists():
Sander Bollen's avatar
Sander Bollen committed
36
37
    raise FileNotFoundError("Reference file {0} "
                            "does not exist.".format(REFERENCE))
Sander Bollen's avatar
Sander Bollen committed
38
DBSNP = config.get("DBSNP")
39
40
if DBSNP is None:
    raise ValueError("You must set --config DBSNP=<path>")
Sander Bollen's avatar
Sander Bollen committed
41
42
if not Path(DBSNP).exists():
    raise FileNotFoundError("{0} does not exist".format(DBSNP))
43

44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
SCONFIG = config.get("SAMPLE_CONFIG")
if SCONFIG is None:
    raise ValueError("You must set --config SAMPLE_CONFIG=<path>")
if not Path(SCONFIG).exists():
    raise FileNotFoundError("{0} does not exist".format(SCONFIG))

KNOWN_SITES = config.get("KNOWN_SITES")
if KNOWN_SITES is None:
    raise ValueError("You must set --config KNOWN_SITES=<path>")

# The user can specify multiple known sites
KNOWN_SITES = KNOWN_SITES.split(',')
for filename in KNOWN_SITES:
    if not Path(filename).exists():
        raise FileNotFoundError("{0} does not exist".format(filename))

60
61

# these are all optional
62
63
BED = config.get("BED", "")  # comma-separated list of BED files
REFFLAT = config.get("REFFLAT", "")  # comma-separated list of refFlat files
Sander Bollen's avatar
Sander Bollen committed
64
FEMALE_THRESHOLD = config.get("FEMALE_THRESHOLD", 0.6)
Sander Bollen's avatar
Sander Bollen committed
65
MAX_BASES = config.get("MAX_BASES", "")
Sander Bollen's avatar
Sander Bollen committed
66

67
# Generate the input string for basrecalibration
68
69
70
BSQR_known_sites = ''
for argument, filename in zip(itertools.repeat('-knownSites'), KNOWN_SITES):
    BSQR_known_sites +=' {} {}'.format(argument, filename)
71

72
73
74
75
76
def fsrc_dir(*args):
    """Wrapper around snakemake's srcdir to work like os.path.join"""
    if len(args) == 1:
        return srcdir(args[0])
    return srcdir(join(*args))
Sander Bollen's avatar
Sander Bollen committed
77

78
79
80
covpy = fsrc_dir("src", "covstats.py")
colpy = fsrc_dir("src", "collect_stats.py")
mpy = fsrc_dir("src", "merge_stats.py")
Sander Bollen's avatar
Sander Bollen committed
81
seq = fsrc_dir("src", "seqtk.sh")
Sander Bollen's avatar
Sander Bollen committed
82
fqpy = fsrc_dir("src", "fastqc_stats.py")
Sander Bollen's avatar
Sander Bollen committed
83
tsvpy = fsrc_dir("src", "stats_to_tsv.py")
Sander Bollen's avatar
Sander Bollen committed
84
fqsc = fsrc_dir("src", "safe_fastqc.sh")
85
pywc = fsrc_dir("src", "pywc.py")
Sander Bollen's avatar
Sander Bollen committed
86

Sander Bollen's avatar
Sander Bollen committed
87
# sample config parsing
Sander Bollen's avatar
Sander Bollen committed
88
89
90
91
with open(config.get("SAMPLE_CONFIG")) as handle:
    SAMPLE_CONFIG = json.load(handle)
SAMPLES = SAMPLE_CONFIG['samples'].keys()

Sander Bollen's avatar
Sander Bollen committed
92
93
94
95
96
97
98
99
100
if BED != "":
    BEDS = BED.split(",")
else:
    BEDS = []

if REFFLAT != "":
    REFFLATS = REFFLAT.split(",")
else:
    REFFLATS = []
Sander Bollen's avatar
Sander Bollen committed
101
102

BASE_BEDS = [basename(x) for x in BEDS]
Sander Bollen's avatar
Sander Bollen committed
103
BASE_REFFLATS = [basename(x) for x in REFFLATS]
Sander Bollen's avatar
Sander Bollen committed
104

105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
containers = {
    "bcftools": "docker://quay.io/biocontainers/bcftools:1.9--ha228f0b_4",
    "bedtools-2.26-python-2.7": "docker://quay.io/biocontainers/mulled-v2-3251e6c49d800268f0bc575f28045ab4e69475a6:4ce073b219b6dabb79d154762a9b67728c357edb-0",
    "bwa-0.7.17-picard-2.18.7": "docker://quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0",
    "cutadapt": "docker://quay.io/biocontainers/cutadapt:1.14--py36_0",
    "debian": "docker://debian:buster-slim",
    "fastq-count": "docker://quay.io/biocontainers/fastq-count:0.1.0--h14c3975_0",
    "fastqc": "docker://quay.io/biocontainers/fastqc:0.11.7--4",
    "gatk": "docker://broadinstitute/gatk3:3.7-0",
    "multiqc": "docker://quay.io/biocontainers/multiqc:1.5--py36_0",
    "picard-2.14": "docker://quay.io/biocontainers/picard:2.14--py36_0",
    "python3": "docker://python:3.6-slim",
    "samtools-1.7-python-3.6": "docker://quay.io/biocontainers/mulled-v2-eb9e7907c7a753917c1e4d7a64384c047429618a:1abf1824431ec057c7d41be6f0c40e24843acde4-0",
    "seqtk-1.2-jq-1.6": "docker://quay.io/biocontainers/mulled-v2-13686261ac0aa5682c680670ff8cda7b09637943:d143450dec169186731bb4df6f045a3c9ee08eb6-0",
    "sickle": "docker://quay.io/biocontainers/sickle-trim:1.33--ha92aebf_4",
    "tabix": "docker://quay.io/biocontainers/tabix:0.2.6--ha92aebf_0",
    "vtools": "docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0"
}
Sander Bollen's avatar
Sander Bollen committed
123

124
def split_genome(ref, approx_n_chunks=100):
Sander Bollen's avatar
Sander Bollen committed
125
126
127
128
129
130
131
132
    """
    Split genome in chunks.

    Chunks are strings in the format: `<ctg>:<start>-<end>`
    These follow the region string format as used by htslib,
    which uses _1_-based indexing.
    See: http://www.htslib.org/doc/tabix.html
    """
Sander Bollen's avatar
Sander Bollen committed
133
134
135
136
137
    fa = Fasta(ref)
    tot_size = sum([len(x) for x in fa.records.values()])
    chunk_size = tot_size//approx_n_chunks
    chunks = []
    for chrom_name, chrom_value in fa.records.items():
Sander Bollen's avatar
Sander Bollen committed
138
        pos = 1
Sander Bollen's avatar
Sander Bollen committed
139
140
141
142
143
144
145
146
147
148
149
150
151
        while pos <= len(chrom_value):
            end = pos+chunk_size
            if end <= len(chrom_value):
                chunk = "{0}:{1}-{2}".format(chrom_name, pos, end)
            else:
                chunk = "{0}:{1}-{2}".format(chrom_name, pos, len(chrom_value))
            chunks.append(chunk)
            pos = end
    return chunks

CHUNKS = split_genome(REFERENCE)


Sander Bollen's avatar
Sander Bollen committed
152

Sander Bollen's avatar
Sander Bollen committed
153
154
def get_r(strand, wildcards):
    """Get fastq files on a single strand for a sample"""
Sander Bollen's avatar
Sander Bollen committed
155
    s = SAMPLE_CONFIG['samples'].get(wildcards.sample)
Sander Bollen's avatar
Sander Bollen committed
156
    rs = []
157
    for l in sorted(s['libraries'].keys()):
Sander Bollen's avatar
Sander Bollen committed
158
159
        rs.append(s['libraries'][l][strand])
    return rs
Sander Bollen's avatar
Sander Bollen committed
160

Sander Bollen's avatar
Sander Bollen committed
161
162
get_r1 = partial(get_r, "R1")
get_r2 = partial(get_r, "R2")
Sander Bollen's avatar
Sander Bollen committed
163
164


Sander Bollen's avatar
Sander Bollen committed
165
def get_bedpath(wildcards):
Sander Bollen's avatar
Sander Bollen committed
166
167
    """Get absolute path of a bed file"""
    return next(x for x in BEDS if basename(x) == wildcards.bed)
Sander Bollen's avatar
Sander Bollen committed
168
169
170


def get_refflatpath(wildcards):
Sander Bollen's avatar
Sander Bollen committed
171
    """Get absolute path of a refflat file"""
Sander Bollen's avatar
Sander Bollen committed
172
    return next(x for x in REFFLATS if basename(x) == wildcards.ref)
Sander Bollen's avatar
Sander Bollen committed
173
174


Sander Bollen's avatar
Sander Bollen committed
175
def sample_gender(wildcards):
Sander Bollen's avatar
Sander Bollen committed
176
    """Get sample gender"""
Sander Bollen's avatar
Sander Bollen committed
177
178
179
180
    sam = SAMPLE_CONFIG['samples'].get(wildcards.sample)
    return sam.get("gender", "null")


Sander Bollen's avatar
Sander Bollen committed
181
182
183
184
def metrics(do_metrics=True):
    if not do_metrics:
        return ""

Sander Bollen's avatar
Sander Bollen committed
185
186
187
188
189
190
    fqcr = expand("{sample}/pre_process/raw_fastqc/.done.txt",
                  sample=SAMPLES)
    fqcm = expand("{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
                  sample=SAMPLES)
    fqcp = expand("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
                  sample=SAMPLES)
191
    if len(REFFLATS) >= 1:
Sander Bollen's avatar
Sander Bollen committed
192
193
        coverage_stats = expand("{sample}/coverage/{ref}.coverages.tsv",
                                sample=SAMPLES, ref=BASE_REFFLATS)
194
195
    else:
        coverage_stats = []
Sander Bollen's avatar
Sander Bollen committed
196
    stats = "stats.json"
Sander Bollen's avatar
Sander Bollen committed
197
    return  fqcr + fqcm + fqcp + coverage_stats + [stats]
Sander Bollen's avatar
Sander Bollen committed
198
199


Sander Bollen's avatar
Sander Bollen committed
200
201
202
localrules: gvcf_chunkfile, genotype_chunkfile


Sander Bollen's avatar
Sander Bollen committed
203
204
rule all:
    input:
Sander Bollen's avatar
Sander Bollen committed
205
206
        combined="multisample/genotyped.vcf.gz",
        report="multiqc_report/multiqc_report.html",
Sander Bollen's avatar
Sander Bollen committed
207
208
        bais=expand("{sample}/bams/{sample}.markdup.bam.bai",sample=SAMPLES),
        vcfs=expand("{sample}/vcf/{sample}_single.vcf.gz",sample=SAMPLES),
Sander Bollen's avatar
Sander Bollen committed
209
        stats=metrics()
Sander Bollen's avatar
Sander Bollen committed
210

Sander Bollen's avatar
Sander Bollen committed
211
212
213

rule create_markdup_tmp:
    """Create tmp directory for mark duplicates"""
214
    output: directory("tmp")
215
    singularity: containers["debian"]
Sander Bollen's avatar
Sander Bollen committed
216
217
    shell: "mkdir -p {output}"

Sander Bollen's avatar
Sander Bollen committed
218
rule genome:
Sander Bollen's avatar
Sander Bollen committed
219
    """Create genome file as used by bedtools"""
Sander Bollen's avatar
Sander Bollen committed
220
    input: REFERENCE
Sander Bollen's avatar
Sander Bollen committed
221
    output: "current.genome"
222
    singularity: containers["debian"]
Sander Bollen's avatar
Sander Bollen committed
223
224
225
    shell: "awk -v OFS='\t' {{'print $1,$2'}} {input}.fai > {output}"

rule merge_r1:
Sander Bollen's avatar
Sander Bollen committed
226
    """Merge all forward fastq files into one"""
Sander Bollen's avatar
Sander Bollen committed
227
    input: get_r1
Sander Bollen's avatar
Sander Bollen committed
228
    output: temp("{sample}/pre_process/{sample}.merged_R1.fastq.gz")
229
    singularity: containers["debian"]
Sander Bollen's avatar
Sander Bollen committed
230
231
232
    shell: "cat {input} > {output}"

rule merge_r2:
Sander Bollen's avatar
Sander Bollen committed
233
    """Merge all reverse fastq files into one"""
Sander Bollen's avatar
Sander Bollen committed
234
    input: get_r2
Sander Bollen's avatar
Sander Bollen committed
235
    output: temp("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
236
    singularity: containers["debian"]
Sander Bollen's avatar
Sander Bollen committed
237
238
    shell: "cat {input} > {output}"

Sander Bollen's avatar
Sander Bollen committed
239
240

rule seqtk_r1:
Sander Bollen's avatar
Sander Bollen committed
241
    """Either subsample or link forward fastq file"""
Sander Bollen's avatar
Sander Bollen committed
242
    input:
Sander Bollen's avatar
Sander Bollen committed
243
244
        stats="{sample}/pre_process/{sample}.preqc_count.json",
        fastq="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
Sander Bollen's avatar
Sander Bollen committed
245
        seqtk=seq
Sander Bollen's avatar
Sander Bollen committed
246
    params:
Sander Bollen's avatar
Sander Bollen committed
247
        max_bases=str(MAX_BASES)
Sander Bollen's avatar
Sander Bollen committed
248
    output:
Sander Bollen's avatar
Sander Bollen committed
249
        fastq=temp("{sample}/pre_process/{sample}.sampled_R1.fastq.gz")
250
    singularity: containers["seqtk-1.2-jq-1.6"]
Sander Bollen's avatar
Sander Bollen committed
251
252
    shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} "
           "{params.max_bases}"
Sander Bollen's avatar
Sander Bollen committed
253
254
255


rule seqtk_r2:
Sander Bollen's avatar
Sander Bollen committed
256
    """Either subsample or link reverse fastq file"""
Sander Bollen's avatar
Sander Bollen committed
257
    input:
Sander Bollen's avatar
Sander Bollen committed
258
259
        stats = "{sample}/pre_process/{sample}.preqc_count.json",
        fastq = "{sample}/pre_process/{sample}.merged_R2.fastq.gz",
Sander Bollen's avatar
Sander Bollen committed
260
        seqtk=seq
Sander Bollen's avatar
Sander Bollen committed
261
    params:
Sander Bollen's avatar
Sander Bollen committed
262
        max_bases =str(MAX_BASES)
Sander Bollen's avatar
Sander Bollen committed
263
    output:
Sander Bollen's avatar
Sander Bollen committed
264
        fastq = temp("{sample}/pre_process/{sample}.sampled_R2.fastq.gz")
265
    singularity: containers["seqtk-1.2-jq-1.6"]
Sander Bollen's avatar
Sander Bollen committed
266
267
    shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} "
           "{params.max_bases}"
Sander Bollen's avatar
Sander Bollen committed
268
269


270
# contains original merged fastq files as input to prevent them from being prematurely deleted
Sander Bollen's avatar
Sander Bollen committed
271
rule sickle:
Sander Bollen's avatar
Sander Bollen committed
272
    """Trim fastq files"""
Sander Bollen's avatar
Sander Bollen committed
273
    input:
Sander Bollen's avatar
Sander Bollen committed
274
275
276
277
        r1 = "{sample}/pre_process/{sample}.sampled_R1.fastq.gz",
        r2 = "{sample}/pre_process/{sample}.sampled_R2.fastq.gz",
        rr1 = "{sample}/pre_process/{sample}.merged_R1.fastq.gz",
        rr2 = "{sample}/pre_process/{sample}.merged_R2.fastq.gz"
Sander Bollen's avatar
Sander Bollen committed
278
    output:
Sander Bollen's avatar
Sander Bollen committed
279
280
        r1 = temp("{sample}/pre_process/{sample}.trimmed_R1.fastq"),
        r2 = temp("{sample}/pre_process/{sample}.trimmed_R2.fastq"),
Sander Bollen's avatar
Sander Bollen committed
281
        s = "{sample}/pre_process/{sample}.trimmed_singles.fastq"
282
    singularity: containers["sickle"]
Sander Bollen's avatar
Sander Bollen committed
283
    shell: "sickle pe -f {input.r1} -r {input.r2} -t sanger -o {output.r1} "
Sander Bollen's avatar
Sander Bollen committed
284
285
286
           "-p {output.r2} -s {output.s}"

rule cutadapt:
Sander Bollen's avatar
Sander Bollen committed
287
    """Clip fastq files"""
Sander Bollen's avatar
Sander Bollen committed
288
    input:
Sander Bollen's avatar
Sander Bollen committed
289
290
        r1 = "{sample}/pre_process/{sample}.trimmed_R1.fastq",
        r2 = "{sample}/pre_process/{sample}.trimmed_R2.fastq"
Sander Bollen's avatar
Sander Bollen committed
291
    output:
Sander Bollen's avatar
Sander Bollen committed
292
293
        r1 = temp("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
        r2 = temp("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
294
    singularity: containers["cutadapt"]
Sander Bollen's avatar
Sander Bollen committed
295
    shell: "cutadapt -a AGATCGGAAGAG -A AGATCGGAAGAG -m 1 -o {output.r1} "
Sander Bollen's avatar
Sander Bollen committed
296
297
298
           "{input.r1} -p {output.r2} {input.r2}"

rule align:
Sander Bollen's avatar
Sander Bollen committed
299
    """Align fastq files"""
Sander Bollen's avatar
Sander Bollen committed
300
    input:
Sander Bollen's avatar
Sander Bollen committed
301
302
        r1 = "{sample}/pre_process/{sample}.cutadapt_R1.fastq",
        r2 = "{sample}/pre_process/{sample}.cutadapt_R2.fastq",
Sander Bollen's avatar
Sander Bollen committed
303
        ref = REFERENCE,
304
        tmp = ancient("tmp")
Sander Bollen's avatar
Sander Bollen committed
305
306
    params:
        rg = "@RG\\tID:{sample}_lib1\\tSM:{sample}\\tPL:ILLUMINA"
Sander Bollen's avatar
Sander Bollen committed
307
    output: temp("{sample}/bams/{sample}.sorted.bam")
308
    singularity: containers["bwa-0.7.17-picard-2.18.7"]
Sander Bollen's avatar
Sander Bollen committed
309
    shell: "bwa mem -t 8 -R '{params.rg}' {input.ref} {input.r1} {input.r2} "
310
311
           "| picard -Xmx4G -Djava.io.tmpdir={input.tmp} SortSam "
           "CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
Sander Bollen's avatar
Sander Bollen committed
312
313
314
           "INPUT=/dev/stdin OUTPUT={output} SORT_ORDER=coordinate"

rule markdup:
Sander Bollen's avatar
Sander Bollen committed
315
    """Mark duplicates in BAM file"""
Sander Bollen's avatar
Sander Bollen committed
316
    input:
Sander Bollen's avatar
Sander Bollen committed
317
        bam = "{sample}/bams/{sample}.sorted.bam",
Sander Bollen's avatar
Sander Bollen committed
318
        tmp = ancient("tmp")
Sander Bollen's avatar
Sander Bollen committed
319
    output:
Sander Bollen's avatar
Sander Bollen committed
320
321
322
        bam = "{sample}/bams/{sample}.markdup.bam",
        bai = "{sample}/bams/{sample}.markdup.bai",
        metrics = "{sample}/bams/{sample}.markdup.metrics"
323
    singularity: containers["picard-2.14"]
324
325
    shell: "picard -Xmx4G -Djava.io.tmpdir={input.tmp} MarkDuplicates "
           "CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
Sander Bollen's avatar
Sander Bollen committed
326
327
           "INPUT={input.bam} OUTPUT={output.bam} "
           "METRICS_FILE={output.metrics} "
Sander Bollen's avatar
Sander Bollen committed
328
329
           "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=500"

Sander Bollen's avatar
Sander Bollen committed
330
331
332
rule bai:
    """Copy bai files as some genome browsers can only .bam.bai files"""
    input:
Sander Bollen's avatar
Sander Bollen committed
333
        bai = "{sample}/bams/{sample}.markdup.bai"
Sander Bollen's avatar
Sander Bollen committed
334
    output:
Sander Bollen's avatar
Sander Bollen committed
335
        bai = "{sample}/bams/{sample}.markdup.bam.bai"
336
    singularity: containers["debian"]
Sander Bollen's avatar
Sander Bollen committed
337
338
    shell: "cp {input.bai} {output.bai}"

Sander Bollen's avatar
Sander Bollen committed
339
rule baserecal:
Sander Bollen's avatar
Sander Bollen committed
340
    """Base recalibrated BAM files"""
Sander Bollen's avatar
Sander Bollen committed
341
    input:
Sander Bollen's avatar
Sander Bollen committed
342
        bam = "{sample}/bams/{sample}.markdup.bam",
Sander Bollen's avatar
Sander Bollen committed
343
344
        ref = REFERENCE,
    output:
Sander Bollen's avatar
Sander Bollen committed
345
        grp = "{sample}/bams/{sample}.baserecal.grp"
346
    params:
347
        known_sites = BSQR_known_sites
348
    singularity: containers["gatk"]
349
    shell: "java -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar -T "
Sander Bollen's avatar
Sander Bollen committed
350
351
           "BaseRecalibrator -I {input.bam} -o {output.grp} -nct 8 "
           "-R {input.ref} -cov ReadGroupCovariate -cov QualityScoreCovariate "
352
           "-cov CycleCovariate -cov ContextCovariate {params.known_sites}"
Sander Bollen's avatar
Sander Bollen committed
353

Sander Bollen's avatar
Sander Bollen committed
354
rule gvcf_scatter:
Sander Bollen's avatar
Sander Bollen committed
355
    """Run HaplotypeCaller in GVCF mode by chunk"""
Sander Bollen's avatar
Sander Bollen committed
356
    input:
Sander Bollen's avatar
Sander Bollen committed
357
358
        bam="{sample}/bams/{sample}.markdup.bam",
        bqsr="{sample}/bams/{sample}.baserecal.grp",
Sander Bollen's avatar
Sander Bollen committed
359
        dbsnp=DBSNP,
Sander Bollen's avatar
Sander Bollen committed
360
        ref=REFERENCE,
Sander Bollen's avatar
Sander Bollen committed
361
    params:
Sander Bollen's avatar
Sander Bollen committed
362
        chunk="{chunk}"
Sander Bollen's avatar
Sander Bollen committed
363
    output:
Sander Bollen's avatar
Sander Bollen committed
364
365
        gvcf=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz"),
        gvcf_tbi=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz.tbi")
366
    singularity: containers["gatk"]
367
    shell: "java -jar -Xmx4G -XX:ParallelGCThreads=1 /usr/GenomeAnalysisTK.jar "
Sander Bollen's avatar
Sander Bollen committed
368
           "-T HaplotypeCaller -ERC GVCF -I "
Sander Bollen's avatar
Sander Bollen committed
369
370
371
           "{input.bam} -R {input.ref} -D {input.dbsnp} "
           "-L '{params.chunk}' -o '{output.gvcf}' "
           "-variant_index_type LINEAR -variant_index_parameter 128000 "
Sander Bollen's avatar
Sander Bollen committed
372
           "-BQSR {input.bqsr}"
Sander Bollen's avatar
Sander Bollen committed
373
374


Sander Bollen's avatar
Sander Bollen committed
375
376
377
rule gvcf_chunkfile:
    """
    Create simple text file with paths to chunks for GVCF.
378

Sander Bollen's avatar
Sander Bollen committed
379
380
    This uses a run directive in stead of a shell directive because
    the amount of chunks may be so large the shell would error out with
381
    an "argument list too long" error.
Sander Bollen's avatar
Sander Bollen committed
382
    See https://unix.stackexchange.com/a/120842 for more info
383

van den Berg's avatar
van den Berg committed
384
    This also means this rule lives outside of singularity and is
385
    executed in snakemake's own environment.
Sander Bollen's avatar
Sander Bollen committed
386
387
388
389
390
391
392
    """
    params:
        chunkfiles = expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
                            chunk=CHUNKS)
    output:
        file = "{sample}/vcf/chunkfile.txt"
    run:
393
        with open(output.file, "w") as ohandle:
Sander Bollen's avatar
Sander Bollen committed
394
            for filename in params.chunkfiles:
395
396
                corrected = filename.format(sample=wildcards.sample)
                ohandle.write(corrected + "\n")
Sander Bollen's avatar
Sander Bollen committed
397

Sander Bollen's avatar
Sander Bollen committed
398
rule gvcf_gather:
Sander Bollen's avatar
Sander Bollen committed
399
    """Gather all GVCF scatters"""
Sander Bollen's avatar
Sander Bollen committed
400
    input:
Sander Bollen's avatar
Sander Bollen committed
401
402
403
        gvcfs = expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
                       chunk=CHUNKS),
        chunkfile = "{sample}/vcf/chunkfile.txt"
Sander Bollen's avatar
Sander Bollen committed
404
    output:
Sander Bollen's avatar
Sander Bollen committed
405
        gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
406
    singularity: containers["bcftools"]
Sander Bollen's avatar
Sander Bollen committed
407
408
409
410
411
412
413
414
415
    shell: "bcftools concat -f {input.chunkfile} -n > {output.gvcf}"


rule gvcf_gather_tbi:
    """Index GVCF"""
    input:
        gvcf = "{sample}/vcf/{sample}.g.vcf.gz"
    output:
        tbi = "{sample}/vcf/{sample}.g.vcf.gz.tbi"
416
    singularity: containers["tabix"]
Sander Bollen's avatar
Sander Bollen committed
417
    shell: "tabix -pvcf {input.gvcf}"
Sander Bollen's avatar
Sander Bollen committed
418
419
420


rule genotype_scatter:
Sander Bollen's avatar
Sander Bollen committed
421
    """Run GATK's GenotypeGVCFs by chunk"""
Sander Bollen's avatar
Sander Bollen committed
422
    input:
Sander Bollen's avatar
Sander Bollen committed
423
        gvcfs = expand("{sample}/vcf/{sample}.g.vcf.gz", sample=SAMPLES),
Sander Bollen's avatar
Sander Bollen committed
424
        tbis = expand("{sample}/vcf/{sample}.g.vcf.gz.tbi",
Sander Bollen's avatar
Sander Bollen committed
425
                      sample=SAMPLES),
426
        ref=REFERENCE
Sander Bollen's avatar
Sander Bollen committed
427
    params:
Sander Bollen's avatar
Sander Bollen committed
428
429
        li=" -V ".join(expand("{sample}/vcf/{sample}.g.vcf.gz",
                              sample=SAMPLES)),
Sander Bollen's avatar
Sander Bollen committed
430
431
        chunk="{chunk}"
    output:
Sander Bollen's avatar
Sander Bollen committed
432
433
        vcf=temp("multisample/genotype.{chunk}.part.vcf.gz"),
        vcf_tbi=temp("multisample/genotype.{chunk}.part.vcf.gz.tbi")
434
    singularity: containers["gatk"]
435
    shell: "java -jar -Xmx15G -XX:ParallelGCThreads=1 /usr/GenomeAnalysisTK.jar -T "
Sander Bollen's avatar
Sander Bollen committed
436
           "GenotypeGVCFs -R {input.ref} "
Sander Bollen's avatar
Sander Bollen committed
437
           "-V {params.li} -L '{params.chunk}' -o '{output.vcf}'"
Sander Bollen's avatar
Sander Bollen committed
438
439


440
441
442
rule genotype_chunkfile:
    """
    Create simple text file with paths to chunks for genotyping
443

444
445
    This uses a run directive in stead of a shell directive because
    the amount of chunks may be so large the shell would error out with
446
    an "argument list too long" error.
447
    See https://unix.stackexchange.com/a/120842 for more info
448

van den Berg's avatar
van den Berg committed
449
    This also means this rule lives outside of singularity and is
450
    executed in snakemake's own environment.
451
452
    """
    params:
Sander Bollen's avatar
Sander Bollen committed
453
454
        vcfs = expand("multisample/genotype.{chunk}.part.vcf.gz",
                      chunk=CHUNKS)
455
456
457
458
459
460
461
462
    output:
        file = "multisample/chunkfile.txt"
    run:
        with open(output.file, "w") as ohandle:
            for filename in params.vcfs:
                ohandle.write(filename + "\n")


Sander Bollen's avatar
Sander Bollen committed
463
rule genotype_gather:
464
    """Gather all genotyping VCFs"""
Sander Bollen's avatar
Sander Bollen committed
465
    input:
466
467
468
        vcfs = expand("multisample/genotype.{chunk}.part.vcf.gz",
                      chunk=CHUNKS),
        chunkfile = "multisample/chunkfile.txt"
Sander Bollen's avatar
Sander Bollen committed
469
    output:
470
        vcf = "multisample/genotyped.vcf.gz"
471
    singularity: containers["bcftools"]
472
473
474
475
476
477
478
479
    shell: "bcftools concat -f {input.chunkfile} -n > {output.vcf}"


rule genotype_gather_tbi:
    """Index genotyped vcf file"""
    input:
        vcf = "multisample/genotyped.vcf.gz"
    output:
Sander Bollen's avatar
Sander Bollen committed
480
        tbi = "multisample/genotyped.vcf.gz.tbi"
481
    singularity: containers["tabix"]
482
    shell: "tabix -pvcf {input.vcf}"
Sander Bollen's avatar
Sander Bollen committed
483
484


485
486
487
rule split_vcf:
    """Split multisample VCF in single samples"""
    input:
Sander Bollen's avatar
Sander Bollen committed
488
        vcf="multisample/genotyped.vcf.gz",
489
        tbi = "multisample/genotyped.vcf.gz.tbi",
490
491
492
493
        ref=REFERENCE
    params:
        s="{sample}"
    output:
Sander Bollen's avatar
Sander Bollen committed
494
        splitted="{sample}/vcf/{sample}_single.vcf.gz"
495
    singularity: containers["gatk"]
496
    shell: "java -Xmx15G -XX:ParallelGCThreads=1 -jar /usr/GenomeAnalysisTK.jar "
Sander Bollen's avatar
Sander Bollen committed
497
498
           "-T SelectVariants -sn {params.s} -env -R {input.ref} -V "
           "{input.vcf} -o {output.splitted}"
499
500


Sander Bollen's avatar
Sander Bollen committed
501
## bam metrics
502
rule mapped_reads_bases:
Sander Bollen's avatar
Sander Bollen committed
503
    """Calculate number of mapped reads"""
Sander Bollen's avatar
Sander Bollen committed
504
    input:
505
506
        bam="{sample}/bams/{sample}.sorted.bam",
        pywc=pywc
Sander Bollen's avatar
Sander Bollen committed
507
    output:
508
509
        reads="{sample}/bams/{sample}.mapped.num",
        bases="{sample}/bams/{sample}.mapped.basenum"
510
    singularity: containers["samtools-1.7-python-3.6"]
511
512
    shell: "samtools view -F 4 {input.bam} | cut -f 10 | python {input.pywc} "
           "--reads {output.reads} --bases {output.bases}"
Sander Bollen's avatar
Sander Bollen committed
513
514


515
rule unique_reads_bases:
Sander Bollen's avatar
Sander Bollen committed
516
    """Calculate number of unique reads"""
Sander Bollen's avatar
Sander Bollen committed
517
    input:
518
519
        bam="{sample}/bams/{sample}.markdup.bam",
        pywc=pywc
Sander Bollen's avatar
Sander Bollen committed
520
    output:
521
522
        reads="{sample}/bams/{sample}.unique.num",
        bases="{sample}/bams/{sample}.usable.basenum"
523
    singularity: containers["samtools-1.7-python-3.6"]
524
525
    shell: "samtools view -F 4 -F 1024 {input.bam} | cut -f 10 | python {input.pywc} "
           "--reads {output.reads} --bases {output.bases}"
Sander Bollen's avatar
Sander Bollen committed
526
527
528
529


## fastqc

Sander Bollen's avatar
Sander Bollen committed
530
rule fastqc_raw:
531
532
    """
    Run fastqc on raw fastq files
533
    NOTE: singularity version uses 0.11.7 in stead of 0.11.5 due to
534
535
    perl missing in the container of 0.11.5
    """
Sander Bollen's avatar
Sander Bollen committed
536
    input:
Sander Bollen's avatar
Sander Bollen committed
537
        r1=get_r1,
Sander Bollen's avatar
Sander Bollen committed
538
539
        r2=get_r2
    params:
Sander Bollen's avatar
Sander Bollen committed
540
        odir="{sample}/pre_process/raw_fastqc"
Sander Bollen's avatar
Sander Bollen committed
541
    output:
Sander Bollen's avatar
Sander Bollen committed
542
        aux="{sample}/pre_process/raw_fastqc/.done.txt"
543
    singularity: containers["fastqc"]
van den Berg's avatar
van den Berg committed
544
    shell: "fastqc --threads 4 --nogroup -o {params.odir} {input.r1} {input.r2} "
Sander Bollen's avatar
Sander Bollen committed
545
           "&& echo 'done' > {output.aux}"
Sander Bollen's avatar
Sander Bollen committed
546
547


Sander Bollen's avatar
Sander Bollen committed
548
rule fastqc_merged:
549
    """
550
551
    Run fastqc on merged fastq files
    NOTE: singularity version uses 0.11.7 in stead of 0.11.5 due to
552
553
    perl missing in the container of 0.11.5
    """
Sander Bollen's avatar
Sander Bollen committed
554
    input:
Sander Bollen's avatar
Sander Bollen committed
555
556
        r1="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
        r2="{sample}/pre_process/{sample}.merged_R2.fastq.gz",
Sander Bollen's avatar
Sander Bollen committed
557
        fq=fqsc
Sander Bollen's avatar
Sander Bollen committed
558
    params:
Sander Bollen's avatar
Sander Bollen committed
559
        odir="{sample}/pre_process/merged_fastqc"
Sander Bollen's avatar
Sander Bollen committed
560
    output:
Sander Bollen's avatar
Sander Bollen committed
561
562
        r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
        r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip"
563
    singularity: containers["fastqc"]
Sander Bollen's avatar
Sander Bollen committed
564
565
    shell: "bash {input.fq} {input.r1} {input.r2} "
           "{output.r1} {output.r2} {params.odir}"
Sander Bollen's avatar
Sander Bollen committed
566
567


Sander Bollen's avatar
Sander Bollen committed
568
rule fastqc_postqc:
569
570
    """
    Run fastqc on fastq files post pre-processing
571
572
    NOTE: singularity version uses 0.11.7 in stead of 0.11.5 due to
    perl missing in the container of 0.11.5
573
    """
Sander Bollen's avatar
Sander Bollen committed
574
    input:
Sander Bollen's avatar
Sander Bollen committed
575
576
        r1="{sample}/pre_process/{sample}.cutadapt_R1.fastq",
        r2="{sample}/pre_process/{sample}.cutadapt_R2.fastq",
Sander Bollen's avatar
Sander Bollen committed
577
        fq=fqsc
Sander Bollen's avatar
Sander Bollen committed
578
    params:
Sander Bollen's avatar
Sander Bollen committed
579
        odir="{sample}/pre_process/postqc_fastqc"
Sander Bollen's avatar
Sander Bollen committed
580
    output:
Sander Bollen's avatar
Sander Bollen committed
581
582
        r1="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
        r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip"
583
    singularity: containers["fastqc"]
Sander Bollen's avatar
Sander Bollen committed
584
585
    shell: "bash {input.fq} {input.r1} {input.r2} "
           "{output.r1} {output.r2} {params.odir}"
Sander Bollen's avatar
Sander Bollen committed
586
587
588
589
590


## fastq-count

rule fqcount_preqc:
Sander Bollen's avatar
Sander Bollen committed
591
    """Calculate number of reads and bases before pre-processing"""
Sander Bollen's avatar
Sander Bollen committed
592
    input:
Sander Bollen's avatar
Sander Bollen committed
593
594
        r1="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
        r2="{sample}/pre_process/{sample}.merged_R2.fastq.gz"
Sander Bollen's avatar
Sander Bollen committed
595
    output:
Sander Bollen's avatar
Sander Bollen committed
596
        "{sample}/pre_process/{sample}.preqc_count.json"
597
    singularity: containers["fastq-count"]
Sander Bollen's avatar
Sander Bollen committed
598
    shell: "fastq-count {input.r1} {input.r2} > {output}"
Sander Bollen's avatar
Sander Bollen committed
599
600
601


rule fqcount_postqc:
Sander Bollen's avatar
Sander Bollen committed
602
    """Calculate number of reads and bases after pre-processing"""
Sander Bollen's avatar
Sander Bollen committed
603
    input:
Sander Bollen's avatar
Sander Bollen committed
604
605
        r1="{sample}/pre_process/{sample}.cutadapt_R1.fastq",
        r2="{sample}/pre_process/{sample}.cutadapt_R2.fastq"
Sander Bollen's avatar
Sander Bollen committed
606
    output:
Sander Bollen's avatar
Sander Bollen committed
607
        "{sample}/pre_process/{sample}.postqc_count.json"
608
    singularity: containers["fastq-count"]
Sander Bollen's avatar
Sander Bollen committed
609
    shell: "fastq-count {input.r1} {input.r2} > {output}"
Sander Bollen's avatar
Sander Bollen committed
610
611


Sander Bollen's avatar
Sander Bollen committed
612
613
614
615
# fastqc stats
rule fastqc_stats:
    """Collect fastq stats for a sample in json format"""
    input:
Sander Bollen's avatar
Sander Bollen committed
616
617
618
619
        preqc_r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
        preqc_r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip",
        postqc_r1="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
        postqc_r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip",
Sander Bollen's avatar
Sander Bollen committed
620
        sc=fqpy
621
    singularity: containers["python3"]
Sander Bollen's avatar
Sander Bollen committed
622
    output:
Sander Bollen's avatar
Sander Bollen committed
623
        "{sample}/pre_process/fastq_stats.json"
624
625
626
    shell: "python {input.sc} --preqc-r1 {input.preqc_r1} "
           "--preqc-r2 {input.preqc_r2} "
           "--postqc-r1 {input.postqc_r1} "
Sander Bollen's avatar
Sander Bollen committed
627
           "--postqc-r2 {input.postqc_r2} > {output}"
Sander Bollen's avatar
Sander Bollen committed
628

Sander Bollen's avatar
Sander Bollen committed
629
630
631
## coverages

rule covstats:
Sander Bollen's avatar
Sander Bollen committed
632
    """Calculate coverage statistics on bam file"""
Sander Bollen's avatar
Sander Bollen committed
633
    input:
Sander Bollen's avatar
Sander Bollen committed
634
635
        bam="{sample}/bams/{sample}.markdup.bam",
        genome="current.genome",
Sander Bollen's avatar
Sander Bollen committed
636
637
638
639
640
        covpy=covpy,
        bed=get_bedpath
    params:
        subt="Sample {sample}"
    output:
Sander Bollen's avatar
Sander Bollen committed
641
642
        covj="{sample}/coverage/{bed}.covstats.json",
        covp="{sample}/coverage/{bed}.covstats.png"
643
    singularity: containers["bedtools-2.26-python-2.7"]
Sander Bollen's avatar
Sander Bollen committed
644
645
646
647
    shell: "bedtools coverage -sorted -g {input.genome} -a {input.bed} "
           "-b {input.bam} -d  | python {input.covpy} - --plot {output.covp} "
           "--title 'Targets coverage' --subtitle '{params.subt}' "
           "> {output.covj}"
Sander Bollen's avatar
Sander Bollen committed
648
649


Sander Bollen's avatar
Sander Bollen committed
650
651
652
rule vtools_coverage:
    """Calculate coverage statistics per transcript"""
    input:
Sander Bollen's avatar
Sander Bollen committed
653
        gvcf="{sample}/vcf/{sample}.g.vcf.gz",
Sander Bollen's avatar
Sander Bollen committed
654
        tbi = "{sample}/vcf/{sample}.g.vcf.gz.tbi",
Sander Bollen's avatar
Sander Bollen committed
655
656
        ref=get_refflatpath
    output:
Sander Bollen's avatar
Sander Bollen committed
657
        tsv="{sample}/coverage/{ref}.coverages.tsv"
658
    singularity: containers["vtools"]
Sander Bollen's avatar
Sander Bollen committed
659
660
661
    shell: "vtools-gcoverage -I {input.gvcf} -R {input.ref} > {output.tsv}"


Sander Bollen's avatar
Sander Bollen committed
662
663
664
## vcfstats

rule vcfstats:
Sander Bollen's avatar
Sander Bollen committed
665
    """Calculate vcf statistics"""
Sander Bollen's avatar
Sander Bollen committed
666
    input:
Sander Bollen's avatar
Sander Bollen committed
667
668
        vcf="multisample/genotyped.vcf.gz",
        tbi = "multisample/genotyped.vcf.gz.tbi"
Sander Bollen's avatar
Sander Bollen committed
669
    output:
Sander Bollen's avatar
Sander Bollen committed
670
        stats="multisample/vcfstats.json"
671
    singularity: containers["vtools"]
Sander Bollen's avatar
Sander Bollen committed
672
    shell: "vtools-stats -i {input.vcf} > {output.stats}"
Sander Bollen's avatar
Sander Bollen committed
673
674


Sander Bollen's avatar
Sander Bollen committed
675
## collection
676
677
678
679
if len(BASE_BEDS) >= 1:
    rule collectstats:
        """Collect all stats for a particular sample with beds"""
        input:
Sander Bollen's avatar
Sander Bollen committed
680
681
682
683
684
685
686
            preqc="{sample}/pre_process/{sample}.preqc_count.json",
            postq="{sample}/pre_process/{sample}.postqc_count.json",
            mnum="{sample}/bams/{sample}.mapped.num",
            mbnum="{sample}/bams/{sample}.mapped.basenum",
            unum="{sample}/bams/{sample}.unique.num",
            ubnum="{sample}/bams/{sample}.usable.basenum",
            fastqc="{sample}/pre_process/fastq_stats.json",
Sander Bollen's avatar
Sander Bollen committed
687
688
            cov=expand("{{sample}}/coverage/{bed}.covstats.json",
                       bed=BASE_BEDS),
689
690
691
692
693
            colpy=colpy
        params:
            sample_name="{sample}",
            fthresh=FEMALE_THRESHOLD
        output:
Sander Bollen's avatar
Sander Bollen committed
694
            "{sample}/{sample}.stats.json"
695
        singularity: containers["vtools"]
696
697
698
699
        shell: "python {input.colpy} --sample-name {params.sample_name} "
               "--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
               "--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
               "--unique-num {input.unum} --usable-basenum {input.ubnum} "
700
701
               "--female-threshold {params.fthresh} "
               "--fastqc-stats {input.fastqc} {input.cov} > {output}"
702
703
else:
    rule collectstats:
Sander Bollen's avatar
Sander Bollen committed
704
        """Collect all stats for a particular sample without beds"""
Sander Bollen's avatar
Sander Bollen committed
705
        input:
Sander Bollen's avatar
Sander Bollen committed
706
707
708
709
710
711
712
            preqc = "{sample}/pre_process/{sample}.preqc_count.json",
            postq = "{sample}/pre_process/{sample}.postqc_count.json",
            mnum = "{sample}/bams/{sample}.mapped.num",
            mbnum = "{sample}/bams/{sample}.mapped.basenum",
            unum = "{sample}/bams/{sample}.unique.num",
            ubnum = "{sample}/bams/{sample}.usable.basenum",
            fastqc="{sample}/pre_process/fastq_stats.json",
Sander Bollen's avatar
Sander Bollen committed
713
714
715
716
717
            colpy = colpy
        params:
            sample_name = "{sample}",
            fthresh = FEMALE_THRESHOLD
        output:
Sander Bollen's avatar
Sander Bollen committed
718
            "{sample}/{sample}.stats.json"
719
        singularity: containers["vtools"]
Sander Bollen's avatar
Sander Bollen committed
720
721
722
723
        shell: "python {input.colpy} --sample-name {params.sample_name} "
               "--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
               "--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
               "--unique-num {input.unum} --usable-basenum {input.ubnum} "
724
725
               "--female-threshold {params.fthresh} "
               "--fastqc-stats {input.fastqc}  > {output}"
Sander Bollen's avatar
Sander Bollen committed
726
727

rule merge_stats:
Sander Bollen's avatar
Sander Bollen committed
728
    """Merge all stats of all samples"""
Sander Bollen's avatar
Sander Bollen committed
729
    input:
Sander Bollen's avatar
Sander Bollen committed
730
731
        cols=expand("{sample}/{sample}.stats.json", sample=SAMPLES),
        vstat="multisample/vcfstats.json",
Sander Bollen's avatar
Sander Bollen committed
732
733
        mpy=mpy
    output:
Sander Bollen's avatar
Sander Bollen committed
734
        stats="stats.json"
735
    singularity: containers["vtools"]
Sander Bollen's avatar
Sander Bollen committed
736
737
    shell: "python {input.mpy} --vcfstats {input.vstat} {input.cols} "
           "> {output.stats}"
Sander Bollen's avatar
Sander Bollen committed
738
739


Sander Bollen's avatar
Sander Bollen committed
740
741
742
rule stats_tsv:
    """Convert stats.json to tsv"""
    input:
Sander Bollen's avatar
Sander Bollen committed
743
        stats="stats.json",
Sander Bollen's avatar
Sander Bollen committed
744
745
        sc=tsvpy
    output:
Sander Bollen's avatar
Sander Bollen committed
746
        stats="stats.tsv"
747
    singularity: containers["python3"]
Sander Bollen's avatar
Sander Bollen committed
748
749
750
    shell: "python {input.sc} -i {input.stats} > {output.stats}"


Sander Bollen's avatar
Sander Bollen committed
751
752
753
rule multiqc:
    """
    Create multiQC report
Sander Bollen's avatar
Sander Bollen committed
754
    Depends on stats.tsv to forcefully run at end of pipeline
Sander Bollen's avatar
Sander Bollen committed
755
756
    """
    input:
Sander Bollen's avatar
Sander Bollen committed
757
        stats="stats.tsv"
Sander Bollen's avatar
Sander Bollen committed
758
    params:
Sander Bollen's avatar
Sander Bollen committed
759
        odir=".",
Sander Bollen's avatar
Sander Bollen committed
760
        rdir="multiqc_report"
Sander Bollen's avatar
Sander Bollen committed
761
    output:
Sander Bollen's avatar
Sander Bollen committed
762
        report="multiqc_report/multiqc_report.html"
763
    singularity: containers["multiqc"]
764
    shell: "multiqc -f -o {params.rdir} {params.odir} || touch {output.report}"