Snakefile 24.7 KB
Newer Older
Sander Bollen's avatar
Sander Bollen committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#   hutspot - a DNAseq variant calling pipeline
#   Copyright (C) 2017-2019, Sander Bollen, Leiden University Medical Center
#
#   This program is free software: you can redistribute it and/or modify
#   it under the terms of the GNU Affero General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU Affero General Public License for more details.
#
#   You should have received a copy of the GNU Affero General Public License
#   along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""
Main Snakefile for the pipeline. 

:copyright: (c) 2017-2019 Sander Bollen
:copyright: (c) 2017-2019 Leiden University Medical Center
:license: AGPL-3.0
"""

Sander Bollen's avatar
Sander Bollen committed
24
import json
Sander Bollen's avatar
Sander Bollen committed
25
from functools import partial
Sander Bollen's avatar
Sander Bollen committed
26
from os.path import join, basename
27
from pathlib import Path
Sander Bollen's avatar
Sander Bollen committed
28
29

from pyfaidx import Fasta
Sander Bollen's avatar
Sander Bollen committed
30
31

REFERENCE = config.get("REFERENCE")
32
33
if REFERENCE is None:
    raise ValueError("You must set --config REFERENCE=<path>")
Sander Bollen's avatar
Sander Bollen committed
34
if not Path(REFERENCE).exists():
Sander Bollen's avatar
Sander Bollen committed
35
36
    raise FileNotFoundError("Reference file {0} "
                            "does not exist.".format(REFERENCE))
37

Sander Bollen's avatar
Sander Bollen committed
38
GATK = config.get("GATK")
39
40
if GATK is None:
    raise ValueError("You must set --config GATK=<path>")
Sander Bollen's avatar
Sander Bollen committed
41
42
if not Path(GATK).exists():
    raise FileNotFoundError("{0} does not exist.".format(GATK))
43

Sander Bollen's avatar
Sander Bollen committed
44
DBSNP = config.get("DBSNP")
45
46
if DBSNP is None:
    raise ValueError("You must set --config DBSNP=<path>")
Sander Bollen's avatar
Sander Bollen committed
47
48
if not Path(DBSNP).exists():
    raise FileNotFoundError("{0} does not exist".format(DBSNP))
49

Sander Bollen's avatar
Sander Bollen committed
50
ONETHOUSAND = config.get("ONETHOUSAND")
51
52
if ONETHOUSAND is None:
    raise ValueError("You must set --config ONETHOUSAND=<path>")
Sander Bollen's avatar
Sander Bollen committed
53
54
if not Path(ONETHOUSAND).exists():
    raise FileNotFoundError("{0} does not exist".format(ONETHOUSAND))
55

Sander Bollen's avatar
Sander Bollen committed
56
HAPMAP = config.get("HAPMAP")
57
if HAPMAP is None:
Sander Bollen's avatar
Sander Bollen committed
58
59
60
    raise ValueError("You must set --config HAPMAP=<path>")
if not Path(HAPMAP).exists():
    raise FileNotFoundError("{0} does not exist".format(HAPMAP))
61
62

# these are all optional
63
64
BED = config.get("BED", "")  # comma-separated list of BED files
REFFLAT = config.get("REFFLAT", "")  # comma-separated list of refFlat files
Sander Bollen's avatar
Sander Bollen committed
65
FEMALE_THRESHOLD = config.get("FEMALE_THRESHOLD", 0.6)
66
FASTQ_COUNT = config.get("FASTQ_COUNT")
Sander Bollen's avatar
Sander Bollen committed
67
MAX_BASES = config.get("MAX_BASES", "")
Sander Bollen's avatar
Sander Bollen committed
68

69
70
71
72
73
def fsrc_dir(*args):
    """Wrapper around snakemake's srcdir to work like os.path.join"""
    if len(args) == 1:
        return srcdir(args[0])
    return srcdir(join(*args))
Sander Bollen's avatar
Sander Bollen committed
74

75
76
77
covpy = fsrc_dir("src", "covstats.py")
colpy = fsrc_dir("src", "collect_stats.py")
mpy = fsrc_dir("src", "merge_stats.py")
Sander Bollen's avatar
Sander Bollen committed
78
seq = fsrc_dir("src", "seqtk.sh")
Sander Bollen's avatar
Sander Bollen committed
79
fqpy = fsrc_dir("src", "fastqc_stats.py")
Sander Bollen's avatar
Sander Bollen committed
80
tsvpy = fsrc_dir("src", "stats_to_tsv.py")
Sander Bollen's avatar
Sander Bollen committed
81
fqsc = fsrc_dir("src", "safe_fastqc.sh")
Sander Bollen's avatar
Sander Bollen committed
82

83
if FASTQ_COUNT is None:
84
    fqc = "python {0}".format(fsrc_dir("src", "fastq-count.py"))
85
86
87
else:
    fqc = FASTQ_COUNT

Sander Bollen's avatar
Sander Bollen committed
88
89
90
91
92
93
94
# sample config parsing
SCONFIG = config.get("SAMPLE_CONFIG")
if SCONFIG is None:
    raise ValueError("You must set --config SAMPLE_CONFIG=<path>")
if not Path(SCONFIG).exists():
    raise FileNotFoundError("{0} does not exist".format(SCONFIG))

Sander Bollen's avatar
Sander Bollen committed
95
96
97
98
with open(config.get("SAMPLE_CONFIG")) as handle:
    SAMPLE_CONFIG = json.load(handle)
SAMPLES = SAMPLE_CONFIG['samples'].keys()

Sander Bollen's avatar
Sander Bollen committed
99
100
101
102
103
104
105
106
107
if BED != "":
    BEDS = BED.split(",")
else:
    BEDS = []

if REFFLAT != "":
    REFFLATS = REFFLAT.split(",")
else:
    REFFLATS = []
Sander Bollen's avatar
Sander Bollen committed
108
109

BASE_BEDS = [basename(x) for x in BEDS]
Sander Bollen's avatar
Sander Bollen committed
110
BASE_REFFLATS = [basename(x) for x in REFFLATS]
Sander Bollen's avatar
Sander Bollen committed
111

Sander Bollen's avatar
Sander Bollen committed
112
113

def split_genome(ref, approx_n_chunks=100):
Sander Bollen's avatar
Sander Bollen committed
114
115
116
117
118
119
120
121
    """
    Split genome in chunks.

    Chunks are strings in the format: `<ctg>:<start>-<end>`
    These follow the region string format as used by htslib,
    which uses _1_-based indexing.
    See: http://www.htslib.org/doc/tabix.html
    """
Sander Bollen's avatar
Sander Bollen committed
122
123
124
125
126
    fa = Fasta(ref)
    tot_size = sum([len(x) for x in fa.records.values()])
    chunk_size = tot_size//approx_n_chunks
    chunks = []
    for chrom_name, chrom_value in fa.records.items():
Sander Bollen's avatar
Sander Bollen committed
127
        pos = 1
Sander Bollen's avatar
Sander Bollen committed
128
129
130
131
132
133
134
135
136
137
138
139
140
        while pos <= len(chrom_value):
            end = pos+chunk_size
            if end <= len(chrom_value):
                chunk = "{0}:{1}-{2}".format(chrom_name, pos, end)
            else:
                chunk = "{0}:{1}-{2}".format(chrom_name, pos, len(chrom_value))
            chunks.append(chunk)
            pos = end
    return chunks

CHUNKS = split_genome(REFERENCE)


Sander Bollen's avatar
Sander Bollen committed
141

Sander Bollen's avatar
Sander Bollen committed
142
143
def get_r(strand, wildcards):
    """Get fastq files on a single strand for a sample"""
Sander Bollen's avatar
Sander Bollen committed
144
    s = SAMPLE_CONFIG['samples'].get(wildcards.sample)
Sander Bollen's avatar
Sander Bollen committed
145
    rs = []
146
    for l in sorted(s['libraries'].keys()):
Sander Bollen's avatar
Sander Bollen committed
147
148
        rs.append(s['libraries'][l][strand])
    return rs
Sander Bollen's avatar
Sander Bollen committed
149

Sander Bollen's avatar
Sander Bollen committed
150
151
get_r1 = partial(get_r, "R1")
get_r2 = partial(get_r, "R2")
Sander Bollen's avatar
Sander Bollen committed
152
153


Sander Bollen's avatar
Sander Bollen committed
154
def get_bedpath(wildcards):
Sander Bollen's avatar
Sander Bollen committed
155
156
    """Get absolute path of a bed file"""
    return next(x for x in BEDS if basename(x) == wildcards.bed)
Sander Bollen's avatar
Sander Bollen committed
157
158
159


def get_refflatpath(wildcards):
Sander Bollen's avatar
Sander Bollen committed
160
    """Get absolute path of a refflat file"""
Sander Bollen's avatar
Sander Bollen committed
161
    return next(x for x in REFFLATS if basename(x) == wildcards.ref)
Sander Bollen's avatar
Sander Bollen committed
162
163


Sander Bollen's avatar
Sander Bollen committed
164
def sample_gender(wildcards):
Sander Bollen's avatar
Sander Bollen committed
165
    """Get sample gender"""
Sander Bollen's avatar
Sander Bollen committed
166
167
168
169
    sam = SAMPLE_CONFIG['samples'].get(wildcards.sample)
    return sam.get("gender", "null")


Sander Bollen's avatar
Sander Bollen committed
170
171
172
173
def metrics(do_metrics=True):
    if not do_metrics:
        return ""

Sander Bollen's avatar
Sander Bollen committed
174
175
176
177
178
179
    fqcr = expand("{sample}/pre_process/raw_fastqc/.done.txt",
                  sample=SAMPLES)
    fqcm = expand("{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
                  sample=SAMPLES)
    fqcp = expand("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
                  sample=SAMPLES)
180
    if len(REFFLATS) >= 1:
Sander Bollen's avatar
Sander Bollen committed
181
182
        coverage_stats = expand("{sample}/coverage/{ref}.coverages.tsv",
                                sample=SAMPLES, ref=BASE_REFFLATS)
183
184
    else:
        coverage_stats = []
Sander Bollen's avatar
Sander Bollen committed
185
    stats = "stats.json"
Sander Bollen's avatar
Sander Bollen committed
186
    return  fqcr + fqcm + fqcp + coverage_stats + [stats]
Sander Bollen's avatar
Sander Bollen committed
187
188


Sander Bollen's avatar
Sander Bollen committed
189
190
rule all:
    input:
Sander Bollen's avatar
Sander Bollen committed
191
192
        combined="multisample/genotyped.vcf.gz",
        report="multiqc_report/multiqc_report.html",
Sander Bollen's avatar
Sander Bollen committed
193
194
        bais=expand("{sample}/bams/{sample}.markdup.bam.bai",sample=SAMPLES),
        vcfs=expand("{sample}/vcf/{sample}_single.vcf.gz",sample=SAMPLES),
Sander Bollen's avatar
Sander Bollen committed
195
        stats=metrics()
Sander Bollen's avatar
Sander Bollen committed
196

Sander Bollen's avatar
Sander Bollen committed
197
198
199

rule create_markdup_tmp:
    """Create tmp directory for mark duplicates"""
200
    output: directory("tmp")
Sander Bollen's avatar
Sander Bollen committed
201
202
    shell: "mkdir -p {output}"

Sander Bollen's avatar
Sander Bollen committed
203
rule genome:
Sander Bollen's avatar
Sander Bollen committed
204
    """Create genome file as used by bedtools"""
Sander Bollen's avatar
Sander Bollen committed
205
    input: REFERENCE
Sander Bollen's avatar
Sander Bollen committed
206
    output: "current.genome"
Sander Bollen's avatar
Sander Bollen committed
207
208
209
    shell: "awk -v OFS='\t' {{'print $1,$2'}} {input}.fai > {output}"

rule merge_r1:
Sander Bollen's avatar
Sander Bollen committed
210
    """Merge all forward fastq files into one"""
Sander Bollen's avatar
Sander Bollen committed
211
    input: get_r1
Sander Bollen's avatar
Sander Bollen committed
212
    output: temp("{sample}/pre_process/{sample}.merged_R1.fastq.gz")
Sander Bollen's avatar
Sander Bollen committed
213
214
215
    shell: "cat {input} > {output}"

rule merge_r2:
Sander Bollen's avatar
Sander Bollen committed
216
    """Merge all reverse fastq files into one"""
Sander Bollen's avatar
Sander Bollen committed
217
    input: get_r2
Sander Bollen's avatar
Sander Bollen committed
218
    output: temp("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
Sander Bollen's avatar
Sander Bollen committed
219
220
    shell: "cat {input} > {output}"

Sander Bollen's avatar
Sander Bollen committed
221
222

rule seqtk_r1:
Sander Bollen's avatar
Sander Bollen committed
223
    """Either subsample or link forward fastq file"""
Sander Bollen's avatar
Sander Bollen committed
224
    input:
Sander Bollen's avatar
Sander Bollen committed
225
226
        stats="{sample}/pre_process/{sample}.preqc_count.json",
        fastq="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
Sander Bollen's avatar
Sander Bollen committed
227
        seqtk=seq
Sander Bollen's avatar
Sander Bollen committed
228
    params:
Sander Bollen's avatar
Sander Bollen committed
229
        max_bases=str(MAX_BASES)
Sander Bollen's avatar
Sander Bollen committed
230
    output:
Sander Bollen's avatar
Sander Bollen committed
231
        fastq=temp("{sample}/pre_process/{sample}.sampled_R1.fastq.gz")
Sander Bollen's avatar
Sander Bollen committed
232
    conda: "envs/seqtk.yml"
233
    singularity: "docker://quay.io/biocontainers/seqtk:1.3--h84994c4_1"
Sander Bollen's avatar
Sander Bollen committed
234
235
    shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} "
           "{params.max_bases}"
Sander Bollen's avatar
Sander Bollen committed
236
237
238


rule seqtk_r2:
Sander Bollen's avatar
Sander Bollen committed
239
    """Either subsample or link reverse fastq file"""
Sander Bollen's avatar
Sander Bollen committed
240
    input:
Sander Bollen's avatar
Sander Bollen committed
241
242
        stats = "{sample}/pre_process/{sample}.preqc_count.json",
        fastq = "{sample}/pre_process/{sample}.merged_R2.fastq.gz",
Sander Bollen's avatar
Sander Bollen committed
243
        seqtk=seq
Sander Bollen's avatar
Sander Bollen committed
244
    params:
Sander Bollen's avatar
Sander Bollen committed
245
        max_bases =str(MAX_BASES)
Sander Bollen's avatar
Sander Bollen committed
246
    output:
Sander Bollen's avatar
Sander Bollen committed
247
        fastq = temp("{sample}/pre_process/{sample}.sampled_R2.fastq.gz")
Sander Bollen's avatar
Sander Bollen committed
248
    conda: "envs/seqtk.yml"
249
    singularity: "docker://quay.io/biocontainers/seqtk:1.3--h84994c4_1"
Sander Bollen's avatar
Sander Bollen committed
250
251
    shell: "bash {input.seqtk} {input.stats} {input.fastq} {output.fastq} "
           "{params.max_bases}"
Sander Bollen's avatar
Sander Bollen committed
252
253


254
# contains original merged fastq files as input to prevent them from being prematurely deleted
Sander Bollen's avatar
Sander Bollen committed
255
rule sickle:
Sander Bollen's avatar
Sander Bollen committed
256
    """Trim fastq files"""
Sander Bollen's avatar
Sander Bollen committed
257
    input:
Sander Bollen's avatar
Sander Bollen committed
258
259
260
261
        r1 = "{sample}/pre_process/{sample}.sampled_R1.fastq.gz",
        r2 = "{sample}/pre_process/{sample}.sampled_R2.fastq.gz",
        rr1 = "{sample}/pre_process/{sample}.merged_R1.fastq.gz",
        rr2 = "{sample}/pre_process/{sample}.merged_R2.fastq.gz"
Sander Bollen's avatar
Sander Bollen committed
262
    output:
Sander Bollen's avatar
Sander Bollen committed
263
264
        r1 = temp("{sample}/pre_process/{sample}.trimmed_R1.fastq"),
        r2 = temp("{sample}/pre_process/{sample}.trimmed_R2.fastq"),
Sander Bollen's avatar
Sander Bollen committed
265
        s = "{sample}/pre_process/{sample}.trimmed_singles.fastq"
266
    singularity: "docker://quay.io/biocontainers/sickle-trim:1.33--ha92aebf_4 "
Sander Bollen's avatar
Sander Bollen committed
267
    conda: "envs/sickle.yml"
Sander Bollen's avatar
Sander Bollen committed
268
    shell: "sickle pe -f {input.r1} -r {input.r2} -t sanger -o {output.r1} "
Sander Bollen's avatar
Sander Bollen committed
269
270
271
           "-p {output.r2} -s {output.s}"

rule cutadapt:
Sander Bollen's avatar
Sander Bollen committed
272
    """Clip fastq files"""
Sander Bollen's avatar
Sander Bollen committed
273
    input:
Sander Bollen's avatar
Sander Bollen committed
274
275
        r1 = "{sample}/pre_process/{sample}.trimmed_R1.fastq",
        r2 = "{sample}/pre_process/{sample}.trimmed_R2.fastq"
Sander Bollen's avatar
Sander Bollen committed
276
    output:
Sander Bollen's avatar
Sander Bollen committed
277
278
        r1 = temp("{sample}/pre_process/{sample}.cutadapt_R1.fastq"),
        r2 = temp("{sample}/pre_process/{sample}.cutadapt_R2.fastq")
279
    singularity: "docker://quay.io/biocontainers/cutadapt:1.14--py36_0 "
Sander Bollen's avatar
Sander Bollen committed
280
    conda: "envs/cutadapt.yml"
Sander Bollen's avatar
Sander Bollen committed
281
    shell: "cutadapt -a AGATCGGAAGAG -A AGATCGGAAGAG -m 1 -o {output.r1} "
Sander Bollen's avatar
Sander Bollen committed
282
283
284
           "{input.r1} -p {output.r2} {input.r2}"

rule align:
Sander Bollen's avatar
Sander Bollen committed
285
    """Align fastq files"""
Sander Bollen's avatar
Sander Bollen committed
286
    input:
Sander Bollen's avatar
Sander Bollen committed
287
288
        r1 = "{sample}/pre_process/{sample}.cutadapt_R1.fastq",
        r2 = "{sample}/pre_process/{sample}.cutadapt_R2.fastq",
Sander Bollen's avatar
Sander Bollen committed
289
290
291
        ref = REFERENCE
    params:
        rg = "@RG\\tID:{sample}_lib1\\tSM:{sample}\\tPL:ILLUMINA"
Sander Bollen's avatar
Sander Bollen committed
292
    output: temp("{sample}/bams/{sample}.sorted.bam")
Sander Bollen's avatar
Sander Bollen committed
293
    conda: "envs/bwa.yml"
Sander Bollen's avatar
Sander Bollen committed
294
295
    shell: "bwa mem -t 8 -R '{params.rg}' {input.ref} {input.r1} {input.r2} "
           "| picard SortSam CREATE_INDEX=TRUE TMP_DIR=null "
Sander Bollen's avatar
Sander Bollen committed
296
297
298
           "INPUT=/dev/stdin OUTPUT={output} SORT_ORDER=coordinate"

rule markdup:
Sander Bollen's avatar
Sander Bollen committed
299
    """Mark duplicates in BAM file"""
Sander Bollen's avatar
Sander Bollen committed
300
    input:
Sander Bollen's avatar
Sander Bollen committed
301
        bam = "{sample}/bams/{sample}.sorted.bam",
Sander Bollen's avatar
Sander Bollen committed
302
        tmp = ancient("tmp")
Sander Bollen's avatar
Sander Bollen committed
303
    output:
Sander Bollen's avatar
Sander Bollen committed
304
305
306
        bam = "{sample}/bams/{sample}.markdup.bam",
        bai = "{sample}/bams/{sample}.markdup.bai",
        metrics = "{sample}/bams/{sample}.markdup.metrics"
307
    singularity: "docker://quay.io/biocontainers/picard:2.19.2--0"
Sander Bollen's avatar
Sander Bollen committed
308
    conda: "envs/picard.yml"
Sander Bollen's avatar
Sander Bollen committed
309
310
311
    shell: "picard MarkDuplicates CREATE_INDEX=TRUE TMP_DIR={input.tmp} "
           "INPUT={input.bam} OUTPUT={output.bam} "
           "METRICS_FILE={output.metrics} "
Sander Bollen's avatar
Sander Bollen committed
312
313
           "MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=500"

Sander Bollen's avatar
Sander Bollen committed
314
315
316
rule bai:
    """Copy bai files as some genome browsers can only .bam.bai files"""
    input:
Sander Bollen's avatar
Sander Bollen committed
317
        bai = "{sample}/bams/{sample}.markdup.bai"
Sander Bollen's avatar
Sander Bollen committed
318
    output:
Sander Bollen's avatar
Sander Bollen committed
319
        bai = "{sample}/bams/{sample}.markdup.bam.bai"
Sander Bollen's avatar
Sander Bollen committed
320
321
    shell: "cp {input.bai} {output.bai}"

Sander Bollen's avatar
Sander Bollen committed
322
rule baserecal:
Sander Bollen's avatar
Sander Bollen committed
323
    """Base recalibrated BAM files"""
Sander Bollen's avatar
Sander Bollen committed
324
    input:
Sander Bollen's avatar
Sander Bollen committed
325
        bam = "{sample}/bams/{sample}.markdup.bam",
Sander Bollen's avatar
Sander Bollen committed
326
327
328
329
330
331
        gatk = GATK,
        ref = REFERENCE,
        dbsnp = DBSNP,
        one1kg = ONETHOUSAND,
        hapmap = HAPMAP
    output:
Sander Bollen's avatar
Sander Bollen committed
332
        grp = "{sample}/bams/{sample}.baserecal.grp"
333
    singularity: "docker://quay.io/biocontainers/gatk:3.8--py36_4"
Sander Bollen's avatar
Sander Bollen committed
334
    conda: "envs/gatk.yml"
Sander Bollen's avatar
Sander Bollen committed
335
    shell: "java -XX:ParallelGCThreads=1 -jar {input.gatk} -T "
Sander Bollen's avatar
Sander Bollen committed
336
337
           "BaseRecalibrator -I {input.bam} -o {output.grp} -nct 8 "
           "-R {input.ref} -cov ReadGroupCovariate -cov QualityScoreCovariate "
Sander Bollen's avatar
Sander Bollen committed
338
339
           "-cov CycleCovariate -cov ContextCovariate -knownSites "
           "{input.dbsnp} -knownSites {input.one1kg} "
Sander Bollen's avatar
Sander Bollen committed
340
341
           "-knownSites {input.hapmap}"

Sander Bollen's avatar
Sander Bollen committed
342
rule gvcf_scatter:
Sander Bollen's avatar
Sander Bollen committed
343
    """Run HaplotypeCaller in GVCF mode by chunk"""
Sander Bollen's avatar
Sander Bollen committed
344
    input:
Sander Bollen's avatar
Sander Bollen committed
345
346
        bam="{sample}/bams/{sample}.markdup.bam",
        bqsr="{sample}/bams/{sample}.baserecal.grp",
Sander Bollen's avatar
Sander Bollen committed
347
        dbsnp=DBSNP,
Sander Bollen's avatar
Sander Bollen committed
348
349
        ref=REFERENCE,
        gatk=GATK
Sander Bollen's avatar
Sander Bollen committed
350
    params:
Sander Bollen's avatar
Sander Bollen committed
351
        chunk="{chunk}"
Sander Bollen's avatar
Sander Bollen committed
352
    output:
Sander Bollen's avatar
Sander Bollen committed
353
354
        gvcf=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz"),
        gvcf_tbi=temp("{sample}/vcf/{sample}.{chunk}.part.vcf.gz.tbi")
355
    singularity: "docker://quay.io/biocontainers/gatk:3.8--py36_4"
Sander Bollen's avatar
Sander Bollen committed
356
    conda: "envs/gatk.yml"
Sander Bollen's avatar
Sander Bollen committed
357
358
    shell: "java -jar -Xmx4G -XX:ParallelGCThreads=1 {input.gatk} "
           "-T HaplotypeCaller -ERC GVCF -I "
Sander Bollen's avatar
Sander Bollen committed
359
360
361
           "{input.bam} -R {input.ref} -D {input.dbsnp} "
           "-L '{params.chunk}' -o '{output.gvcf}' "
           "-variant_index_type LINEAR -variant_index_parameter 128000 "
Sander Bollen's avatar
Sander Bollen committed
362
           "-BQSR {input.bqsr}"
Sander Bollen's avatar
Sander Bollen committed
363
364


Sander Bollen's avatar
Sander Bollen committed
365
rule gvcf_gather:
Sander Bollen's avatar
Sander Bollen committed
366
    """Gather all gvcf scatters"""
Sander Bollen's avatar
Sander Bollen committed
367
    input:
Sander Bollen's avatar
Sander Bollen committed
368
369
370
371
        gvcfs=expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
                     chunk=CHUNKS),
        tbis=expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz.tbi",
                    chunk=CHUNKS),
Sander Bollen's avatar
Sander Bollen committed
372
        ref=REFERENCE,
Sander Bollen's avatar
Sander Bollen committed
373
        gatk=GATK
Sander Bollen's avatar
Sander Bollen committed
374
    params:
Sander Bollen's avatar
Sander Bollen committed
375
376
        gvcfs="' -V '".join(expand("{{sample}}/vcf/{{sample}}.{chunk}.part.vcf.gz",
                                   chunk=CHUNKS))
Sander Bollen's avatar
Sander Bollen committed
377
    output:
Sander Bollen's avatar
Sander Bollen committed
378
        gvcf="{sample}/vcf/{sample}.g.vcf.gz"
379
    singularity: "docker://quay.io/biocontainers/gatk:3.8--py36_4"
Sander Bollen's avatar
Sander Bollen committed
380
    conda: "envs/gatk.yml"
Sander Bollen's avatar
Sander Bollen committed
381
382
    shell: "java -Xmx4G -XX:ParallelGCThreads=1 -cp {input.gatk} "
           "org.broadinstitute.gatk.tools.CatVariants "
Sander Bollen's avatar
Sander Bollen committed
383
           "-R {input.ref} -V '{params.gvcfs}' -out {output.gvcf} "
Sander Bollen's avatar
Sander Bollen committed
384
385
386
387
           "-assumeSorted"


rule genotype_scatter:
Sander Bollen's avatar
Sander Bollen committed
388
    """Run GATK's GenotypeGVCFs by chunk"""
Sander Bollen's avatar
Sander Bollen committed
389
    input:
Sander Bollen's avatar
Sander Bollen committed
390
        gvcfs = expand("{sample}/vcf/{sample}.g.vcf.gz", sample=SAMPLES),
Sander Bollen's avatar
Sander Bollen committed
391
        ref=REFERENCE,
Sander Bollen's avatar
Sander Bollen committed
392
393
        gatk=GATK
    params:
Sander Bollen's avatar
Sander Bollen committed
394
395
        li=" -V ".join(expand("{sample}/vcf/{sample}.g.vcf.gz",
                              sample=SAMPLES)),
Sander Bollen's avatar
Sander Bollen committed
396
397
        chunk="{chunk}"
    output:
Sander Bollen's avatar
Sander Bollen committed
398
399
        vcf=temp("multisample/genotype.{chunk}.part.vcf.gz"),
        vcf_tbi=temp("multisample/genotype.{chunk}.part.vcf.gz.tbi")
400
    singularity: "docker://quay.io/biocontainers/gatk:3.8--py36_4"
Sander Bollen's avatar
Sander Bollen committed
401
    conda: "envs/gatk.yml"
Sander Bollen's avatar
Sander Bollen committed
402
403
    shell: "java -jar -Xmx15G -XX:ParallelGCThreads=1 {input.gatk} -T "
           "GenotypeGVCFs -R {input.ref} "
Sander Bollen's avatar
Sander Bollen committed
404
           "-V {params.li} -L '{params.chunk}' -o '{output.vcf}'"
Sander Bollen's avatar
Sander Bollen committed
405
406
407


rule genotype_gather:
Sander Bollen's avatar
Sander Bollen committed
408
    """Gather all genotyping scatters"""
Sander Bollen's avatar
Sander Bollen committed
409
    input:
Sander Bollen's avatar
Sander Bollen committed
410
        vcfs=expand("multisample/genotype.{chunk}.part.vcf.gz", chunk=CHUNKS),
Sander Bollen's avatar
Sander Bollen committed
411
412
        tbis=expand("multisample/genotype.{chunk}.part.vcf.gz.tbi",
                    chunk=CHUNKS),
Sander Bollen's avatar
Sander Bollen committed
413
414
        ref=REFERENCE,
        gatk=GATK
Sander Bollen's avatar
Sander Bollen committed
415
    params:
Sander Bollen's avatar
Sander Bollen committed
416
417
        vcfs="' -V '".join(expand("multisample/genotype.{chunk}.part.vcf.gz",
                                  chunk=CHUNKS))
Sander Bollen's avatar
Sander Bollen committed
418
    output:
Sander Bollen's avatar
Sander Bollen committed
419
        combined="multisample/genotyped.vcf.gz"
Sander Bollen's avatar
Sander Bollen committed
420
    conda: "envs/gatk.yml"
421
    singularity: "docker://quay.io/biocontainers/gatk:3.8--py36_4"
Sander Bollen's avatar
Sander Bollen committed
422
423
    shell: "java -Xmx4G -XX:ParallelGCThreads=1 -cp {input.gatk} "
           "org.broadinstitute.gatk.tools.CatVariants "
Sander Bollen's avatar
Sander Bollen committed
424
           "-R {input.ref} -V '{params.vcfs}' -out {output.combined} "
Sander Bollen's avatar
Sander Bollen committed
425
           "-assumeSorted"
Sander Bollen's avatar
Sander Bollen committed
426
427


428
429
430
rule split_vcf:
    """Split multisample VCF in single samples"""
    input:
Sander Bollen's avatar
Sander Bollen committed
431
        vcf="multisample/genotyped.vcf.gz",
432
433
434
435
436
        gatk=GATK,
        ref=REFERENCE
    params:
        s="{sample}"
    output:
Sander Bollen's avatar
Sander Bollen committed
437
        splitted="{sample}/vcf/{sample}_single.vcf.gz"
438
    singularity: "docker://quay.io/biocontainers/gatk:3.8--py36_4"
439
    conda: "envs/gatk.yml"
Sander Bollen's avatar
Sander Bollen committed
440
441
442
    shell: "java -Xmx15G -XX:ParallelGCThreads=1 -jar {input.gatk} "
           "-T SelectVariants -sn {params.s} -env -R {input.ref} -V "
           "{input.vcf} -o {output.splitted}"
443
444


Sander Bollen's avatar
Sander Bollen committed
445
446
447
## bam metrics

rule mapped_num:
Sander Bollen's avatar
Sander Bollen committed
448
    """Calculate number of mapped reads"""
Sander Bollen's avatar
Sander Bollen committed
449
    input:
Sander Bollen's avatar
Sander Bollen committed
450
        bam="{sample}/bams/{sample}.sorted.bam"
Sander Bollen's avatar
Sander Bollen committed
451
    output:
Sander Bollen's avatar
Sander Bollen committed
452
        num="{sample}/bams/{sample}.mapped.num"
453
    singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
Sander Bollen's avatar
Sander Bollen committed
454
455
456
457
458
    conda: "envs/samtools.yml"
    shell: "samtools view -F 4 {input.bam} | wc -l > {output.num}"


rule mapped_basenum:
Sander Bollen's avatar
Sander Bollen committed
459
    """Calculate number of mapped bases"""
Sander Bollen's avatar
Sander Bollen committed
460
    input:
Sander Bollen's avatar
Sander Bollen committed
461
        bam="{sample}/bams/{sample}.sorted.bam"
Sander Bollen's avatar
Sander Bollen committed
462
    output:
Sander Bollen's avatar
Sander Bollen committed
463
        num="{sample}/bams/{sample}.mapped.basenum"
464
    singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
Sander Bollen's avatar
Sander Bollen committed
465
    conda: "envs/samtools.yml"
Sander Bollen's avatar
Sander Bollen committed
466
    shell: "samtools view -F 4 {input.bam} | cut -f10 | wc -c > {output.num}"
Sander Bollen's avatar
Sander Bollen committed
467
468
469


rule unique_num:
Sander Bollen's avatar
Sander Bollen committed
470
    """Calculate number of unique reads"""
Sander Bollen's avatar
Sander Bollen committed
471
    input:
Sander Bollen's avatar
Sander Bollen committed
472
        bam="{sample}/bams/{sample}.markdup.bam"
Sander Bollen's avatar
Sander Bollen committed
473
    output:
Sander Bollen's avatar
Sander Bollen committed
474
        num="{sample}/bams/{sample}.unique.num"
475
    singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
Sander Bollen's avatar
Sander Bollen committed
476
    conda: "envs/samtools.yml"
477
    shell: "samtools view -F 4 -F 1024 {input.bam} | wc -l > {output.num}"
Sander Bollen's avatar
Sander Bollen committed
478
479
480


rule usable_basenum:
Sander Bollen's avatar
Sander Bollen committed
481
    """Calculate number of bases on unique reads"""
Sander Bollen's avatar
Sander Bollen committed
482
    input:
Sander Bollen's avatar
Sander Bollen committed
483
        bam="{sample}/bams/{sample}.markdup.bam"
Sander Bollen's avatar
Sander Bollen committed
484
    output:
Sander Bollen's avatar
Sander Bollen committed
485
        num="{sample}/bams/{sample}.usable.basenum"
486
    singularity: "docker://quay.io/biocontainers/samtools:1.6--he673b24_3"
Sander Bollen's avatar
Sander Bollen committed
487
    conda: "envs/samtools.yml"
Sander Bollen's avatar
Sander Bollen committed
488
489
    shell: "samtools view -F 4 -F 1024 {input.bam} | cut -f10 | wc -c > "
           "{output.num}"
Sander Bollen's avatar
Sander Bollen committed
490
491
492
493


## fastqc

Sander Bollen's avatar
Sander Bollen committed
494
rule fastqc_raw:
Sander Bollen's avatar
Sander Bollen committed
495
    """Run fastqc on raw fastq files"""
Sander Bollen's avatar
Sander Bollen committed
496
    input:
Sander Bollen's avatar
Sander Bollen committed
497
        r1=get_r1,
Sander Bollen's avatar
Sander Bollen committed
498
499
        r2=get_r2
    params:
Sander Bollen's avatar
Sander Bollen committed
500
        odir="{sample}/pre_process/raw_fastqc"
Sander Bollen's avatar
Sander Bollen committed
501
    output:
Sander Bollen's avatar
Sander Bollen committed
502
        aux="{sample}/pre_process/raw_fastqc/.done.txt"
503
    singularity: "docker://quay.io/biocontainers/fastqc:0.11.8--1"
504
    conda: "envs/fastqc.yml"
Sander Bollen's avatar
Sander Bollen committed
505
506
    shell: "fastqc --nogroup -o {params.odir} {input.r1} {input.r2} "
           "&& echo 'done' > {output.aux}"
Sander Bollen's avatar
Sander Bollen committed
507
508


Sander Bollen's avatar
Sander Bollen committed
509
rule fastqc_merged:
Sander Bollen's avatar
Sander Bollen committed
510
    """Run fastqc on merged fastq files"""
Sander Bollen's avatar
Sander Bollen committed
511
    input:
Sander Bollen's avatar
Sander Bollen committed
512
513
        r1="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
        r2="{sample}/pre_process/{sample}.merged_R2.fastq.gz",
Sander Bollen's avatar
Sander Bollen committed
514
        fq=fqsc
Sander Bollen's avatar
Sander Bollen committed
515
    params:
Sander Bollen's avatar
Sander Bollen committed
516
        odir="{sample}/pre_process/merged_fastqc"
Sander Bollen's avatar
Sander Bollen committed
517
    output:
Sander Bollen's avatar
Sander Bollen committed
518
519
        r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
        r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip"
520
    singularity: "docker://quay.io/biocontainers/fastqc:0.11.8--1"
521
    conda: "envs/fastqc.yml"
Sander Bollen's avatar
Sander Bollen committed
522
523
    shell: "bash {input.fq} {input.r1} {input.r2} "
           "{output.r1} {output.r2} {params.odir}"
Sander Bollen's avatar
Sander Bollen committed
524
525


Sander Bollen's avatar
Sander Bollen committed
526
rule fastqc_postqc:
Sander Bollen's avatar
Sander Bollen committed
527
    """Run fastqc on fastq files post pre-processing"""
Sander Bollen's avatar
Sander Bollen committed
528
    input:
Sander Bollen's avatar
Sander Bollen committed
529
530
        r1="{sample}/pre_process/{sample}.cutadapt_R1.fastq",
        r2="{sample}/pre_process/{sample}.cutadapt_R2.fastq",
Sander Bollen's avatar
Sander Bollen committed
531
        fq=fqsc
Sander Bollen's avatar
Sander Bollen committed
532
    params:
Sander Bollen's avatar
Sander Bollen committed
533
        odir="{sample}/pre_process/postqc_fastqc"
Sander Bollen's avatar
Sander Bollen committed
534
    output:
Sander Bollen's avatar
Sander Bollen committed
535
536
        r1="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
        r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip"
537
    singularity: "docker://quay.io/biocontainers/fastqc:0.11.8--1"
538
    conda: "envs/fastqc.yml"
Sander Bollen's avatar
Sander Bollen committed
539
540
    shell: "bash {input.fq} {input.r1} {input.r2} "
           "{output.r1} {output.r2} {params.odir}"
Sander Bollen's avatar
Sander Bollen committed
541
542
543
544
545


## fastq-count

rule fqcount_preqc:
Sander Bollen's avatar
Sander Bollen committed
546
    """Calculate number of reads and bases before pre-processing"""
Sander Bollen's avatar
Sander Bollen committed
547
    input:
Sander Bollen's avatar
Sander Bollen committed
548
549
        r1="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
        r2="{sample}/pre_process/{sample}.merged_R2.fastq.gz"
550
    params:
551
        fastqcount=fqc
Sander Bollen's avatar
Sander Bollen committed
552
    output:
Sander Bollen's avatar
Sander Bollen committed
553
        "{sample}/pre_process/{sample}.preqc_count.json"
554
    shell: "{params.fastqcount} {input.r1} {input.r2} > {output}"
Sander Bollen's avatar
Sander Bollen committed
555
556
557


rule fqcount_postqc:
Sander Bollen's avatar
Sander Bollen committed
558
    """Calculate number of reads and bases after pre-processing"""
Sander Bollen's avatar
Sander Bollen committed
559
    input:
Sander Bollen's avatar
Sander Bollen committed
560
561
        r1="{sample}/pre_process/{sample}.cutadapt_R1.fastq",
        r2="{sample}/pre_process/{sample}.cutadapt_R2.fastq"
562
    params:
563
        fastqcount=fqc
Sander Bollen's avatar
Sander Bollen committed
564
    output:
Sander Bollen's avatar
Sander Bollen committed
565
        "{sample}/pre_process/{sample}.postqc_count.json"
Sander Bollen's avatar
Sander Bollen committed
566
567
568
    shell: "{params.fastqcount} {input.r1} {input.r2} > {output}"


Sander Bollen's avatar
Sander Bollen committed
569
570
571
572
# fastqc stats
rule fastqc_stats:
    """Collect fastq stats for a sample in json format"""
    input:
Sander Bollen's avatar
Sander Bollen committed
573
574
575
576
        preqc_r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
        preqc_r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip",
        postqc_r1="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip",
        postqc_r2="{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R2_fastqc.zip",
Sander Bollen's avatar
Sander Bollen committed
577
        sc=fqpy
578
    conda: "envs/collectstats.yml"
Sander Bollen's avatar
Sander Bollen committed
579
    output:
Sander Bollen's avatar
Sander Bollen committed
580
        "{sample}/pre_process/fastq_stats.json"
581
582
583
    shell: "python {input.sc} --preqc-r1 {input.preqc_r1} "
           "--preqc-r2 {input.preqc_r2} "
           "--postqc-r1 {input.postqc_r1} "
Sander Bollen's avatar
Sander Bollen committed
584
           "--postqc-r2 {input.postqc_r2} > {output}"
Sander Bollen's avatar
Sander Bollen committed
585

Sander Bollen's avatar
Sander Bollen committed
586
587
588
## coverages

rule covstats:
Sander Bollen's avatar
Sander Bollen committed
589
    """Calculate coverage statistics on bam file"""
Sander Bollen's avatar
Sander Bollen committed
590
    input:
Sander Bollen's avatar
Sander Bollen committed
591
592
        bam="{sample}/bams/{sample}.markdup.bam",
        genome="current.genome",
Sander Bollen's avatar
Sander Bollen committed
593
594
595
596
597
        covpy=covpy,
        bed=get_bedpath
    params:
        subt="Sample {sample}"
    output:
Sander Bollen's avatar
Sander Bollen committed
598
599
        covj="{sample}/coverage/{bed}.covstats.json",
        covp="{sample}/coverage/{bed}.covstats.png"
Sander Bollen's avatar
Sander Bollen committed
600
    conda: "envs/covstat.yml"
Sander Bollen's avatar
Sander Bollen committed
601
602
603
604
    shell: "bedtools coverage -sorted -g {input.genome} -a {input.bed} "
           "-b {input.bam} -d  | python {input.covpy} - --plot {output.covp} "
           "--title 'Targets coverage' --subtitle '{params.subt}' "
           "> {output.covj}"
Sander Bollen's avatar
Sander Bollen committed
605
606


Sander Bollen's avatar
Sander Bollen committed
607
608
609
rule vtools_coverage:
    """Calculate coverage statistics per transcript"""
    input:
Sander Bollen's avatar
Sander Bollen committed
610
        gvcf="{sample}/vcf/{sample}.g.vcf.gz",
Sander Bollen's avatar
Sander Bollen committed
611
612
        ref=get_refflatpath
    output:
Sander Bollen's avatar
Sander Bollen committed
613
        tsv="{sample}/coverage/{ref}.coverages.tsv"
Sander Bollen's avatar
Sander Bollen committed
614
615
616
617
    conda: "envs/vcfstats.yml"
    shell: "vtools-gcoverage -I {input.gvcf} -R {input.ref} > {output.tsv}"


Sander Bollen's avatar
Sander Bollen committed
618
619
620
## vcfstats

rule vcfstats:
Sander Bollen's avatar
Sander Bollen committed
621
    """Calculate vcf statistics"""
Sander Bollen's avatar
Sander Bollen committed
622
    input:
Sander Bollen's avatar
Sander Bollen committed
623
        vcf="multisample/genotyped.vcf.gz"
Sander Bollen's avatar
Sander Bollen committed
624
    output:
Sander Bollen's avatar
Sander Bollen committed
625
        stats="multisample/vcfstats.json"
Sander Bollen's avatar
Sander Bollen committed
626
    conda: "envs/vcfstats.yml"
Sander Bollen's avatar
Sander Bollen committed
627
    shell: "vtools-stats -i {input.vcf} > {output.stats}"
Sander Bollen's avatar
Sander Bollen committed
628
629


Sander Bollen's avatar
Sander Bollen committed
630
## collection
631
632
633
634
if len(BASE_BEDS) >= 1:
    rule collectstats:
        """Collect all stats for a particular sample with beds"""
        input:
Sander Bollen's avatar
Sander Bollen committed
635
636
637
638
639
640
641
            preqc="{sample}/pre_process/{sample}.preqc_count.json",
            postq="{sample}/pre_process/{sample}.postqc_count.json",
            mnum="{sample}/bams/{sample}.mapped.num",
            mbnum="{sample}/bams/{sample}.mapped.basenum",
            unum="{sample}/bams/{sample}.unique.num",
            ubnum="{sample}/bams/{sample}.usable.basenum",
            fastqc="{sample}/pre_process/fastq_stats.json",
Sander Bollen's avatar
Sander Bollen committed
642
643
            cov=expand("{{sample}}/coverage/{bed}.covstats.json",
                       bed=BASE_BEDS),
644
645
646
647
648
            colpy=colpy
        params:
            sample_name="{sample}",
            fthresh=FEMALE_THRESHOLD
        output:
Sander Bollen's avatar
Sander Bollen committed
649
            "{sample}/{sample}.stats.json"
650
651
652
653
654
        conda: "envs/collectstats.yml"
        shell: "python {input.colpy} --sample-name {params.sample_name} "
               "--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
               "--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
               "--unique-num {input.unum} --usable-basenum {input.ubnum} "
655
656
               "--female-threshold {params.fthresh} "
               "--fastqc-stats {input.fastqc} {input.cov} > {output}"
657
658
else:
    rule collectstats:
Sander Bollen's avatar
Sander Bollen committed
659
        """Collect all stats for a particular sample without beds"""
Sander Bollen's avatar
Sander Bollen committed
660
        input:
Sander Bollen's avatar
Sander Bollen committed
661
662
663
664
665
666
667
            preqc = "{sample}/pre_process/{sample}.preqc_count.json",
            postq = "{sample}/pre_process/{sample}.postqc_count.json",
            mnum = "{sample}/bams/{sample}.mapped.num",
            mbnum = "{sample}/bams/{sample}.mapped.basenum",
            unum = "{sample}/bams/{sample}.unique.num",
            ubnum = "{sample}/bams/{sample}.usable.basenum",
            fastqc="{sample}/pre_process/fastq_stats.json",
Sander Bollen's avatar
Sander Bollen committed
668
669
670
671
672
            colpy = colpy
        params:
            sample_name = "{sample}",
            fthresh = FEMALE_THRESHOLD
        output:
Sander Bollen's avatar
Sander Bollen committed
673
            "{sample}/{sample}.stats.json"
Sander Bollen's avatar
Sander Bollen committed
674
675
676
677
678
        conda: "envs/collectstats.yml"
        shell: "python {input.colpy} --sample-name {params.sample_name} "
               "--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
               "--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
               "--unique-num {input.unum} --usable-basenum {input.ubnum} "
679
680
               "--female-threshold {params.fthresh} "
               "--fastqc-stats {input.fastqc}  > {output}"
Sander Bollen's avatar
Sander Bollen committed
681
682

rule merge_stats:
Sander Bollen's avatar
Sander Bollen committed
683
    """Merge all stats of all samples"""
Sander Bollen's avatar
Sander Bollen committed
684
    input:
Sander Bollen's avatar
Sander Bollen committed
685
686
        cols=expand("{sample}/{sample}.stats.json", sample=SAMPLES),
        vstat="multisample/vcfstats.json",
Sander Bollen's avatar
Sander Bollen committed
687
688
        mpy=mpy
    output:
Sander Bollen's avatar
Sander Bollen committed
689
        stats="stats.json"
Sander Bollen's avatar
Sander Bollen committed
690
    conda: "envs/collectstats.yml"
Sander Bollen's avatar
Sander Bollen committed
691
692
    shell: "python {input.mpy} --vcfstats {input.vstat} {input.cols} "
           "> {output.stats}"
Sander Bollen's avatar
Sander Bollen committed
693
694


Sander Bollen's avatar
Sander Bollen committed
695
696
697
rule stats_tsv:
    """Convert stats.json to tsv"""
    input:
Sander Bollen's avatar
Sander Bollen committed
698
        stats="stats.json",
Sander Bollen's avatar
Sander Bollen committed
699
700
        sc=tsvpy
    output:
Sander Bollen's avatar
Sander Bollen committed
701
        stats="stats.tsv"
Sander Bollen's avatar
Sander Bollen committed
702
703
704
705
    conda: "envs/collectstats.yml"
    shell: "python {input.sc} -i {input.stats} > {output.stats}"


Sander Bollen's avatar
Sander Bollen committed
706
707
708
rule multiqc:
    """
    Create multiQC report
Sander Bollen's avatar
Sander Bollen committed
709
    Depends on stats.tsv to forcefully run at end of pipeline
Sander Bollen's avatar
Sander Bollen committed
710
711
    """
    input:
Sander Bollen's avatar
Sander Bollen committed
712
        stats="stats.tsv"
Sander Bollen's avatar
Sander Bollen committed
713
    params:
Sander Bollen's avatar
Sander Bollen committed
714
        odir=".",
Sander Bollen's avatar
Sander Bollen committed
715
        rdir="multiqc_report"
Sander Bollen's avatar
Sander Bollen committed
716
    output:
Sander Bollen's avatar
Sander Bollen committed
717
        report="multiqc_report/multiqc_report.html"
718
    singularity: "docker://quay.io/biocontainers/multiqc:1.5--py36_0 "
Sander Bollen's avatar
Sander Bollen committed
719
    conda: "envs/multiqc.yml"
720
    shell: "multiqc -f -o {params.rdir} {params.odir} || touch {output.report}"