Commit 8670b32a authored by van den Berg's avatar van den Berg
Browse files

No longer merge fastq files

Instead of merging fastq files as the first step of the pipeline, merge
as late as possible to make better use of parallelism, and to prevent
unnecessary reading/writing of all data. Currently, reads are trimmed
and mapped per read group, and are merge in the picard MarkDuplicates
step. Therefore, samples are merged as a side effect of another task
that was performed as well.

Additionally, fastq processing is now done in a single step using
cutadapt, instead of using both sickle and cutadapt sequentially.

As part of these changes, the following changes were made:
 - Use cutadapt to trim both adapters and low quality reads
 - Run bwa align on each readgroup independently
 - Run fastqc on each readgroup independenly
 - Pass multiple bam files to picard MarkDuplicates
 - Remove safe_fastqc.sh script
 - Remove fastqc_stats
 - Remove fastqc coverage from covstats
 - Update test data for slight differences in output vcf files
 - Add tests for fastqc zip files
parent fe901858
Pipeline #3597 failed with stages
in 19 seconds
......@@ -59,7 +59,6 @@ set_default("collect_stats", "src/collect_stats.py")
set_default("merge_stats", "src/merge_stats.py")
set_default("fastq_stats", "src/fastqc_stats.py")
set_default("stats_to_tsv", "src/stats_to_tsv.py")
set_default("safe_fastqc", "src/safe_fastqc.sh")
set_default("py_wordcount", "src/pywc.py")
containers = {
......@@ -67,7 +66,7 @@ containers = {
"bedtools-2.26-python-2.7": "docker://quay.io/biocontainers/mulled-v2-3251e6c49d800268f0bc575f28045ab4e69475a6:4ce073b219b6dabb79d154762a9b67728c357edb-0",
"biopet-scatterregions": "docker://quay.io/biocontainers/biopet-scatterregions:0.2--0",
"bwa-0.7.17-picard-2.18.7": "docker://quay.io/biocontainers/mulled-v2-002f51ea92721407ef440b921fb5940f424be842:43ec6124f9f4f875515f9548733b8b4e5fed9aa6-0",
"cutadapt": "docker://quay.io/biocontainers/cutadapt:1.14--py36_0",
"cutadapt": "docker://quay.io/biocontainers/cutadapt:2.9--py37h516909a_0",
"debian": "docker://debian:buster-slim",
"fastq-count": "docker://quay.io/biocontainers/fastq-count:0.1.0--h14c3975_0",
"fastqc": "docker://quay.io/biocontainers/fastqc:0.11.7--4",
......@@ -81,17 +80,6 @@ containers = {
"vtools": "docker://quay.io/biocontainers/vtools:1.0.0--py37h3010b51_0"
}
def get_r(strand, wildcards):
"""Get fastq files on a single strand for a sample"""
s = settings['samples'].get(wildcards.sample)
rs = []
for l in sorted(s['libraries'].keys()):
rs.append(s['libraries'][l][strand])
return rs
get_r1 = partial(get_r, "R1")
get_r2 = partial(get_r, "R2")
def get_forward(wildcards):
""" Get the forward fastq file from the config """
return (
......@@ -109,6 +97,12 @@ def get_reverse(wildcards):
def get_readgroup(wildcards):
return settings["samples"][wildcards.sample]["libraries"]
def get_readgroup_per_sample():
for sample in settings["samples"]:
for rg in settings["samples"][sample]["libraries"]:
yield rg, sample
def coverage_stats(wildcards):
files = expand("{sample}/coverage/refFlat_coverage.tsv", sample=settings['samples'])
return files if "refflat" in settings else []
......@@ -116,16 +110,15 @@ def coverage_stats(wildcards):
rule all:
input:
multiqc="multiqc_report/multiqc_report.html",
stats = "stats.json",
#stats = "stats.json",
bais=expand("{sample}/bams/{sample}.markdup.bam.bai", sample=settings['samples']),
vcfs=expand("{sample}/vcf/{sample}.vcf.gz", sample=settings['samples']),
vcf_tbi=expand("{sample}/vcf/{sample}.vcf.gz.tbi", sample=settings['samples']),
gvcfs=expand("{sample}/vcf/{sample}.g.vcf.gz", sample=settings['samples']),
gvcf_tbi=expand("{sample}/vcf/{sample}.g.vcf.gz.tbi", sample=settings['samples']),
#fqcr = expand("{sample}/pre_process/raw_fastqc/.done.txt", sample=settings['samples']),
#fqcm = expand("{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip", sample=settings['samples']),
#fqcp = expand("{sample}/pre_process/postqc_fastqc/{sample}.cutadapt_R1_fastqc.zip", sample=settings['samples']),
coverage_stats = coverage_stats,
fastqc_raw = (f"{sample}/pre_process/raw-{sample}-{read_group}/" for read_group, sample in get_readgroup_per_sample()),
fastqc_trimmed = (f"{sample}/pre_process/trimmed-{sample}-{read_group}/" for read_group, sample in get_readgroup_per_sample()),
#coverage_stats = coverage_stats,
rule create_markdup_tmp:
......@@ -141,50 +134,25 @@ rule genome:
singularity: containers["debian"]
shell: "awk -v OFS='\t' {{'print $1,$2'}} {input}.fai > {output}"
rule merge_r1:
"""Merge all forward fastq files into one"""
input: get_r1
output: temp("{sample}/pre_process/{sample}.merged_R1.fastq.gz")
singularity: containers["debian"]
shell: "cat {input} > {output}"
rule merge_r2:
"""Merge all reverse fastq files into one"""
input: get_r2
output: temp("{sample}/pre_process/{sample}.merged_R2.fastq.gz")
singularity: containers["debian"]
shell: "cat {input} > {output}"
rule sickle:
"""Trim fastq files"""
input:
r1=get_forward,
r2=get_reverse
output:
r1 = "{sample}/pre_process/{sample}-{read_group}.trimmed_R1.fastq",
r2 = "{sample}/pre_process/{sample}-{read_group}.trimmed_R2.fastq",
s = "{sample}/pre_process/{sample}-{read_group}.trimmed_singles.fastq"
singularity: containers["sickle"]
shell: "sickle pe -f {input.r1} -r {input.r2} -t sanger -o {output.r1} "
"-p {output.r2} -s {output.s}"
rule cutadapt:
"""Clip fastq files"""
input:
r1 = "{sample}/pre_process/{sample}-{read_group}.trimmed_R1.fastq",
r2 = "{sample}/pre_process/{sample}-{read_group}.trimmed_R2.fastq"
r1=get_forward,
r2=get_reverse
output:
r1 = "{sample}/pre_process/{sample}-{read_group}.cutadapt_R1.fastq",
r2 = "{sample}/pre_process/{sample}-{read_group}.cutadapt_R2.fastq"
r1 = "{sample}/pre_process/{sample}-{read_group}_R1.fastq.gz",
r2 = "{sample}/pre_process/{sample}-{read_group}_R2.fastq.gz"
singularity: containers["cutadapt"]
shell: "cutadapt -a AGATCGGAAGAG -A AGATCGGAAGAG -m 1 -o {output.r1} "
"{input.r1} -p {output.r2} {input.r2}"
shell: "cutadapt -a AGATCGGAAGAG -A AGATCGGAAGAG "
"--minimum-length 1 --quality-cutoff=20,20 "
"--output {output.r1} --paired-output {output.r2} -Z "
"{input.r1} {input.r2}"
rule align:
"""Align fastq files"""
input:
r1 = "{sample}/pre_process/{sample}-{read_group}.cutadapt_R1.fastq",
r2 = "{sample}/pre_process/{sample}-{read_group}.cutadapt_R2.fastq",
r1 = "{sample}/pre_process/{sample}-{read_group}_R1.fastq.gz",
r2 = "{sample}/pre_process/{sample}-{read_group}_R2.fastq.gz",
ref = settings["reference"],
tmp = ancient("tmp")
params:
......@@ -379,57 +347,27 @@ rule unique_reads_bases:
rule fastqc_raw:
"""
Run fastqc on raw fastq files
NOTE: singularity version uses 0.11.7 in stead of 0.11.5 due to
perl missing in the container of 0.11.5
"""
input:
r1=get_r1,
r2=get_r2
params:
odir="{sample}/pre_process/raw_fastqc-{read_group}"
output:
aux="{sample}/pre_process/raw_fastqc-{read_group}/.done.txt"
singularity: containers["fastqc"]
shell: "fastqc --threads 4 --nogroup -o {params.odir} {input.r1} {input.r2} "
"&& echo 'done' > {output.aux}"
rule fastqc_merged:
"""
Run fastqc on merged fastq files
"""
input:
r1="{sample}/pre_process/{sample}.merged_R1.fastq.gz",
r2="{sample}/pre_process/{sample}.merged_R2.fastq.gz",
fq=settings["safe_fastqc"]
params:
odir="{sample}/pre_process/merged_fastqc"
r1=get_forward,
r2=get_reverse
output:
r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip"
directory("{sample}/pre_process/raw-{sample}-{read_group}/")
singularity: containers["fastqc"]
shell: "bash {input.fq} {input.r1} {input.r2} "
"{output.r1} {output.r2} {params.odir}"
shell: "fastqc --threads 4 --nogroup -o {output} {input.r1} {input.r2} "
rule fastqc_postqc:
"""
Run fastqc on fastq files post pre-processing
NOTE: singularity version uses 0.11.7 in stead of 0.11.5 due to
perl missing in the container of 0.11.5
"""
input:
r1="{sample}/pre_process/{sample}-{read_group}.cutadapt_R1.fastq",
r2="{sample}/pre_process/{sample}-{read_group}.cutadapt_R2.fastq",
fq=settings["safe_fastqc"]
params:
odir="{sample}/pre_process/postqc_fastqc"
r1="{sample}/pre_process/{sample}-{read_group}_R1.fastq.gz",
r2="{sample}/pre_process/{sample}-{read_group}_R2.fastq.gz",
output:
r1="{sample}/pre_process/postqc_fastqc/{sample}-{read_group}.cutadapt_R1_fastqc.zip",
r2="{sample}/pre_process/postqc_fastqc/{sample}-{read_group}.cutadapt_R2_fastqc.zip"
directory("{sample}/pre_process/trimmed-{sample}-{read_group}/")
singularity: containers["fastqc"]
shell: "bash {input.fq} {input.r1} {input.r2} "
"{output.r1} {output.r2} {params.odir}"
shell: "fastqc --threads 4 --nogroup -o {output} {input.r1} {input.r2} "
## fastq-count
......@@ -448,31 +386,14 @@ rule fqcount_preqc:
rule fqcount_postqc:
"""Calculate number of reads and bases after pre-processing"""
input:
r1="{sample}/pre_process/{sample}-{read_group}.cutadapt_R1.fastq",
r2="{sample}/pre_process/{sample}-{read_group}.cutadapt_R2.fastq"
r1="{sample}/pre_process/{sample}-{read_group}_R1.fastq",
r2="{sample}/pre_process/{sample}-{read_group}_R2.fastq"
output:
"{sample}/pre_process/{sample}-{read_group}.postqc_count.json"
singularity: containers["fastq-count"]
shell: "fastq-count {input.r1} {input.r2} > {output}"
# fastqc stats
rule fastqc_stats:
"""Collect fastq stats for a sample in json format"""
input:
preqc_r1="{sample}/pre_process/merged_fastqc/{sample}.merged_R1_fastqc.zip",
preqc_r2="{sample}/pre_process/merged_fastqc/{sample}.merged_R2_fastqc.zip",
postqc_r1="{sample}/pre_process/postqc_fastqc/{sample}-{read_group}.cutadapt_R1_fastqc.zip",
postqc_r2="{sample}/pre_process/postqc_fastqc/{sample}-{read_group}.cutadapt_R2_fastqc.zip",
sc=settings["fastq_stats"]
singularity: containers["python3"]
output:
"{sample}/pre_process/{read_group}-fastq_stats.json"
shell: "python {input.sc} --preqc-r1 {input.preqc_r1} "
"--preqc-r2 {input.preqc_r2} "
"--postqc-r1 {input.postqc_r1} "
"--postqc-r2 {input.postqc_r2} > {output}"
## coverages
rule covstats:
......@@ -524,13 +445,10 @@ if "bedfile" in settings:
rule collectstats:
"""Collect all stats for a particular sample with beds"""
input:
#preqc="{sample}/pre_process/{sample}.preqc_count.json",
#postq="{sample}/pre_process/{sample}.postqc_count.json",
mnum="{sample}/bams/{sample}.mapped.num",
mbnum="{sample}/bams/{sample}.mapped.basenum",
unum="{sample}/bams/{sample}.unique.num",
ubnum="{sample}/bams/{sample}.usable.basenum",
#fastqc="{sample}/pre_process/{read_group}-fastq_stats.json",
cov="{sample}/coverage/covstats.json",
colpy=settings["collect_stats"]
params:
......@@ -540,11 +458,10 @@ if "bedfile" in settings:
"{sample}/{sample}.stats.json"
singularity: containers["vtools"]
shell: "python {input.colpy} --sample-name {params.sample_name} "
"--pre-qc-fastq {input.preqc} --post-qc-fastq {input.postq} "
"--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
"--unique-num {input.unum} --usable-basenum {input.ubnum} "
"--female-threshold {params.fthresh} "
"--fastqc-stats {input.fastqc} {input.cov} > {output}"
"{input.cov} > {output}"
else:
rule collectstats:
"""Collect all stats for a particular sample without beds"""
......@@ -555,7 +472,6 @@ else:
mbnum = "{sample}/bams/{sample}.mapped.basenum",
unum = "{sample}/bams/{sample}.unique.num",
ubnum = "{sample}/bams/{sample}.usable.basenum",
fastqc="{sample}/pre_process/fastq_stats.json",
colpy = settings["collect_stats"]
params:
sample_name = "{sample}",
......@@ -568,7 +484,7 @@ else:
"--mapped-num {input.mnum} --mapped-basenum {input.mbnum} "
"--unique-num {input.unum} --usable-basenum {input.ubnum} "
"--female-threshold {params.fthresh} "
"--fastqc-stats {input.fastqc} > {output}"
"> {output}"
rule merge_stats:
"""Merge all stats of all samples"""
......
......@@ -66,14 +66,6 @@ def determine_gender(covstat, fthresh):
type=click.STRING,
required=True,
help="Sample name")
@click.option("--pre-qc-fastq",
type=click.Path(dir_okay=False, exists=True, readable=True),
required=True,
help="pre-qc json from fastq-count")
@click.option("--post-qc-fastq",
type=click.Path(dir_okay=False, exists=True, readable=True),
required=True,
help="Post-qc json from fastq-count")
@click.option("--mapped-num",
type=click.Path(dir_okay=False, exists=True, readable=True),
required=True,
......@@ -94,19 +86,12 @@ def determine_gender(covstat, fthresh):
type=click.FLOAT,
default=0.6,
help="Female threshold of X/all cov")
@click.option("--fastqc-stats",
type=click.Path(dir_okay=False, exists=True, readable=True),
required=True,
help="Path to fastqc stats json")
@click.argument("covstats",
type=click.Path(dir_okay=False, exists=True, readable=True),
nargs=-1)
def main(sample_name, pre_qc_fastq, post_qc_fastq, mapped_num, mapped_basenum,
unique_num, usable_basenum, female_threshold, fastqc_stats, covstats):
def main(sample_name, mapped_num, mapped_basenum,
unique_num, usable_basenum, female_threshold, covstats):
preqcd = parse_json_file(pre_qc_fastq)
posqcd = parse_json_file(post_qc_fastq)
fastqc = parse_json_file(fastqc_stats)
mpnum = parse_num_file(mapped_num)
mpbnum = parse_num_file(mapped_basenum)
......@@ -125,13 +110,10 @@ def main(sample_name, pre_qc_fastq, post_qc_fastq, mapped_num, mapped_basenum,
d = {
"sample_name": sample_name,
"pre_qc_fastq_count": preqcd,
"post_qc_fastq_count": posqcd,
"n_mapped_reads": mpnum,
"n_mapped_bases": mpbnum,
"n_usable_reads": unum,
"n_usable_bases": ubnum,
"fastqc": fastqc,
"covstats": covl
}
......
#!/usr/bin/env bash
# hutspot - a DNAseq variant calling pipeline
# Copyright (C) 2017-2019, Sander Bollen, Leiden University Medical Center
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
set -eu
set -o pipefail
input_r1=${1}
input_r2=${2}
output_r1=${3}
output_r2=${4}
odir=${5}
fastqc --threads 4 --nogroup -o ${odir} ${input_r1} ${input_r2}
if [[ -f ${output_r1} ]]; then
unzip -l ${output_r1} || head -c 0 ${output_r1} > ${output_r1}
else
touch ${output_r1}
fi
if [[ -f ${output_r2} ]]; then
unzip -l ${output_r2} || head -c 0 ${output_r2} > ${output_r2}
else
touch ${output_r2}
fi
......@@ -79,10 +79,6 @@ if __name__ == "__main__":
sample_dict = OrderedDict()
sample_dict.update({
"sample_name": sname,
"preqc_reads": sample['pre_qc_fastq_count']['reads'],
"preqc_bases": sample['pre_qc_fastq_count']['bases'],
"postqc_reads": sample['post_qc_fastq_count']['reads'],
"postqc_bases": sample['post_qc_fastq_count']['bases'],
"mapped_reads": sample['n_mapped_reads'],
"mapped_bases": sample['n_mapped_bases'],
"usable_reads": sample['n_usable_reads'],
......
......@@ -20,32 +20,35 @@
files:
- path: "micro/vcf/micro.vcf.gz"
contains:
- "chrM\t152\t.\tT\tC\t3960.77\t."
- "chrM\t152\t.\tT\tC\t3960"
- "GT:AD:DP:GQ:PL\t1/1:0,130:130:99:3989,388,0"
- "chrM\t263\t.\tA\tG\t3233.06\t."
- "GT:AD:DP:GQ:PL\t1/1:0,108:108:99:3263,323,0"
- "chrM\t263\t.\tA\tG\t3238"
- "GT:AD:DP:GQ:PL\t1/1:0,108:108:99:3267,323,0"
- "chrM\t4745\t.\tA\tG\t5655.77\t."
- "GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,133:134:99:1|1:4745_A_G:5684,404,0"
- "chrM\t4769\t.\tA\tG\t5182.77\t."
- "chrM\t4745\t.\tA\tG\t5689"
- "GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,134:135:99:1|1:4745_A_G:5718,407,0"
- "chrM\t4769\t.\tA\tG\t5182"
- "GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,120:121:99:1|1:4745_A_G:5211,363,0"
- "chrM\t16023\t.\tG\tA\t1880.77\t."
- "GT:AD:DP:GQ:PL\t0/1:72,73:145:99:1909,0,1872"
- "chrM\t16023\t.\tG\tA\t1906"
- "GT:AD:DP:GQ:PL\t0/1:74,74:148:99:1935,0,1903"
- path: "micro/vcf/micro.g.vcf.gz"
contains:
- "chrM\t1\t.\tG\t<NON_REF>\t.\t.\tEND=151\tGT:DP:GQ:MIN_DP:PL\t0/0:165:99:137:0,120,1800"
- "chrM\t1\t.\tG\t<NON_REF>\t.\t.\tEND=151\tGT:DP:GQ:MIN_DP:PL\t0/0:164:99:137:0,120,1800"
- "chrM\t152\t.\tT\tC,<NON_REF>\t3960.77"
- "chrM\t152\t.\tT\tC,<NON_REF>\t3960"
- "GT:AD:DP:GQ:PL:SB\t1/1:0,130,0:130:99:3989,388,0,3989,388,3989:0,0,47,83"
- "chrM\t16023\t.\tG\tA,<NON_REF>\t1880.77\t."
- "GT:AD:DP:GQ:PL:SB\t0/1:72,73,0:145:99:1909,0,1872,2125,2089,4214:35,37,36,37"
- "chrM\t16023\t.\tG\tA,<NON_REF>\t1906"
- "GT:AD:DP:GQ:PL:SB\t0/1:74,74,0:148:99:1935,0,1903,2157,2123,4280:36,38,37,37"
- "chrM\t16560\t.\tC\t<NON_REF>\t.\t.\tEND=16569\tGT:DP:GQ:MIN_DP:PL\t0/0:195:0:187:0,0,0"
- "chrM\t16560\t.\tC\t<NON_REF>\t.\t.\tEND=16569\tGT:DP:GQ:MIN_DP:PL\t0/0:188:0:180:0,0,0"
- name: test-integration-small-scatter
tags:
......@@ -72,32 +75,34 @@
- path: "micro/vcf/micro.vcf.gz.tbi"
- path: "micro/vcf/micro.vcf.gz"
contains:
- "chrM\t152\t.\tT\tC\t3960.77\t."
- "chrM\t152\t.\tT\tC\t3960"
- "GT:AD:DP:GQ:PL\t1/1:0,130:130:99:3989,388,0"
- "chrM\t263\t.\tA\tG\t3233.06\t."
- "GT:AD:DP:GQ:PL\t1/1:0,108:108:99:3263,323,0"
- "chrM\t263\t.\tA\tG\t323"
- "GT:AD:DP:GQ:PL\t1/1:0,108:108:99:3267,323,0"
- "chrM\t4745\t.\tA\tG\t5655.77\t."
- "GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,133:134:99:1|1:4745_A_G:5684,404,0"
- "chrM\t4745\t.\tA\tG\t56"
- "GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,134:135:99:1|1:4745_A_G:5718,407,0"
- "chrM\t4769\t.\tA\tG\t5182.77\t."
- "chrM\t4769\t.\tA\tG\t5182"
- "GT:AD:DP:GQ:PGT:PID:PL\t1/1:1,120:121:99:1|1:4745_A_G:5211,363,0"
- "chrM\t16023\t.\tG\tA\t1880.77\t."
- "GT:AD:DP:GQ:PL\t0/1:72,73:145:99:1909,0,1872"
- "chrM\t16023\t.\tG\tA\t1906"
- "GT:AD:DP:GQ:PL\t0/1:74,74:148:99:1935,0,1903"
- path: "micro/vcf/micro.g.vcf.gz"
contains:
- "chrM\t1\t.\tG\t<NON_REF>\t.\t.\tEND=151\tGT:DP:GQ:MIN_DP:PL\t0/0:165:99:137:0,120,1800"
- "chrM\t1\t.\tG\t<NON_REF>\t.\t.\tEND=151\tGT:DP:GQ:MIN_DP:PL\t0/0:164:99:137:0,120,1800"
- "chrM\t152\t.\tT\tC,<NON_REF>\t3960.77"
- "chrM\t152\t.\tT\tC,<NON_REF>\t3960"
- "GT:AD:DP:GQ:PL:SB\t1/1:0,130,0:130:99:3989,388,0,3989,388,3989:0,0,47,83"
- "chrM\t16023\t.\tG\tA,<NON_REF>\t1880.77\t."
- "GT:AD:DP:GQ:PL:SB\t0/1:72,73,0:145:99:1909,0,1872,2125,2089,4214:35,37,36,37"
- "chrM\t16023\t.\tG\tA,<NON_REF>\t1906"
- "GT:AD:DP:GQ:PL:SB\t0/1:74,74,0:148:99:1935,0,1903,2157,2123,4280:36,38,37,37"
- "chrM\t16560\t.\tC\t<NON_REF>\t.\t.\tEND=16569\tGT:DP:GQ:MIN_DP:PL\t0/0:195:0:187:0,0,0"
- "chrM\t16560\t.\tC\t<NON_REF>\t.\t.\tEND=16569\tGT:DP:GQ:MIN_DP:PL\t0/0:188:0:180:0,0,0"
- path: "micro/vcf/micro.g.vcf.gz.tbi"
- path: "micro/vcf/micro.0.vcf.gz"
should_exist: false
......@@ -123,10 +128,11 @@
- "(100%) done"
must_not_contain:
- "rror"
files:
- path: "micro/coverage/refFlat_coverage.tsv"
contains:
- "1\tMIR12136\t133.0\t99.0\t133.0\t99.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\tNR_162149"
#files:
# - path: "micro/coverage/refFlat_coverage.tsv"
# contains:
# - "1\tMIR12136\t134.0\t99.0\t134.0\t99.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\t100.0\tNR_162149"
- name: test-integration-all-on-target
tags:
......@@ -151,8 +157,8 @@
- path: "micro/coverage/covstats.png"
- path: "micro/coverage/covstats.json"
contains:
- "\"frac_min_100x\": 0.9748"
- "\"mean\": 136.70"
- "\"frac_min_100x\": 0.97"
- "\"mean\": 137"
- "\"width_nonzero\": 16569"
- path: "stats.tsv"
contains:
......@@ -186,7 +192,6 @@
- name: test-integration-two-readgroups
tags:
- integration
- new
command: >
snakemake
--use-singularity
......@@ -207,13 +212,15 @@
- path: "micro/coverage/covstats.png"
- path: "micro/coverage/covstats.json"
contains:
- "\"frac_min_100x\": 0.9748"
- "\"mean\": 136.70"
- "\"frac_min_100x\": 0.97"
- "\"mean\": 137"
- "\"width_nonzero\": 16569"
- path: "stats.tsv"
contains:
- "sample_name\tpreqc_reads\tpreqc_bases\tpostqc_reads\tpostqc_bases\tmapped_reads\tmapped_bases\tusable_reads\tusable_bases\ttotal_variants\tsnps\tinsertions\tdeletions\ttransversions\ttransitions\tti_tv_ratio\thomozygous_variants\theterozygous_variants\tcovstats.json_median_coverage"
- "micro\t15440\t2276743\t15398\t2269171\t15515\t2275114\t15477\t2270739\t17\t15\t2\t0\t0\t15\tnan\t16\t1\t136"
- "\tcovstats.json_modal_coverage\tcovstats.json_horizontal_coverage\t"
- "\t137\t1.0\t"
- path: "micro/pre_process/trimmed-micro-lib_01/micro-lib_01_R1_fastqc.zip"
- path: "micro/pre_process/trimmed-micro-lib_01/micro-lib_01_R2_fastqc.zip"
- path: "micro/pre_process/trimmed-micro-lib_02/micro-lib_02_R1_fastqc.zip"
- path: "micro/pre_process/trimmed-micro-lib_02/micro-lib_02_R2_fastqc.zip"
- path: "micro/pre_process/raw-micro-lib_01/micro_rg1_R1_fastqc.zip"
- path: "micro/pre_process/raw-micro-lib_01/micro_rg1_R2_fastqc.zip"
- path: "micro/pre_process/raw-micro-lib_02/micro_rg2_R1_fastqc.zip"
- path: "micro/pre_process/raw-micro-lib_02/micro_rg2_R2_fastqc.zip"
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment