Commit 734b19b1 authored by van den Berg
Browse files

Upgrade gsnap to version 2020.06.01

 - Upgrade pytest-workflow to version 1.4.0, which adds support for
   contains_regex.
 - Update test cases to match the values produced by the new version of gsnap.
   Use contains_regex to reduce the sensitivity of the test cases to small
   changes in values.
 - Change the MV4-11 FLT3 hotspot mutation test to a different variant. The
   previous variant was in a hard-to-map region, so it was unclear whether
   failing to find it indicated an actual error.
parent d0f50aba
Pipeline #3809 passed with stages
in 44 minutes and 9 seconds
......@@ -7,6 +7,6 @@ channels:
- conda-forge
- defaults
dependencies:
- pytest-workflow
- pytest-workflow>=1.4.0
- python>3.6
- snakemake=5.9
......@@ -8,7 +8,7 @@ localrules: plot_vars, table_vars_all, table_vars_hi
containers = {
"bedtools-2.27-grep-2.14-gawk-5.0-click-7-python-3.7": "docker://quay.io/biocontainers/mulled-v2-a4b89e0b16b1d7db92e5a069e5c40405b3b53aab:98c4ac2f0e27869be58f6a4d8bb7ae3bc02a3a70-0",
"debian": "docker://debian:buster-slim",
"gsnap": "docker://quay.io/biocontainers/gmap:2019.06.10--pl526h2f06484_0",
"gsnap": "docker://quay.io/biocontainers/gmap:2020.06.01--pl526h2f06484_0",
"hamlet-scripts": "docker://lumc/hamlet-scripts:0.2",
"picard": "docker://quay.io/biocontainers/picard:2.20.5--0",
"python3": "docker://python:3.7.4-slim-stretch",
......@@ -57,9 +57,9 @@ rule align_vars:
params:
rg_sample="{sample}"
threads: 8
#singularity: containers["gsnap"]
singularity: containers["gsnap"]
shell:
"/exports/kg/rrvandenberg/bin/gsnap --dir `dirname {input.index}` --db `basename {input.index}`"
"gsnap --dir `dirname {input.index}` --db `basename {input.index}`"
" --batch 4 --nthreads {threads}"
" --novelsplicing 1 --npaths 1 --quiet-if-excessive"
" --read-group-name={params.rg_sample} --read-group-id={params.rg_sample}"
......
......@@ -71,29 +71,32 @@
- path: "HEL/expression/HEL.bases_per_exon"
contains:
- "exon\tHEL"
- "HMBS:119084882-119085066\t121064"
- "MECOM:169083498-169085043\t211124"
contains_regex:
- 'HMBS:119084882-119085066\t121\d{3}'
- 'MECOM:169083498-169085043\t211\d{3}'
- path: "HEL/expression/HEL.bases_per_gene"
contains:
- "gene\tHEL"
- "HMBS\t1661651"
- "MECOM\t707074"
contains_regex:
- 'HMBS\t166\d{4}'
- 'MECOM\t70\d{4}'
- path: "HEL/expression/HEL.exon_ratios"
contains:
- "sample_name\texon\tcount\tratio\tabove_threshold\tdivisor_gene\tdivisor_exp"
- "HEL\tMECOM:169146722-169147734\t28454\t0.017123932763257747\tno\tHMBS\t1661651"
contains_regex:
- 'HEL\tMECOM:169146722-169147734\t28\d{3}\t0.017\d*\tno\tHMBS\t166\d{4}'
- path: "HEL/expression/HEL.fragments_per_gene"
contains:
- "MECOM\t3677"
- "HMBS\t9154"
contains_regex:
- 'MECOM\t36\d{2}'
- 'HMBS\t91\d{2}'
- path: "HEL/expression/HEL.raw_base"
contains:
- "chr11\t119084882\t119085066\t121064\t657.956522\tHMBS"
- "chr3\t169115382\t169116739\t174584\t128.654385\tMECOM"
contains_regex:
- 'chr11\t119084882\t119085066\t121\d{3}\t65\d\.\d*\tHMBS'
- 'chr3\t169115382\t169116739\t17\d{4}\t128.\d*\tMECOM'
- name: test-expression-MECOM-vs-MECOM
tags:
......@@ -106,6 +109,5 @@
--use-singularity
files:
- path: "HEL/expression/HEL.exon_ratios"
contains:
- "HEL\tMECOM:169146722-169147734\t28454"
- "yes\tMECOM\t707074"
contains_regex:
- 'HEL\tMECOM:169146722-169147734\t28\d{3}\t0.04\d*\tyes\tMECOM\t70\d{4}'
......@@ -78,23 +78,26 @@
- "chr17\t7674220\t.\tC\tT\t.\tPASS"
- path: "NB4/snv-indels/NB4.annotated.vcf.gz"
contains:
- "chr17\t7674220\t.\tC\tT\t.\tPASS\tADP=416;WT=0;HET=0;HOM=1;NC=0;CSQ=T|missense_variant|MODERATE|TP53|ENSG00000141510|Transcript|ENST00000269305|protein_coding|7/11||ENST00000269305.8:c.743G>A"
contains_regex:
- 'chr17\t7674220\t.\tC\tT\t.\tPASS\t.*ENST00000269305.8:c.743G>A'
- path: "NB4/snv-indels/NB4.aln_stats"
contains:
- "CATEGORY\tTOTAL_READS\tPF_READS\tPCT_PF_READS\tPF_NOISE_READS\tPF_READS_ALIGNED\tPCT_PF_READS_ALIGNED\tPF_ALIGNED_BASES\tPF_HQ_ALIGNED_READS\tPF_HQ_ALIGNED_BASES\tPF_HQ_ALIGNED_Q20_BASES\tPF_HQ_MEDIAN_MISMATCHES\tPF_MISMATCH_RATE\tPF_HQ_ERROR_RATE\tPF_INDEL_RATE\tMEAN_READ_LENGTH\tREADS_ALIGNED_IN_PAIRS\tPCT_READS_ALIGNED_IN_PAIRS\tPF_READS_IMPROPER_PAIRS\tPCT_PF_READS_IMPROPER_PAIRS\tBAD_CYCLES\tSTRAND_BALANCE\tPCT_CHIMERAS\tPCT_ADAPTER"
- "PAIR\t14266\t14266\t1\t0\t14242\t0.998318\t1287643\t14242\t1287643\t1244050\t0\t0.002528\t0.002528\t0.000081\t92.401023\t14218\t0.998315\t148\t0.010392\t0\t0.501123\t0.013341\t0.000491"
contains_regex:
- 'PAIR\t14266\t14266\t1\t0\t14\d{3}\t0.99\d{4}\t12\d{5}\t14\d{3}\t12\d{5}\t124\d{4}\t0\t0.00[2,3]\d{3}\t0.00[2,3]\d{3}\t0.000081\t92.4\d{5}\t14\d{3}\t0.99\d{4}'
- path: "NB4/snv-indels/NB4.insert_stats"
contains:
- "MEDIAN_INSERT_SIZE\tMODE_INSERT_SIZE\tMEDIAN_ABSOLUTE_DEVIATION\tMIN_INSERT_SIZE\tMAX_INSERT_SIZE\tMEAN_INSERT_SIZE\tSTANDARD_DEVIATION\tREAD_PAIRS\tPAIR_ORIENTATION"
- "177\t128\t54\t31\t20794276\t199.914744\t106.425583\t7016\tFR"
contains_regex:
- '177\t128\t54\t31\t20794276\t200\.\d*\t106.4\d{5}\t699\d\tFR'
- path: "NB4/snv-indels/NB4.rna_stats"
contains:
- "PF_BASES\tPF_ALIGNED_BASES\tRIBOSOMAL_BASES\tCODING_BASES\tUTR_BASES\tINTRONIC_BASES\tINTERGENIC_BASES\tIGNORED_READS\tCORRECT_STRAND_READS\tINCORRECT_STRAND_READS\tNUM_R1_TRANSCRIPT_STRAND_READS\tNUM_R2_TRANSCRIPT_STRAND_READS\tNUM_UNEXPLAINED_READS"
- "1320386\t1289751\t0\t562538\t680479\t46273\t461\t0\t0\t0\t2802\t3145\t79"
contains_regex:
- '1320386\t12[8,9]\d{4}\t0\t56\d{4}\t68\d{4}\t4[6,7]\d{3}\t50\d\t0\t0\t0\t2[7,8]\d{2}\t31\d{2}\t89'
- path: "NB4/snv-indels/NB4.variants_hi.csv"
contains:
......@@ -123,7 +126,7 @@
- path: "MV4-11/snv-indels/MV4-11.variants_hi.csv"
contains:
- "sample_id,gene_symbol,gene_id,CHROM,POS,REF,alleles,genotype,is_in_hotspot"
- "MV4-11,FLT3,ENSG00000122025,chr13,28034148,A,\"A,G\",A/G,yes"
- "MV4-11,FLT3,ENSG00000122025,chr13,28034117,A,\"A,T\",A/T,yes"
- name: test-TET2-insertion-config
tags:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment