From f5deabd29c6ef686d54025d8110b902a73b80818 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg <RedmarvandenBerg@lumc.nl> Date: Fri, 15 May 2020 10:55:33 +0200 Subject: [PATCH] Add tests removing multiallelic ALTs --- test/config/config-multiallelic.json | 9 ++++++ test/data/fake_chrM_multiallelic.vcf | 43 ++++++++++++++++++++++++++++ test/test-integration.yml | 23 +++++++++++++++ 3 files changed, 75 insertions(+) create mode 100644 test/config/config-multiallelic.json create mode 100644 test/data/fake_chrM_multiallelic.vcf diff --git a/test/config/config-multiallelic.json b/test/config/config-multiallelic.json new file mode 100644 index 0000000..0f30516 --- /dev/null +++ b/test/config/config-multiallelic.json @@ -0,0 +1,9 @@ +{ + "samples": { + "16699289": { + "disease_code": "TEST", + "gvcf": "test/data/16699289_chrM.g.vcf", + "vcf": "test/data/fake_chrM_multiallelic.vcf" + } + } +} diff --git a/test/data/fake_chrM_multiallelic.vcf b/test/data/fake_chrM_multiallelic.vcf new file mode 100644 index 0000000..83d6dd3 --- /dev/null +++ b/test/data/fake_chrM_multiallelic.vcf @@ -0,0 +1,43 @@ +##fileformat=VCFv4.2 +##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location"> +##FILTER=<ID=LowQual,Description="Low quality"> +##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block"> +##FORMAT=<ID=PGT,Number=1,Type=String,Description="Physical phasing haplotype information, describing how the alternate alleles are phased in relation to one another"> +##FORMAT=<ID=PID,Number=1,Type=String,Description="Physical phasing ID information, where each unique ID within a given sample (but not across samples) connects records within a phasing group"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##FORMAT=<ID=RGQ,Number=1,Type=Integer,Description="Unconditional reference genotype confidence, encoded as a phred quality -10*log10 p(genotype call is wrong)"> +##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias."> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities"> +##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> +##INFO=<ID=ExcessHet,Number=1,Type=Float,Description="Phred-scaled p-value for exact test of excess heterozygosity"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"> +##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=RAW_MQ,Number=1,Type=Float,Description="Raw data for RMS Mapping Quality"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> +##INFO=<ID=SOR,Number=1,Type=Float,Description="Symmetric Odds Ratio of 2x2 contingency table to detect strand bias"> +##contig=<ID=chrM,length=16571,assembly=hg19> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 16699289 +chrM 73 . G A 4062.77 . AC=2;AF=1.00;AN=2;DP=131;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=58.26;QD=32.24;SOR=0.791 GT:AD:DP:GQ:PL 1/1:0,126:126:99:4091,378,0 +chrM 150 . T C 5149.77 . AC=2;AF=1.00;AN=2;DP=130;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=34.24;SOR=0.710 GT:AD:DP:GQ:PGT:PID:PL 1/1:0,117:117:99:1|1:150_T_C:5178,352,0 +chrM 152 . T C 5149.77 . AC=2;AF=1.00;AN=2;DP=126;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=30.63;SOR=0.711 GT:AD:DP:GQ:PGT:PID:PL 1/1:0,113:113:99:1|1:150_T_C:5178,352,0 +chrM 195 . C T,* 690.21 . AC=0,1;AF=0.00,0.500;AN=2;BaseQRankSum=-1.150e+00;ClippingRankSum=0.00;DP=17;ExcessHet=3.9794;FS=8.872;MQ=39.83;MQRankSum=-9.210e-01;QD=21.57;ReadPosRankSum=-4.390e-01;SOR=2.038 GT:AD:DP:GQ:PL 0/2:9,0,8:17:99:303,330,1227,0,897,873 +chrM 410 . A C,G 4849.13 . AC=1,0;AF=0.500,0.00;AN=2;BaseQRankSum=0.910;ClippingRankSum=0.00;DP=76;ExcessHet=3.0103;FS=6.027;MQ=52.15;MQRankSum=-4.344e+00;QD=22.98;ReadPosRankSum=0.687;SOR=1.420 GT:AD:DP:GQ:PL 0/1:33,35,8:76:99:836,0,820,685,673,1584 +chrM 2261 . C T,CATTTT 4096.90 . AC=2,0;AF=1.00,0.00;AN=2;DP=37;ExcessHet=3.0103;FS=0.000;MQ=60.00;QD=29.45;SOR=4.768 GT:AD:DP:GQ:PL 1/1:0,37,0:37:99:1248,110,0,1248,110,1248 +chrM 2354 . C T 3755.77 . AC=2;AF=1.00;AN=2;DP=117;ExcessHet=3.0103;FS=0.000;MLEAC=2;MLEAF=1.00;MQ=60.00;QD=32.95;SOR=1.054 GT:AD:DP:GQ:PL 1/1:0,114:114:99:3784,341,0 diff --git a/test/test-integration.yml b/test/test-integration.yml index bc2b9d0..f1473f4 100644 --- a/test/test-integration.yml +++ b/test/test-integration.yml @@ -34,3 +34,26 @@ - "chrM\t150\t.\tT\t." - "chrM\t152\t.\tT\t." - "chrM\t195\t.\tC\tT" + +- name: test-multiallelic + tags: + - integration + - new + command: > + snakemake + --configfile test/config/config-multiallelic.json + --use-singularity + --singularity-args ' --containall --bind /tmp' + --notemp + --cores 1 + 16699289_trimmed.vcf + files: + - path: 16699289_trimmed.vcf + must_not_contain: + - "chrM\t195\t.\tC\tT,*" + - "chrM\t410\t.\tA\tC,G" + - "chrM\t2261\t.\tC\tT,CATTTT" + contains: + - "chrM\t195\t.\tC\t*" + - "chrM\t410\t.\tA\tC" + - "chrM\t2261\t.\tC\tT" -- GitLab