Commit 2a90eac3 authored by Peter van 't Hof
Browse files

Added unit testing

parent 741ab7f5
......@@ -36,7 +36,7 @@ object ValidateVcf extends ToolCommand {
val regions = BedRecordList.fromReference(cmdArgs.reference)
val vcfReader = new VCFFileReader(cmdArgs.inputVcf, true)
val vcfReader = new VCFFileReader(cmdArgs.inputVcf, false)
try {
for (record <- vcfReader.iterator()) {
......
##fileformat=VCFv4.1
##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta
##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP">
##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes">
##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP">
##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS">
##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP">
##INFO=<ID=GM,Number=.,Type=String,Description="accession">
##INFO=<ID=GL,Number=.,Type=String,Description="geneList">
##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids">
##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition">
##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition">
##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen">
##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons">
##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP">
##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele">
##INFO=<ID=CN,Number=.,Type=String,Description="CNV">
##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq">
##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq">
##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq">
##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes">
##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation">
##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker">
##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat">
##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation">
##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice">
##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore">
##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants">
##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant">
##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714">
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126">
##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called">
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrQ,length=16571>
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103
chrNotExist 1042 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:
c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717
package nl.lumc.sasc.biopet.tools
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
/**
 * Created by pjvan_thof on 12-12-16.
 *
 * Drives the [[ValidateVcf]] command-line entry point with VCF and reference
 * files loaded from the test classpath and checks which invocations throw.
 */
class ValidateVcfTest extends TestNGSuite with Matchers {

  /** Classpath location of the reference FASTA passed as `-R` to every invocation. */
  private val referenceResource = "/fake_chrQ.fa"

  /**
   * Turns a classpath resource name into a filesystem path string.
   *
   * @param p resource name as understood by `Class.getResource`
   * @return the string form of the path the resource URL points to
   */
  private def resourcePath(p: String): String = {
    val location = getClass.getResource(p)
    Paths.get(location.toURI).toString
  }

  /**
   * Invokes `ValidateVcf.main` with `-i <vcf> -R <reference>` followed by any
   * additional flags.
   *
   * Resource lookup happens in here (input VCF first, then the reference) so
   * that it is evaluated inside the caller's `thrownBy` block.
   *
   * @param vcfResource classpath name of the VCF handed to `-i`
   * @param extraArgs   further command-line arguments appended verbatim
   */
  private def validate(vcfResource: String, extraArgs: String*): Unit = {
    val inputVcf = resourcePath(vcfResource)
    val reference = resourcePath(referenceResource)
    ValidateVcf.main(Array("-i", inputVcf, "-R", reference) ++ extraArgs)
  }

  /**
   * Checks three invocations in sequence:
   *  - `/chrQ2.vcf` completes without an exception;
   *  - `/chrQ_wrong_contig.vcf` raises an `IllegalArgumentException`;
   *  - the same file with `--disableFail` completes without an exception.
   */
  @Test
  def testMain(): Unit = {
    noException shouldBe thrownBy { validate("/chrQ2.vcf") }

    an[IllegalArgumentException] shouldBe thrownBy { validate("/chrQ_wrong_contig.vcf") }

    noException shouldBe thrownBy { validate("/chrQ_wrong_contig.vcf", "--disableFail") }
  }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment