Commit 1ad5f7a1 authored by Peter van 't Hof

Merge branch 'feature-vcfwithvcf-number' into 'develop'

Feature vcfwithvcf number

See http://res-jira-app01.researchlumc.nl/browse/BIOPET-384

See merge request !460
parents 531c04b7 f58ba276
......@@ -17,7 +17,6 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.util
import htsjdk.samtools.reference.FastaSequenceFile
import htsjdk.variant.variantcontext.{ VariantContext, VariantContextBuilder }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._
......@@ -66,7 +65,10 @@ object VcfWithVcf extends ToolCommand {
else c.copy(fields = Fields(x, x) :: c.fields)
} text """| If only <field> is given, the field's identifier in the output VCF will be identical to <field>.
| By default we will return all values found for a given field.
| With <method> the values will processed after getting it from the secondary VCF file, posible methods are:
| For INFO fields with type R or A we will take the respective alleles present in the input file.
| If a <method> is supplied, a method will be applied over the contents of the field.
| In this case, all values will be considered.
| The following methods are available:
| - max : takes maximum of found value, only works for numeric (integer/float) fields
| - min : takes minimum of found value, only works for numeric (integer/float) fields
| - unique: takes only unique values """.stripMargin
......@@ -126,7 +128,7 @@ object VcfWithVcf extends ToolCommand {
require(vcfDict.getSequence(record.getContig) != null, s"Contig ${record.getContig} does not exist on reference")
val secondaryRecords = getSecondaryRecords(secondaryReader, record, commandArgs.matchAllele)
val fieldMap = createFieldMap(commandArgs.fields, secondaryRecords)
val fieldMap = createFieldMap(commandArgs.fields, record, secondaryRecords, secondHeader)
writer.add(createRecord(fieldMap, record, commandArgs.fields, header))
......@@ -147,17 +149,19 @@ object VcfWithVcf extends ToolCommand {
/**
* Create Map of field -> List of attributes in secondary records
* @param fields List of Field
* @param record Original record
* @param secondaryRecords List of VariantContext with secondary records
* @param header: header of secondary reader
* @return Map of fields and their values in secondary records
*/
def createFieldMap(fields: List[Fields], secondaryRecords: List[VariantContext]): Map[String, List[Any]] = {
def createFieldMap(fields: List[Fields], record: VariantContext, secondaryRecords: List[VariantContext], header: VCFHeader): Map[String, List[Any]] = {
val fieldMap = (for (
f <- fields if secondaryRecords.exists(_.hasAttribute(f.inputField))
) yield {
f.outputField -> (for (
secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField)
) yield {
secondRecord.getAttribute(f.inputField) match {
getSecondaryField(record, secondRecord, f.inputField, header) match {
case l: List[_] => l
case y: util.ArrayList[_] => y.toList
case x => List(x)
......@@ -207,4 +211,53 @@ object VcfWithVcf extends ToolCommand {
})
}).make()
}
/**
 * Get the proper representation of a field from a secondary record given an original record.
 *
 * The count type declared for the field in the given header decides how the value is fetched:
 *  - Number=A: only the values belonging to alternate alleles shared with the original record (see numberA)
 *  - Number=R: only the values belonging to alleles shared with the original record (see numberR)
 *  - anything else, or a field that has no INFO line in the header: the raw attribute of the secondary record
 *
 * @param record original record, i.e. the record that is being annotated
 * @param secondaryRecord secondary record, i.e. the record the values are taken from
 * @param field INFO field identifier as used in the secondary record
 * @param header header of secondary record
 * @return the raw attribute value, or a List with the values selected for the shared alleles
 */
def getSecondaryField(record: VariantContext, secondaryRecord: VariantContext, field: String, header: VCFHeader): Any = {
  // getInfoHeaderLine yields null for an undeclared field; wrap it so we fall back to the raw attribute
  // instead of failing with a NullPointerException.
  Option(header.getInfoHeaderLine(field)).map(_.getCountType) match {
    // numberA/numberR read the values from their first argument and take the alleles to keep from their
    // second argument, so the secondary record (the value source) has to be passed first.
    case Some(VCFHeaderLineCount.A) => numberA(secondaryRecord, record, field)
    case Some(VCFHeaderLineCount.R) => numberR(secondaryRecord, record, field)
    case _                          => secondaryRecord.getAttribute(field)
  }
}
/**
 * Select the values of a Number=A field that belong to alternate alleles present in both records.
 *
 * The values are read from `referenceRecord`; the alternate alleles of `annotateRecord` determine which
 * of them are kept and in which order. An allele whose position has no value in the field is skipped.
 *
 * @param referenceRecord record that carries the field values
 * @param annotateRecord record whose alternate alleles select the values
 * @param field identifier of the INFO field
 * @return the selected values, in the order of the alternate alleles of `annotateRecord`
 */
def numberA(referenceRecord: VariantContext, annotateRecord: VariantContext, field: String): List[Any] = {
  val values = referenceRecord.getAttributeAsList(field).toArray
  val referenceAlts = referenceRecord.getAlternateAlleles
  val selected = for {
    allele <- annotateRecord.getAlternateAlleles
    if referenceRecord.hasAlternateAllele(allele)
    // position of the allele among the reference record's ALT alleles is the index into the values
    value <- values.lift(referenceAlts.indexOf(allele))
  } yield value
  selected.toList
}
/**
 * Select the values of a Number=R field that belong to alleles present in both records.
 *
 * The values are read from `referenceRecord`; the alleles of `annotateRecord` determine which of them
 * are kept and in which order. An allele whose position has no value in the field is skipped.
 *
 * @param referenceRecord record that carries the field values
 * @param annotateRecord record whose alleles select the values
 * @param field identifier of the INFO field
 * @return the selected values, in the order of the alleles of `annotateRecord`
 */
def numberR(referenceRecord: VariantContext, annotateRecord: VariantContext, field: String): List[Any] = {
  val values = referenceRecord.getAttributeAsList(field).toArray
  val referenceAlleles = referenceRecord.getAlleles
  val selected = for {
    allele <- annotateRecord.getAlleles
    if referenceRecord.hasAllele(allele)
    // position of the allele among all alleles of the reference record is the index into the values
    value <- values.lift(referenceAlleles.indexOf(allele))
  } yield value
  selected.toList
}
}
##fileformat=VCFv4.2
##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP">
##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes">
##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP">
##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS">
##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP">
##INFO=<ID=GM,Number=.,Type=String,Description="accession">
##INFO=<ID=GL,Number=.,Type=String,Description="geneList">
##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids">
##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition">
##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition">
##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen">
##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons">
##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP">
##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele">
##INFO=<ID=CN,Number=.,Type=String,Description="CNV">
##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq">
##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq">
##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq">
##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes">
##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation">
##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker">
##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat">
##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation">
##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice">
##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore">
##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants">
##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant">
##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants">
##INFO=<ID=ALL_ALLELE,Number=R,Type=String,Description="A field with number R">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714">
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126">
##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in GoNL is more than 5%">
##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called">
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrQ,length=16571>
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101
chrQ 1042 rs199537431 C A 1541.12 PASS AF=0.333;ALL_ALLELE=C,A GT:AD:DP:GQ:PL 1/2:24,21:45:99:838,0,889
##fileformat=VCFv4.2
##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP">
##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes">
##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP">
##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS">
##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP">
##INFO=<ID=GM,Number=.,Type=String,Description="accession">
##INFO=<ID=GL,Number=.,Type=String,Description="geneList">
##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids">
##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition">
##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition">
##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen">
##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons">
##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP">
##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele">
##INFO=<ID=CN,Number=.,Type=String,Description="CNV">
##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq">
##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq">
##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq">
##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes">
##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation">
##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker">
##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat">
##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation">
##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice">
##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore">
##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants">
##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant">
##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants">
##INFO=<ID=ALL_ALLELE,Number=R,Type=String,Description="A field with number R">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714">
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126">
##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in GoNL is more than 5%">
##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called">
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrQ,length=16571>
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101
chrQ 1042 rs199537431 C A,T 1541.12 PASS AF=0.333,0.667;ALL_ALLELE=C,A,T GT:AD:DP:GQ:PL 1/2:24,21:45:99:838,0,889
......@@ -18,6 +18,7 @@ import java.io.File
import java.nio.file.Paths
import java.util
import htsjdk.variant.vcf
import htsjdk.variant.vcf.VCFFileReader
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
......@@ -26,7 +27,6 @@ import org.testng.annotations.Test
import scala.util.Random
import scala.collection.JavaConversions._
import nl.lumc.sasc.biopet.utils.VcfUtils.identicalVariantContext
/**
......@@ -44,6 +44,8 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
val veppedPath = resourcePath("/VEP_oneline.vcf.gz")
val unveppedPath = resourcePath("/unvep_online.vcf.gz")
val referenceFasta = resourcePath("/fake_chrQ.fa")
val monoPath = resourcePath("/chrQ_monoallelic.vcf.gz")
val multiPath = resourcePath("/chrQ_multiallelic.vcf.gz")
val rand = new Random()
@Test
......@@ -71,7 +73,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
def testOutputFieldException = {
def testOutputFieldException() = {
val tmpFile = File.createTempFile("VCFWithVCf", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:AC", "-R", referenceFasta)
......@@ -81,7 +83,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
def testInputFieldException = {
def testInputFieldException() = {
val tmpFile = File.createTempFile("VCFWithVCf", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", unveppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:NEW_CSQ", "-R", referenceFasta)
......@@ -91,7 +93,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
def testMinMethodException = {
def testMinMethodException() = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:min", "-R", referenceFasta)
......@@ -101,7 +103,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
def testMaxMethodException = {
def testMaxMethodException() = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:max", "-R", referenceFasta)
......@@ -111,8 +113,10 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
def testFieldMap = {
val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
def testFieldMap() = {
val unvepReader = new VCFFileReader(new File(unveppedPath))
val header = unvepReader.getFileHeader
val unvepRecord = unvepReader.iterator().next()
var fields = List(new Fields("FG", "FG"))
fields :::= List(new Fields("FD", "FD"))
......@@ -140,7 +144,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
fields :::= List(new Fields("VQSLOD", "VQSLOD"))
fields :::= List(new Fields("culprit", "culprit"))
val fieldMap = createFieldMap(fields, List(unvepRecord))
val fieldMap = createFieldMap(fields, unvepRecord, List(unvepRecord), header)
fieldMap("FG") shouldBe List("intron")
fieldMap("FD") shouldBe List("unknown")
......@@ -170,7 +174,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
def testGetSecondaryRecords = {
def testGetSecondaryRecords() = {
val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
val vepReader = new VCFFileReader(new File(veppedPath))
val vepRecord = vepReader.iterator().next()
......@@ -181,7 +185,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
def testCreateRecord = {
def testCreateRecord() = {
val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
val vepReader = new VCFFileReader(new File(veppedPath))
val header = vepReader.getFileHeader
......@@ -189,9 +193,53 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
val secRec = getSecondaryRecords(vepReader, unvepRecord, false)
val fieldMap = createFieldMap(List(new Fields("CSQ", "CSQ")), secRec)
val fieldMap = createFieldMap(List(new Fields("CSQ", "CSQ")), vepRecord, secRec, header)
val createdRecord = createRecord(fieldMap, unvepRecord, List(new Fields("CSQ", "CSQ")), header)
identicalVariantContext(createdRecord, vepRecord) shouldBe true
}
@Test
def testNumberA() = {
  // Values come from the first record of the multiPath file, restricted to the ALT alleles
  // of the first record of the monoPath file.
  val multiAllelic = new VCFFileReader(new File(multiPath)).iterator().next()
  val monoAllelic = new VCFFileReader(new File(monoPath)).iterator().next()
  numberA(multiAllelic, monoAllelic, "AF") shouldBe List("0.333")
}
@Test
def testNumberR() = {
  // Values come from the first record of the multiPath file, restricted to the alleles
  // of the first record of the monoPath file.
  val multiAllelic = new VCFFileReader(new File(multiPath)).iterator().next()
  val monoAllelic = new VCFFileReader(new File(monoPath)).iterator().next()
  numberR(multiAllelic, monoAllelic, "ALL_ALLELE") shouldBe List("C", "A")
}
@Test
def testNumberAOutput() = {
  val outputVcf = File.createTempFile("numberA", ".vcf.gz")
  outputVcf.deleteOnExit()
  // Run the tool end to end: annotate the monoPath file with AF from the multiPath file as MULTI_AF.
  main(Array("-I", monoPath, "-s", multiPath, "-o", outputVcf.getAbsolutePath, "-f", "AF:MULTI_AF", "-R", referenceFasta))
  val firstRecord = new VCFFileReader(outputVcf).iterator().next()
  val multiAf = firstRecord.getAttribute("MULTI_AF").toString
  multiAf shouldBe "0.333"
}
@Test
def testNumberROutput() = {
  val outputVcf = File.createTempFile("numberR", ".vcf.gz")
  outputVcf.deleteOnExit()
  // Run the tool end to end: annotate the monoPath file with ALL_ALLELE from the multiPath file.
  main(Array("-I", monoPath, "-s", multiPath, "-o", outputVcf.getAbsolutePath, "-f", "ALL_ALLELE:MULTI_ALL_ALLELE", "-R", referenceFasta))
  val firstRecord = new VCFFileReader(outputVcf).iterator().next()
  // The attribute may surface as a Scala List or a java.util.ArrayList; anything else is a failure.
  val alleleValues: List[Any] = firstRecord.getAttribute("MULTI_ALL_ALLELE") match {
    case scalaList: List[_]          => scalaList
    case javaList: util.ArrayList[_] => javaList.toList
    case _                           => throw new IllegalStateException("Not a list")
  }
  alleleValues shouldBe List("C", "A")
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment