Skip to content
Snippets Groups Projects
Commit 73342d82 authored by bow's avatar bow
Browse files

Merge branch 'feature-vcf_reference_check' into 'develop'

Require a reference sequence dictionary for VCF tools so that unknown contigs are detected

Fixes #179 

See merge request !378
parents 8a322f5c c093c17c
No related branches found
No related tags found
No related merge requests found
Showing with 134 additions and 105 deletions
......@@ -17,14 +17,14 @@ package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Biopet extension for tool VcfWithVcf
*/
class VcfWithVcf(val root: Configurable) extends ToolCommandFunction {
class VcfWithVcf(val root: Configurable) extends ToolCommandFunction with Reference {
def toolObject = nl.lumc.sasc.biopet.tools.VcfWithVcf
@Input(doc = "Input vcf file", shortName = "input", required = true)
......@@ -39,12 +39,16 @@ class VcfWithVcf(val root: Configurable) extends ToolCommandFunction {
@Output(doc = "Output vcf file index", shortName = "output", required = true)
private var outputIndex: File = _
@Input
var reference: File = _
var fields: List[(String, String, Option[String])] = List()
override def defaultCoreMemory = 2.0
override def beforeGraph() {
super.beforeGraph()
if (reference == null) reference = referenceFasta()
if (output.getName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi")
if (output.getName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx")
if (fields.isEmpty) throw new IllegalArgumentException("No fields found for VcfWithVcf")
......@@ -54,5 +58,6 @@ class VcfWithVcf(val root: Configurable) extends ToolCommandFunction {
required("-I", input) +
required("-o", output) +
required("-s", secondaryVcf) +
required("-R", reference) +
repeat("-f", fields.map(x => x._1 + ":" + x._2 + ":" + x._3.getOrElse("none")))
}
......@@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.util
import htsjdk.samtools.reference.FastaSequenceFile
import htsjdk.variant.variantcontext.{ VariantContext, VariantContextBuilder }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._
......@@ -36,6 +37,7 @@ object VcfWithVcf extends ToolCommand {
case class Args(inputFile: File = null,
outputFile: File = null,
referenceFasta: File = null,
secondaryVcf: File = null,
fields: List[Fields] = Nil,
matchAllele: Boolean = true) extends AbstractArgs
......@@ -54,6 +56,9 @@ object VcfWithVcf extends ToolCommand {
opt[File]('s', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(secondaryVcf = x)
}
opt[File]('R', "reference") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(referenceFasta = x)
}
opt[String]('f', "field") unbounded () valueName "<field> or <input_field:output_field> or <input_field:output_field:method>" action { (x, c) =>
val values = x.split(":")
if (values.size > 2) c.copy(fields = Fields(values(0), values(1), FieldMethod.withName(values(2))) :: c.fields)
......@@ -74,16 +79,30 @@ object VcfWithVcf extends ToolCommand {
logger.info("Init phase")
val argsParser = new OptParser
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1)
val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException)
val reader = new VCFFileReader(commandArgs.inputFile)
val secondaryReader = new VCFFileReader(commandArgs.secondaryVcf)
val referenceDict = new FastaSequenceFile(commandArgs.referenceFasta, true).getSequenceDictionary
val header = reader.getFileHeader
val vcfDict = header.getSequenceDictionary match {
case r if r != null =>
r.assertSameDictionary(referenceDict)
r
case _ => referenceDict
}
val secondHeader = secondaryReader.getFileHeader
secondHeader.getSequenceDictionary match {
case r if r != null => r.assertSameDictionary(referenceDict)
case _ =>
}
val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder().
setOutputFile(commandArgs.outputFile).
setReferenceDictionary(header.getSequenceDictionary).
setReferenceDictionary(vcfDict).
build)
for (x <- commandArgs.fields) {
......
......@@ -73,90 +73,7 @@
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrM,length=16571>
##contig=<ID=chr1,length=249250621>
##contig=<ID=chr2,length=243199373>
##contig=<ID=chr3,length=198022430>
##contig=<ID=chr4,length=191154276>
##contig=<ID=chr5,length=180915260>
##contig=<ID=chr6,length=171115067>
##contig=<ID=chr7,length=159138663>
##contig=<ID=chr8,length=146364022>
##contig=<ID=chr9,length=141213431>
##contig=<ID=chr10,length=135534747>
##contig=<ID=chr11,length=135006516>
##contig=<ID=chr12,length=133851895>
##contig=<ID=chr13,length=115169878>
##contig=<ID=chr14,length=107349540>
##contig=<ID=chr15,length=102531392>
##contig=<ID=chr16,length=90354753>
##contig=<ID=chr17,length=81195210>
##contig=<ID=chr18,length=78077248>
##contig=<ID=chr19,length=59128983>
##contig=<ID=chr20,length=63025520>
##contig=<ID=chr21,length=48129895>
##contig=<ID=chr22,length=51304566>
##contig=<ID=chrX,length=155270560>
##contig=<ID=chrY,length=59373566>
##contig=<ID=chr1_gl000191_random,length=106433>
##contig=<ID=chr1_gl000192_random,length=547496>
##contig=<ID=chr4_gl000193_random,length=189789>
##contig=<ID=chr4_gl000194_random,length=191469>
##contig=<ID=chr7_gl000195_random,length=182896>
##contig=<ID=chr8_gl000196_random,length=38914>
##contig=<ID=chr8_gl000197_random,length=37175>
##contig=<ID=chr9_gl000198_random,length=90085>
##contig=<ID=chr9_gl000199_random,length=169874>
##contig=<ID=chr9_gl000200_random,length=187035>
##contig=<ID=chr9_gl000201_random,length=36148>
##contig=<ID=chr11_gl000202_random,length=40103>
##contig=<ID=chr17_gl000203_random,length=37498>
##contig=<ID=chr17_gl000204_random,length=81310>
##contig=<ID=chr17_gl000205_random,length=174588>
##contig=<ID=chr17_gl000206_random,length=41001>
##contig=<ID=chr18_gl000207_random,length=4262>
##contig=<ID=chr19_gl000208_random,length=92689>
##contig=<ID=chr19_gl000209_random,length=159169>
##contig=<ID=chr21_gl000210_random,length=27682>
##contig=<ID=chrUn_gl000211,length=166566>
##contig=<ID=chrUn_gl000212,length=186858>
##contig=<ID=chrUn_gl000213,length=164239>
##contig=<ID=chrUn_gl000214,length=137718>
##contig=<ID=chrUn_gl000215,length=172545>
##contig=<ID=chrUn_gl000216,length=172294>
##contig=<ID=chrUn_gl000217,length=172149>
##contig=<ID=chrUn_gl000218,length=161147>
##contig=<ID=chrUn_gl000219,length=179198>
##contig=<ID=chrUn_gl000220,length=161802>
##contig=<ID=chrUn_gl000221,length=155397>
##contig=<ID=chrUn_gl000222,length=186861>
##contig=<ID=chrUn_gl000223,length=180455>
##contig=<ID=chrUn_gl000224,length=179693>
##contig=<ID=chrUn_gl000225,length=211173>
##contig=<ID=chrUn_gl000226,length=15008>
##contig=<ID=chrUn_gl000227,length=128374>
##contig=<ID=chrUn_gl000228,length=129120>
##contig=<ID=chrUn_gl000229,length=19913>
##contig=<ID=chrUn_gl000230,length=43691>
##contig=<ID=chrUn_gl000231,length=27386>
##contig=<ID=chrUn_gl000232,length=40652>
##contig=<ID=chrUn_gl000233,length=45941>
##contig=<ID=chrUn_gl000234,length=40531>
##contig=<ID=chrUn_gl000235,length=34474>
##contig=<ID=chrUn_gl000236,length=41934>
##contig=<ID=chrUn_gl000237,length=45867>
##contig=<ID=chrUn_gl000238,length=39939>
##contig=<ID=chrUn_gl000239,length=33824>
##contig=<ID=chrUn_gl000240,length=41933>
##contig=<ID=chrUn_gl000241,length=42152>
##contig=<ID=chrUn_gl000242,length=43523>
##contig=<ID=chrUn_gl000243,length=43341>
##contig=<ID=chrUn_gl000244,length=39929>
##contig=<ID=chrUn_gl000245,length=36651>
##contig=<ID=chrUn_gl000246,length=38154>
##contig=<ID=chrUn_gl000247,length=36422>
##contig=<ID=chrUn_gl000248,length=39786>
##contig=<ID=chrUn_gl000249,length=38502>
##contig=<ID=chrQ,length=16571>
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103
chr1 871042 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306
-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717
chrQ 10000 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306-
110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717
No preview for this file type
No preview for this file type
##fileformat=VCFv4.1
##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta
##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP">
##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes">
##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP">
##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS">
##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP">
##INFO=<ID=GM,Number=.,Type=String,Description="accession">
##INFO=<ID=GL,Number=.,Type=String,Description="geneList">
##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids">
##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition">
##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition">
##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen">
##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons">
##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP">
##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele">
##INFO=<ID=CN,Number=.,Type=String,Description="CNV">
##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq">
##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq">
##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq">
##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes">
##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation">
##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker">
##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat">
##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation">
##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice">
##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore">
##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants">
##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant">
##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714">
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126">
##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in GoNL is more than 5%">
##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called">
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrQ,length=16571>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103
chrQ 10000 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717
No preview for this file type
No preview for this file type
......@@ -44,60 +44,68 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
val veppedPath = resourcePath("/VEP_oneline.vcf.gz")
val unveppedPath = resourcePath("/unvep_online.vcf.gz")
val referenceFasta = resourcePath("/fake_chrQ.fa")
val rand = new Random()
@Test def testOutputTypeVcf() = {
@Test
def testOutputTypeVcf() = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf")
tmpFile.deleteOnExit()
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ")
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ", "-R", referenceFasta)
main(arguments)
}
@Test def testOutputTypeVcfGz() = {
@Test
def testOutputTypeVcfGz() = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf.gz")
tmpFile.deleteOnExit()
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ")
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ", "-R", referenceFasta)
main(arguments)
}
@Test def testOutputTypeBcf() = {
@Test
def testOutputTypeBcf() = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".bcf")
tmpFile.deleteOnExit()
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ")
val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ", "-R", referenceFasta)
main(arguments)
}
@Test def testOutputFieldException = {
@Test
def testOutputFieldException = {
val tmpFile = File.createTempFile("VCFWithVCf", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:AC")
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:AC", "-R", referenceFasta)
an[IllegalArgumentException] should be thrownBy main(args)
val thrown = the[IllegalArgumentException] thrownBy main(args)
thrown.getMessage should equal("Field 'AC' already exists in input vcf")
}
@Test def testInputFieldException = {
@Test
def testInputFieldException = {
val tmpFile = File.createTempFile("VCFWithVCf", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", unveppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:NEW_CSQ")
val args = Array("-I", unveppedPath, "-s", unveppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:NEW_CSQ", "-R", referenceFasta)
an[IllegalArgumentException] should be thrownBy main(args)
val thrown = the[IllegalArgumentException] thrownBy main(args)
thrown.getMessage should equal("Field 'CSQ' does not exist in secondary vcf")
}
@Test def testMinMethodException = {
@Test
def testMinMethodException = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:min")
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:min", "-R", referenceFasta)
an[IllegalArgumentException] should be thrownBy main(args)
val thrown = the[IllegalArgumentException] thrownBy main(args)
thrown.getMessage should equal("Type of field CSQ is not numeric")
}
@Test def testMaxMethodException = {
@Test
def testMaxMethodException = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:max")
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:max", "-R", referenceFasta)
an[IllegalArgumentException] should be thrownBy main(args)
val thrown = the[IllegalArgumentException] thrownBy main(args)
thrown.getMessage should equal("Type of field CSQ is not numeric")
......@@ -162,7 +170,8 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test def testGetSecondaryRecords = {
@Test
def testGetSecondaryRecords = {
val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
val vepReader = new VCFFileReader(new File(veppedPath))
val vepRecord = vepReader.iterator().next()
......@@ -172,7 +181,8 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
secRec.foreach(x => identicalVariantContext(x, vepRecord) shouldBe true)
}
@Test def testCreateRecord = {
@Test
def testCreateRecord = {
val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
val vepReader = new VCFFileReader(new File(veppedPath))
val header = vepReader.getFileHeader
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment