diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala index 0f507ed21e13b6767b70c422fb3d5c71cb2dccc3..afde5054be9d1b679f95deb5cd7248f556e5ad12 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfWithVcf.scala @@ -17,14 +17,14 @@ package nl.lumc.sasc.biopet.extensions.tools import java.io.File -import nl.lumc.sasc.biopet.core.ToolCommandFunction +import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } /** * Biopet extension for tool VcfWithVcf */ -class VcfWithVcf(val root: Configurable) extends ToolCommandFunction { +class VcfWithVcf(val root: Configurable) extends ToolCommandFunction with Reference { def toolObject = nl.lumc.sasc.biopet.tools.VcfWithVcf @Input(doc = "Input vcf file", shortName = "input", required = true) @@ -39,12 +39,16 @@ class VcfWithVcf(val root: Configurable) extends ToolCommandFunction { @Output(doc = "Output vcf file index", shortName = "output", required = true) private var outputIndex: File = _ + @Input + var reference: File = _ + var fields: List[(String, String, Option[String])] = List() override def defaultCoreMemory = 2.0 override def beforeGraph() { super.beforeGraph() + if (reference == null) reference = referenceFasta() if (output.getName.endsWith(".gz")) outputIndex = new File(output.getAbsolutePath + ".tbi") if (output.getName.endsWith(".vcf")) outputIndex = new File(output.getAbsolutePath + ".idx") if (fields.isEmpty) throw new IllegalArgumentException("No fields found for VcfWithVcf") @@ -54,5 +58,6 @@ class VcfWithVcf(val root: Configurable) extends ToolCommandFunction { required("-I", input) + required("-o", output) + required("-s", secondaryVcf) + + required("-R", reference) + repeat("-f", fields.map(x => x._1 + ":" + x._2 + ":" + x._3.getOrElse("none"))) } diff --git a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala index 2b764ec9c2c7d75b12dc0248eddca338957e2ee7..fc72b6abc4f9e099d021a265834d96f0eb5be7af 100644 --- a/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala +++ b/public/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/VcfWithVcf.scala @@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.tools import java.io.File import java.util +import htsjdk.samtools.reference.FastaSequenceFile import htsjdk.variant.variantcontext.{ VariantContext, VariantContextBuilder } import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder } import htsjdk.variant.vcf._ @@ -36,6 +37,7 @@ object VcfWithVcf extends ToolCommand { case class Args(inputFile: File = null, outputFile: File = null, + referenceFasta: File = null, secondaryVcf: File = null, fields: List[Fields] = Nil, matchAllele: Boolean = true) extends AbstractArgs @@ -54,6 +56,9 @@ object VcfWithVcf extends ToolCommand { opt[File]('s', "secondaryVcf") required () maxOccurs 1 valueName "<file>" action { (x, c) => c.copy(secondaryVcf = x) } + opt[File]('R', "reference") required () maxOccurs 1 valueName "<file>" action { (x, c) => + c.copy(referenceFasta = x) + } opt[String]('f', "field") unbounded () valueName "<field> or <input_field:output_field> or <input_field:output_field:method>" action { (x, c) => val values = x.split(":") if (values.size > 2) c.copy(fields = Fields(values(0), values(1), FieldMethod.withName(values(2))) :: c.fields) @@ -74,16 +79,30 @@ object VcfWithVcf extends ToolCommand { logger.info("Init phase") val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) + val commandArgs: Args = argsParser.parse(args, Args()) getOrElse(throw new IllegalArgumentException) val reader = new VCFFileReader(commandArgs.inputFile) val secondaryReader = new VCFFileReader(commandArgs.secondaryVcf) + val referenceDict = new FastaSequenceFile(commandArgs.referenceFasta, true).getSequenceDictionary + val header = reader.getFileHeader + val vcfDict = header.getSequenceDictionary match { + case r if r != null => + r.assertSameDictionary(referenceDict) + r + case _ => referenceDict + } val secondHeader = secondaryReader.getFileHeader + + secondHeader.getSequenceDictionary match { + case r if r != null => r.assertSameDictionary(referenceDict) + case _ => + } + val writer = new AsyncVariantContextWriter(new VariantContextWriterBuilder(). setOutputFile(commandArgs.outputFile). - setReferenceDictionary(header.getSequenceDictionary). + setReferenceDictionary(vcfDict). build) for (x <- commandArgs.fields) { diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf b/public/biopet-tools/src/test/resources/VEP_oneline.vcf index e5cd64bcb5ff107f53fb6c22fd1634be4c969e54..e93681cd277b7b7db096a5128792dd466d893137 100644 --- a/public/biopet-tools/src/test/resources/VEP_oneline.vcf +++ b/public/biopet-tools/src/test/resources/VEP_oneline.vcf @@ -73,90 +73,7 @@ ##FILTER=<ID=IndexIsVariant,Description="Index call is a variant"> ##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome"> ##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region"> -##contig=<ID=chrM,length=16571> -##contig=<ID=chr1,length=249250621> -##contig=<ID=chr2,length=243199373> -##contig=<ID=chr3,length=198022430> -##contig=<ID=chr4,length=191154276> -##contig=<ID=chr5,length=180915260> -##contig=<ID=chr6,length=171115067> -##contig=<ID=chr7,length=159138663> -##contig=<ID=chr8,length=146364022> -##contig=<ID=chr9,length=141213431> -##contig=<ID=chr10,length=135534747> -##contig=<ID=chr11,length=135006516> -##contig=<ID=chr12,length=133851895> -##contig=<ID=chr13,length=115169878> -##contig=<ID=chr14,length=107349540> -##contig=<ID=chr15,length=102531392> -##contig=<ID=chr16,length=90354753> -##contig=<ID=chr17,length=81195210> -##contig=<ID=chr18,length=78077248> -##contig=<ID=chr19,length=59128983> -##contig=<ID=chr20,length=63025520> -##contig=<ID=chr21,length=48129895> -##contig=<ID=chr22,length=51304566> -##contig=<ID=chrX,length=155270560> -##contig=<ID=chrY,length=59373566> -##contig=<ID=chr1_gl000191_random,length=106433> -##contig=<ID=chr1_gl000192_random,length=547496> -##contig=<ID=chr4_gl000193_random,length=189789> -##contig=<ID=chr4_gl000194_random,length=191469> -##contig=<ID=chr7_gl000195_random,length=182896> -##contig=<ID=chr8_gl000196_random,length=38914> -##contig=<ID=chr8_gl000197_random,length=37175> -##contig=<ID=chr9_gl000198_random,length=90085> -##contig=<ID=chr9_gl000199_random,length=169874> -##contig=<ID=chr9_gl000200_random,length=187035> -##contig=<ID=chr9_gl000201_random,length=36148> -##contig=<ID=chr11_gl000202_random,length=40103> -##contig=<ID=chr17_gl000203_random,length=37498> -##contig=<ID=chr17_gl000204_random,length=81310> -##contig=<ID=chr17_gl000205_random,length=174588> -##contig=<ID=chr17_gl000206_random,length=41001> -##contig=<ID=chr18_gl000207_random,length=4262> -##contig=<ID=chr19_gl000208_random,length=92689> -##contig=<ID=chr19_gl000209_random,length=159169> -##contig=<ID=chr21_gl000210_random,length=27682> -##contig=<ID=chrUn_gl000211,length=166566> -##contig=<ID=chrUn_gl000212,length=186858> -##contig=<ID=chrUn_gl000213,length=164239> -##contig=<ID=chrUn_gl000214,length=137718> -##contig=<ID=chrUn_gl000215,length=172545> -##contig=<ID=chrUn_gl000216,length=172294> -##contig=<ID=chrUn_gl000217,length=172149> -##contig=<ID=chrUn_gl000218,length=161147> -##contig=<ID=chrUn_gl000219,length=179198> -##contig=<ID=chrUn_gl000220,length=161802> -##contig=<ID=chrUn_gl000221,length=155397> -##contig=<ID=chrUn_gl000222,length=186861> -##contig=<ID=chrUn_gl000223,length=180455> -##contig=<ID=chrUn_gl000224,length=179693> -##contig=<ID=chrUn_gl000225,length=211173> -##contig=<ID=chrUn_gl000226,length=15008> -##contig=<ID=chrUn_gl000227,length=128374> -##contig=<ID=chrUn_gl000228,length=129120> -##contig=<ID=chrUn_gl000229,length=19913> -##contig=<ID=chrUn_gl000230,length=43691> -##contig=<ID=chrUn_gl000231,length=27386> -##contig=<ID=chrUn_gl000232,length=40652> -##contig=<ID=chrUn_gl000233,length=45941> -##contig=<ID=chrUn_gl000234,length=40531> -##contig=<ID=chrUn_gl000235,length=34474> -##contig=<ID=chrUn_gl000236,length=41934> -##contig=<ID=chrUn_gl000237,length=45867> -##contig=<ID=chrUn_gl000238,length=39939> -##contig=<ID=chrUn_gl000239,length=33824> -##contig=<ID=chrUn_gl000240,length=41933> -##contig=<ID=chrUn_gl000241,length=42152> -##contig=<ID=chrUn_gl000242,length=43523> -##contig=<ID=chrUn_gl000243,length=43341> -##contig=<ID=chrUn_gl000244,length=39929> -##contig=<ID=chrUn_gl000245,length=36651> -##contig=<ID=chrUn_gl000246,length=38154> -##contig=<ID=chrUn_gl000247,length=36422> -##contig=<ID=chrUn_gl000248,length=39786> -##contig=<ID=chrUn_gl000249,length=38502> +##contig=<ID=chrQ,length=16571> ##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED"> #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103 -chr1 871042 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717 +chrQ 10000 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC;CSQ=A|ENSESTG00000013623|ENSESTT00000034081|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034081.1:c.306-110_306-109insA||||||,A|CCDS2.2|CCDS2.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|CCDS2.2:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034116|Transcript|upstream_gene_variant||||||rs199537431|||1|3610|1||||A:0.0078|||||||,A|ENSESTG00000013623|ENSESTT00000034091|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034091.1:c.306-110_306-109insA||||||,A|ENSESTG00000013623|ENSESTT00000034102|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||||A:0.0078|ENSESTT00000034102.1:c.29-110_29-109insA||||||,A|148398|XM_005244723.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244723.1:c.306-110_306-109insA||||||,A|148398|XM_005244724.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244724.1:c.306-110_306-109insA||||||,A|148398|XM_005244725.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244725.1:c.306-110_306-109insA||||||,A|148398|NM_152486.2|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|NM_152486.2:c.306-110_306-109insA||||||,A|148398|XM_005244727.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244727.1:c.306-110_306-109insA||||||,A|148398|XM_005244726.1|Transcript|intron_variant&feature_elongation||||||rs199537431|||1||1||SAMD11||A:0.0078|XM_005244726.1:c.306-110_306-109insA|||||| GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717 diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz index 8f0e4cb3292cbb6ac9918f7f13633e21b9f2ed1a..4947c907caf17380cddab39af399ce2762b53f83 100644 Binary files a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz and b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz differ diff --git a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi index 1d1bc2ce9351b5f9d267a75fd6f7df3fc34326df..78d4dad2281aaba9589d3f1a8e8571edf1b012dd 100644 Binary files a/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi and b/public/biopet-tools/src/test/resources/VEP_oneline.vcf.gz.tbi differ diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf b/public/biopet-tools/src/test/resources/unvep_online.vcf new file mode 100644 index 0000000000000000000000000000000000000000..30133271958e7ee68a1264000f6c4a477df80c4f --- /dev/null +++ b/public/biopet-tools/src/test/resources/unvep_online.vcf @@ -0,0 +1,78 @@ +##fileformat=VCFv4.1 +##reference=file:///data/DIV5/KG/references/gatk_bundle_2.5/hg19_nohap/ucsc.hg19_nohap.fasta +##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP"> +##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes"> +##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP"> +##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS"> +##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP"> +##INFO=<ID=GM,Number=.,Type=String,Description="accession"> +##INFO=<ID=GL,Number=.,Type=String,Description="geneList"> +##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids"> +##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition"> +##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition"> +##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen"> +##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons"> +##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP"> +##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele"> +##INFO=<ID=CN,Number=.,Type=String,Description="CNV"> +##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq"> +##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq"> +##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq"> +##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes"> +##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation"> +##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker"> +##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat"> +##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation"> +##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice"> +##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore"> +##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs"> +##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed"> +##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes"> +##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered"> +##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?"> +##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions"> +##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval"> +##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias"> +##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes"> +##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation"> +##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed"> +##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed"> +##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality"> +##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads"> +##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities"> +##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants"> +##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants"> +##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth"> +##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)"> +##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)"> +##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias"> +##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat"> +##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model"> +##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out"> +##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases"> +##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant"> +##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants"> +##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"> +##FILTER=<ID=LowQual,Description="Low quality"> +##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324"> +##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093"> +##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714"> +##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264"> +##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285"> +##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126"> +##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%"> +##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in 1000G is more than 5%"> +##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called"> +##FILTER=<ID=IndexIsVariant,Description="Index call is a variant"> +##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome"> +##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region"> +##contig=<ID=chrQ,length=16571> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101 Sample_102 Sample_103 +chrQ 10000 rs199537431 C CA 1541.12 PASS FG=intron;FD=unknown;GM=NM_152486.2;GL=SAMD11;CP=0.000;CG=-1.630;CN=2294,3274,30362,112930;DSP=107;AC=2;AF=0.333;AN=6;BaseQRankSum=4.068;DB;DP=124;FS=1.322;MLEAC=2;MLEAF=0.333;MQ=60.0;MQ0=0;MQRankSum=-0.197;QD=19.03;RPA=1,2;RU=A;ReadPosRankSum=-0.424;STR;VQSLOD=0.079;culprit=FS;GATKCaller=UG,HC GT:AD:DP:GQ:PL 0/1:24,21:45:99:838,0,889 0/1:17,19:36:99:744,0,603 0/0:42,0:43:99:0,126,1717 diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz index 210f83e016e5d83d303e755b4772cb8299089997..cb2934f5c19796e58f20ecb501eafbd3b141e432 100644 Binary files a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz and b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz differ diff --git a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi index c52458d5601b25642ad72a90934955f280428dea..a70174d51594773e3c85d89ed17981ace4893e2e 100644 Binary files a/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi and b/public/biopet-tools/src/test/resources/unvep_online.vcf.gz.tbi differ diff --git a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala index 2f1814210f0ccc760da2235969910e5a335524f7..bfb4d7c044b652a3c90e46d0226379cda595f460 100644 --- a/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala +++ b/public/biopet-tools/src/test/scala/nl/lumc/sasc/biopet/tools/VcfWithVcfTest.scala @@ -44,60 +44,68 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers { val veppedPath = resourcePath("/VEP_oneline.vcf.gz") val unveppedPath = resourcePath("/unvep_online.vcf.gz") + val referenceFasta = resourcePath("/fake_chrQ.fa") val rand = new Random() - @Test def testOutputTypeVcf() = { + @Test + def testOutputTypeVcf() = { val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf") tmpFile.deleteOnExit() - val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ") + val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ", "-R", referenceFasta) main(arguments) } - @Test def testOutputTypeVcfGz() = { + @Test + def testOutputTypeVcfGz() = { val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf.gz") tmpFile.deleteOnExit() - val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ") + val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ", "-R", referenceFasta) main(arguments) } - @Test def testOutputTypeBcf() = { + @Test + def testOutputTypeBcf() = { val tmpFile = File.createTempFile("VcfWithVcf_", ".bcf") tmpFile.deleteOnExit() - val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ") + val arguments = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ", "-R", referenceFasta) main(arguments) } - @Test def testOutputFieldException = { + @Test + def testOutputFieldException = { val tmpFile = File.createTempFile("VCFWithVCf", ".vcf") tmpFile.deleteOnExit() - val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:AC") + val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:AC", "-R", referenceFasta) an[IllegalArgumentException] should be thrownBy main(args) val thrown = the[IllegalArgumentException] thrownBy main(args) thrown.getMessage should equal("Field 'AC' already exists in input vcf") } - @Test def testInputFieldException = { + @Test + def testInputFieldException = { val tmpFile = File.createTempFile("VCFWithVCf", ".vcf") tmpFile.deleteOnExit() - val args = Array("-I", unveppedPath, "-s", unveppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:NEW_CSQ") + val args = Array("-I", unveppedPath, "-s", unveppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:NEW_CSQ", "-R", referenceFasta) an[IllegalArgumentException] should be thrownBy main(args) val thrown = the[IllegalArgumentException] thrownBy main(args) thrown.getMessage should equal("Field 'CSQ' does not exist in secondary vcf") } - @Test def testMinMethodException = { + @Test + def testMinMethodException = { val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf") tmpFile.deleteOnExit() - val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:min") + val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:min", "-R", referenceFasta) an[IllegalArgumentException] should be thrownBy main(args) val thrown = the[IllegalArgumentException] thrownBy main(args) thrown.getMessage should equal("Type of field CSQ is not numeric") } - @Test def testMaxMethodException = { + @Test + def testMaxMethodException = { val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf") tmpFile.deleteOnExit() - val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:max") + val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:max", "-R", referenceFasta) an[IllegalArgumentException] should be thrownBy main(args) val thrown = the[IllegalArgumentException] thrownBy main(args) thrown.getMessage should equal("Type of field CSQ is not numeric") @@ -162,7 +170,8 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers { } - @Test def testGetSecondaryRecords = { + @Test + def testGetSecondaryRecords = { val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next() val vepReader = new VCFFileReader(new File(veppedPath)) val vepRecord = vepReader.iterator().next() @@ -172,7 +181,8 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers { secRec.foreach(x => identicalVariantContext(x, vepRecord) shouldBe true) } - @Test def testCreateRecord = { + @Test + def testCreateRecord = { val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next() val vepReader = new VCFFileReader(new File(veppedPath)) val header = vepReader.getFileHeader