Commit 8ad8f6c9 authored by Peter van 't Hof

Merge branch 'develop' into feature-centrifuge_pipe

parents ff5e56e1 0cea2b66
Showing changes with 290 additions and 16 deletions
@@ -38,6 +38,8 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
if (config.contains("output_dir", path = Nil)) config("output_dir", path = Nil).asFile
else new File(".")
}
+ require(outputDir.canRead, s"No permission to read output directory: $outputDir")
+ require(outputDir.canWrite, s"No permission to write output directory: $outputDir")
@Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
var disableScatter: Boolean = false
@@ -67,6 +69,9 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
final def script() {
outputDir = config("output_dir")
outputDir = outputDir.getAbsoluteFile
+ BiopetQScript.checkOutputDir(outputDir)
init()
biopetScript()
logger.info("Biopet script done")
@@ -153,4 +158,13 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
object BiopetQScript {
case class InputFile(file: File, md5: Option[String] = None)
+ def checkOutputDir(outputDir: File): Unit = {
+ // Sanity checks
+ require(outputDir.getParentFile.canRead, s"No permission to read parent of output directory: ${outputDir.getParentFile}")
+ require(outputDir.getParentFile.canWrite, s"No permission to write parent of output directory: ${outputDir.getParentFile}")
+ outputDir.mkdir()
+ require(outputDir.canRead, s"No permission to read output directory: $outputDir")
+ require(outputDir.canWrite, s"No permission to write output directory: $outputDir")
+ }
}
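For context, Scala's require throws an IllegalArgumentException carrying the supplied message whenever its predicate is false, so a pipeline pointed at an unreadable or unwritable output location now fails before any jobs are built. A minimal sketch of the new check in use (the path is a hypothetical example, not taken from the commit):

import java.io.File
// Verifies read/write access on the parent, creates the directory if needed (mkdir),
// then verifies read/write access on the directory itself. An unwritable parent aborts with
// IllegalArgumentException: "No permission to write parent of output directory: ..."
BiopetQScript.checkOutputDir(new File("/not/writable/by/me/pipeline_output"))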
@@ -77,6 +77,7 @@ trait PipelineCommand extends MainCommand with GatkLogging with ImplicitConversi
val pipelineName = this.getClass.getSimpleName.toLowerCase.split("""\$""").head
val pipelineConfig = globalConfig.map.getOrElse(pipelineName, Map()).asInstanceOf[Map[String, Any]]
val pipelineOutputDir = new File(globalConfig.map.getOrElse("output_dir", pipelineConfig.getOrElse("output_dir", "./")).toString)
+ BiopetQScript.checkOutputDir(pipelineOutputDir)
val logDir: File = new File(pipelineOutputDir, ".log")
logDir.mkdirs()
new File(logDir, "biopet." + BiopetQCommandLine.timestamp + ".log")
@@ -17,7 +17,6 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.util
- import htsjdk.samtools.reference.FastaSequenceFile
import htsjdk.variant.variantcontext.{ VariantContext, VariantContextBuilder }
import htsjdk.variant.variantcontext.writer.{ AsyncVariantContextWriter, VariantContextWriterBuilder }
import htsjdk.variant.vcf._
@@ -66,7 +65,10 @@ object VcfWithVcf extends ToolCommand {
else c.copy(fields = Fields(x, x) :: c.fields)
} text """| If only <field> is given, the field's identifier in the output VCF will be identical to <field>.
| By default we will return all values found for a given field.
- | With <method> the values will processed after getting it from the secondary VCF file, posible methods are:
+ | For INFO fields with type R or A we will take the respective alleles present in the input file.
+ | If a <method> is supplied, a method will be applied over the contents of the field.
+ | In this case, all values will be considered.
+ | The following methods are available:
| - max : takes maximum of found value, only works for numeric (integer/float) fields
| - min : takes minimum of found value, only works for numeric (integer/float) fields
| - unique: takes only unique values """.stripMargin
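To make the field syntax above concrete, here is a usage sketch mirroring the argument arrays used in VcfWithVcfTest further down; the input, secondary, and reference paths are placeholders rather than files from this commit:

// -f <field>                         copy an INFO field from the secondary VCF under its own name
// -f <field>:<output_field>          e.g. "AF:MULTI_AF" copies AF into the output as MULTI_AF
// -f <field>:<output_field>:<method> additionally apply max, min, or unique to the collected values
VcfWithVcf.main(Array(
  "-I", "input.vcf.gz",
  "-s", "secondary.vcf.gz",
  "-o", "annotated.vcf.gz",
  "-R", "reference.fa",
  "-f", "AF:MULTI_AF"))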
@@ -126,7 +128,7 @@ object VcfWithVcf extends ToolCommand {
require(vcfDict.getSequence(record.getContig) != null, s"Contig ${record.getContig} does not exist on reference")
val secondaryRecords = getSecondaryRecords(secondaryReader, record, commandArgs.matchAllele)
- val fieldMap = createFieldMap(commandArgs.fields, secondaryRecords)
+ val fieldMap = createFieldMap(commandArgs.fields, record, secondaryRecords, secondHeader)
writer.add(createRecord(fieldMap, record, commandArgs.fields, header))
@@ -147,17 +149,19 @@ object VcfWithVcf extends ToolCommand {
/**
* Create Map of field -> List of attributes in secondary records
* @param fields List of Field
+ * @param record Original record
* @param secondaryRecords List of VariantContext with secondary records
+ * @param header Header of the secondary reader
* @return Map of fields and their values in secondary records
*/
- def createFieldMap(fields: List[Fields], secondaryRecords: List[VariantContext]): Map[String, List[Any]] = {
+ def createFieldMap(fields: List[Fields], record: VariantContext, secondaryRecords: List[VariantContext], header: VCFHeader): Map[String, List[Any]] = {
val fieldMap = (for (
f <- fields if secondaryRecords.exists(_.hasAttribute(f.inputField))
) yield {
f.outputField -> (for (
secondRecord <- secondaryRecords if secondRecord.hasAttribute(f.inputField)
) yield {
- secondRecord.getAttribute(f.inputField) match {
+ getSecondaryField(record, secondRecord, f.inputField, header) match {
case l: List[_] => l
case y: util.ArrayList[_] => y.toList
case x => List(x)
@@ -207,4 +211,53 @@ object VcfWithVcf extends ToolCommand {
})
}).make()
}
+ /**
+ * Get the proper representation of a field from a secondary record given an original record
+ * @param record original record
+ * @param secondaryRecord secondary record
+ * @param field the INFO field to look up
+ * @param header header of secondary record
+ * @return the field value, adjusted to the alleles of the original record when the header defines Number=A or Number=R
+ */
+ def getSecondaryField(record: VariantContext, secondaryRecord: VariantContext, field: String, header: VCFHeader): Any = {
+ header.getInfoHeaderLine(field).getCountType match {
+ case VCFHeaderLineCount.A => numberA(record, secondaryRecord, field)
+ case VCFHeaderLineCount.R => numberR(record, secondaryRecord, field)
+ case _ => secondaryRecord.getAttribute(field)
+ }
+ }
+ /**
+ * Get the correct values from a field that has number=A
+ * @param referenceRecord the reference record
+ * @param annotateRecord the to-be-annotated record
+ * @param field the field to annotate
+ * @return the reference record's values for the alternate alleles shared by both records
+ */
+ def numberA(referenceRecord: VariantContext, annotateRecord: VariantContext, field: String): List[Any] = {
+ val refValues = referenceRecord.getAttributeAsList(field).toArray
+ annotateRecord.
+ getAlternateAlleles.filter(referenceRecord.hasAlternateAllele).
+ map(x => referenceRecord.getAlternateAlleles.indexOf(x)).
+ flatMap(x => refValues.lift(x)).
+ toList
+ }
+ /**
+ * Get the correct values from a field that has number=R
+ * @param referenceRecord the reference record
+ * @param annotateRecord the to-be-annotated record
+ * @param field the field to annotate
+ * @return the reference record's values for the alleles (REF and ALT) shared by both records
+ */
+ def numberR(referenceRecord: VariantContext, annotateRecord: VariantContext, field: String): List[Any] = {
+ val refValues = referenceRecord.getAttributeAsList(field).toArray
+ annotateRecord.
+ getAlleles.
+ filter(referenceRecord.hasAllele).
+ map(x => referenceRecord.getAlleles.indexOf(x)).
+ flatMap(x => refValues.lift(x)).
+ toList
+ }
}
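As a worked example of the new Number=A and Number=R handling, take the two chrQ test records added below: the multi-allelic record has ALT alleles A,T with AF=0.333,0.667 and ALL_ALLELE=C,A,T, while the mono-allelic record only has ALT allele A. A sketch of what the helpers return for that pair, matching the assertions in VcfWithVcfTest (monoPath and multiPath are the test resource paths defined there):

val multiRecord = new VCFFileReader(new File(multiPath)).iterator().next() // ALT=A,T
val monoRecord = new VCFFileReader(new File(monoPath)).iterator().next()   // ALT=A
// Number=A: only the AF value belonging to the shared ALT allele A is kept.
numberA(multiRecord, monoRecord, "AF") // List("0.333")
// Number=R: values for REF C and the shared ALT A are kept; the value for T is dropped.
numberR(multiRecord, monoRecord, "ALL_ALLELE") // List("C", "A")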
##fileformat=VCFv4.2
##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP">
##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes">
##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP">
##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS">
##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP">
##INFO=<ID=GM,Number=.,Type=String,Description="accession">
##INFO=<ID=GL,Number=.,Type=String,Description="geneList">
##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids">
##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition">
##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition">
##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen">
##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons">
##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP">
##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele">
##INFO=<ID=CN,Number=.,Type=String,Description="CNV">
##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq">
##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq">
##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq">
##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes">
##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation">
##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker">
##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat">
##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation">
##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice">
##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore">
##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants">
##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant">
##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants">
##INFO=<ID=ALL_ALLELE,Number=R,Type=String,Description="A field with number R">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714">
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126">
##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called">
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrQ,length=16571>
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101
chrQ 1042 rs199537431 C A 1541.12 PASS AF=0.333;ALL_ALLELE=C,A GT:AD:DP:GQ:PL 1/2:24,21:45:99:838,0,889
File added
File added
##fileformat=VCFv4.2
##INFO=<ID=DN,Number=1,Type=Integer,Description="inDbSNP">
##INFO=<ID=DT,Number=0,Type=Flag,Description="in1000Genomes">
##INFO=<ID=DA,Number=1,Type=String,Description="allelesDBSNP">
##INFO=<ID=FG,Number=.,Type=String,Description="functionGVS">
##INFO=<ID=FD,Number=.,Type=String,Description="functionDBSNP">
##INFO=<ID=GM,Number=.,Type=String,Description="accession">
##INFO=<ID=GL,Number=.,Type=String,Description="geneList">
##INFO=<ID=AAC,Number=.,Type=String,Description="aminoAcids">
##INFO=<ID=PP,Number=.,Type=String,Description="proteinPosition">
##INFO=<ID=CDP,Number=.,Type=String,Description="cDNAPosition">
##INFO=<ID=PH,Number=.,Type=String,Description="polyPhen">
##INFO=<ID=CP,Number=1,Type=String,Description="scorePhastCons">
##INFO=<ID=CG,Number=1,Type=String,Description="consScoreGERP">
##INFO=<ID=AA,Number=1,Type=String,Description="chimpAllele">
##INFO=<ID=CN,Number=.,Type=String,Description="CNV">
##INFO=<ID=HA,Number=1,Type=String,Description="AfricanHapMapFreq">
##INFO=<ID=HE,Number=1,Type=String,Description="EuropeanHapMapFreq">
##INFO=<ID=HC,Number=1,Type=String,Description="AsianHapMapFreq">
##INFO=<ID=DG,Number=0,Type=Flag,Description="hasGenotypes">
##INFO=<ID=DV,Number=.,Type=String,Description="dbSNPValidation">
##INFO=<ID=RM,Number=.,Type=String,Description="repeatMasker">
##INFO=<ID=RT,Number=.,Type=String,Description="tandemRepeat">
##INFO=<ID=CA,Number=0,Type=Flag,Description="clinicalAssociation">
##INFO=<ID=DSP,Number=1,Type=Integer,Description="distanceToSplice">
##INFO=<ID=GS,Number=.,Type=String,Description="granthamScore">
##INFO=<ID=MR,Number=.,Type=String,Description="microRNAs">
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
##INFO=<ID=NEGATIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the negative training set of bad variants">
##INFO=<ID=POSITIVE_TRAIN_SITE,Number=0,Type=Flag,Description="This variant was used to build the positive training set of good variants">
##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
##INFO=<ID=GATKCaller,Number=.,Type=String,Description="GATK variant caller used to call the variant">
##INFO=<ID=PartOfCompound,Number=.,Type=String,Description="Whether the record was originally part of a record containing compound variants">
##INFO=<ID=ALL_ALLELE,Number=R,Type=String,Description="A field with number R">
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
##FILTER=<ID=LowQual,Description="Low quality">
##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -1.4714 <= x < -0.3324">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -6.093">
##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -6.093 <= x < -1.4714">
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -4.8126 <= x < 0.2264">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -39474.9285">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -39474.9285 <= x < -4.8126">
##FILTER=<ID=TooHigh1000GAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=TooHighGoNLAF,Description="Allele frequency in 1000G is more than 5%">
##FILTER=<ID=IndexNotCalled,Description="Position in index sample is not called">
##FILTER=<ID=IndexIsVariant,Description="Index call is a variant">
##FILTER=<ID=InArtificialChrom,Description="Variant found in an artificial chromosome">
##FILTER=<ID=IsIntergenic,Description="Variant found in intergenic region">
##contig=<ID=chrQ,length=16571>
##INFO=<ID=CSQ,Number=.,Type=String,Description="Consequence type as predicted by VEP. Format: Allele|Gene|Feature|Feature_type|Consequence|cDNA_position|CDS_position|Protein_position|Amino_acids|Codons|Existing_variation|AA_MAF|EA_MAF|ALLELE_NUM|DISTANCE|STRAND|CLIN_SIG|SYMBOL|SYMBOL_SOURCE|GMAF|HGVSc|HGVSp|AFR_MAF|AMR_MAF|ASN_MAF|EUR_MAF|PUBMED">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT Sample_101
chrQ 1042 rs199537431 C A,T 1541.12 PASS AF=0.333,0.667;ALL_ALLELE=C,A,T GT:AD:DP:GQ:PL 1/2:24,21:45:99:838,0,889
File added
File added
@@ -18,6 +18,7 @@ import java.io.File
import java.nio.file.Paths
import java.util
+ import htsjdk.variant.vcf
import htsjdk.variant.vcf.VCFFileReader
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
@@ -26,7 +27,6 @@ import org.testng.annotations.Test
import scala.util.Random
import scala.collection.JavaConversions._
- import nl.lumc.sasc.biopet.utils.VcfUtils.identicalVariantContext
/**
@@ -44,6 +44,8 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
val veppedPath = resourcePath("/VEP_oneline.vcf.gz")
val unveppedPath = resourcePath("/unvep_online.vcf.gz")
val referenceFasta = resourcePath("/fake_chrQ.fa")
+ val monoPath = resourcePath("/chrQ_monoallelic.vcf.gz")
+ val multiPath = resourcePath("/chrQ_multiallelic.vcf.gz")
val rand = new Random()
@Test
@@ -71,7 +73,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
- def testOutputFieldException = {
+ def testOutputFieldException() = {
val tmpFile = File.createTempFile("VCFWithVCf", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:AC", "-R", referenceFasta)
@@ -81,7 +83,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
- def testInputFieldException = {
+ def testInputFieldException() = {
val tmpFile = File.createTempFile("VCFWithVCf", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", unveppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:NEW_CSQ", "-R", referenceFasta)
@@ -91,7 +93,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
- def testMinMethodException = {
+ def testMinMethodException() = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:min", "-R", referenceFasta)
@@ -101,7 +103,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
- def testMaxMethodException = {
+ def testMaxMethodException() = {
val tmpFile = File.createTempFile("VcfWithVcf_", ".vcf")
tmpFile.deleteOnExit()
val args = Array("-I", unveppedPath, "-s", veppedPath, "-o", tmpFile.getAbsolutePath, "-f", "CSQ:CSQ:max", "-R", referenceFasta)
@@ -111,8 +113,10 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
- def testFieldMap = {
+ def testFieldMap() = {
- val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
+ val unvepReader = new VCFFileReader(new File(unveppedPath))
+ val header = unvepReader.getFileHeader
+ val unvepRecord = unvepReader.iterator().next()
var fields = List(new Fields("FG", "FG"))
fields :::= List(new Fields("FD", "FD"))
@@ -140,7 +144,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
fields :::= List(new Fields("VQSLOD", "VQSLOD"))
fields :::= List(new Fields("culprit", "culprit"))
- val fieldMap = createFieldMap(fields, List(unvepRecord))
+ val fieldMap = createFieldMap(fields, unvepRecord, List(unvepRecord), header)
fieldMap("FG") shouldBe List("intron")
fieldMap("FD") shouldBe List("unknown")
@@ -170,7 +174,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
- def testGetSecondaryRecords = {
+ def testGetSecondaryRecords() = {
val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
val vepReader = new VCFFileReader(new File(veppedPath))
val vepRecord = vepReader.iterator().next()
@@ -181,7 +185,7 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
}
@Test
- def testCreateRecord = {
+ def testCreateRecord() = {
val unvepRecord = new VCFFileReader(new File(unveppedPath)).iterator().next()
val vepReader = new VCFFileReader(new File(veppedPath))
val header = vepReader.getFileHeader
@@ -189,9 +193,53 @@ class VcfWithVcfTest extends TestNGSuite with MockitoSugar with Matchers {
val secRec = getSecondaryRecords(vepReader, unvepRecord, false)
- val fieldMap = createFieldMap(List(new Fields("CSQ", "CSQ")), secRec)
+ val fieldMap = createFieldMap(List(new Fields("CSQ", "CSQ")), vepRecord, secRec, header)
val createdRecord = createRecord(fieldMap, unvepRecord, List(new Fields("CSQ", "CSQ")), header)
identicalVariantContext(createdRecord, vepRecord) shouldBe true
}
+ @Test
+ def testNumberA() = {
+ val multiRecord = new VCFFileReader(new File(multiPath)).iterator().next()
+ val monoRecord = new VCFFileReader(new File(monoPath)).iterator().next()
+ val annot = numberA(multiRecord, monoRecord, "AF")
+ annot shouldBe List("0.333")
+ }
+ @Test
+ def testNumberR() = {
+ val multiRecord = new VCFFileReader(new File(multiPath)).iterator().next()
+ val monoRecord = new VCFFileReader(new File(monoPath)).iterator().next()
+ val annot = numberR(multiRecord, monoRecord, "ALL_ALLELE")
+ annot shouldBe List("C", "A")
+ }
+ @Test
+ def testNumberAOutput() = {
+ val tmpFile = File.createTempFile("numberA", ".vcf.gz")
+ tmpFile.deleteOnExit()
+ val arguments = Array("-I", monoPath, "-s", multiPath, "-o", tmpFile.getAbsolutePath, "-f", "AF:MULTI_AF", "-R", referenceFasta)
+ main(arguments)
+ val annotatedRecord = new VCFFileReader(tmpFile).iterator().next()
+ annotatedRecord.getAttribute("MULTI_AF").toString shouldBe "0.333"
+ }
+ @Test
+ def testNumberROutput() = {
+ val tmpFile = File.createTempFile("numberR", ".vcf.gz")
+ tmpFile.deleteOnExit()
+ val arguments = Array("-I", monoPath, "-s", multiPath, "-o", tmpFile.getAbsolutePath, "-f", "ALL_ALLELE:MULTI_ALL_ALLELE", "-R", referenceFasta)
+ main(arguments)
+ val annotatedRecord = new VCFFileReader(tmpFile).iterator().next()
+ annotatedRecord.getAttribute("MULTI_ALL_ALLELE") match {
+ case l: List[_] => l shouldBe List("C", "A")
+ case u: util.ArrayList[_] => u.toList shouldBe List("C", "A")
+ case _ => throw new IllegalStateException("Not a list")
+ }
+ }
}