Skip to content
Snippets Groups Projects
Commit 966ce478 authored by Sander Bollen's avatar Sander Bollen
Browse files

Removed VCFFreqAnnotator (going to be vcfwithvcf annotator in own branch)

parent 9e3f92bc
No related branches found
No related tags found
No related merge requests found
package nl.lumc.sasc.biopet.tools
import scala.collection.JavaConversions._
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.utils.ConfigUtils.jsonToMap
import java.io.File
import argonaut._, Argonaut._
import scalaz._, Scalaz._
import htsjdk.variant.variantcontext.VariantContext
import htsjdk.variant.vcf.VCFFileReader
/**
* This tool annotates frequencies of variants in the input VCF with information from several sources
* The path to these sources has to be supplied as a json file containing all paths and names of sources
* Supported sources: VCF, tabix-indexed tsv, and BigWig directory structure
* Annotation from Varda variation database is done in a different tool
* Created by ahbbollen on 12/8/14.
*/
object VCFFreqAnnotator extends ToolCommand {

  /**
   * Entry point of the tool.
   *
   * Parses the command line (exiting with status 1 when parsing fails), logs the selected
   * files and loads the sources JSON. The actual annotation of the input VCF is not
   * implemented yet: the parsed sources map is currently not consumed.
   *
   * @param args raw command line arguments
   */
  def main(args: Array[String]): Unit = {
    val commandArgs: Args = new OptParser()
      .parse(args, Args())
      .getOrElse(sys.exit(1))

    val inputvcf = commandArgs.inputVCF
    // Read the output path from the output option (it was previously read from inputVCF).
    val outputvcf = commandArgs.outputVCF
    val sourcesjson = commandArgs.sourcesJSON

    logger.info("Starting VCFFreqAnnotator with following flags:")
    logger.info(s"Input VCF - $inputvcf")
    logger.info(s"Output VCF - $outputvcf")
    logger.info(s"Sources JSON - $sourcesjson")

    // Loading the sources also validates the JSON file: an unparsable file raises here.
    // TODO: use the resulting map to annotate the input VCF.
    val jsondict = sourcesJsonToMap(sourcesjson)
  }

  /**
   * Parses a JSON file into a Scala map.
   *
   * @param json the JSON file to read
   * @return the content of the file converted by `jsonToMap`
   * @throws IllegalStateException when the file content cannot be parsed as JSON
   */
  def sourcesJsonToMap(json: File): Map[String, Any] = {
    val reader = scala.io.Source.fromFile(json)
    // Always release the file handle, also when reading fails.
    val jsontext = try reader.mkString finally reader.close()

    Parse.parseOption(jsontext) match {
      case Some(parsed) => jsonToMap(parsed)
      case None =>
        throw new IllegalStateException("The sources JSON file is either not properly formatted or not a JSON file")
    }
  }

  /**
   * Placeholder for annotating a VariantContext with the frequencies found in the sources.
   *
   * Not implemented yet: both arguments are ignored and a fixed dummy map is returned.
   *
   * @param vc input VariantContext (currently unused)
   * @param sources input sources as in Map( column_name -> vcf_reader ) (currently unused)
   * @return currently always `Map("a" -> "a")`
   */
  def fetchAnnotations(vc: VariantContext, sources: Map[String, Any]): Map[String, Any] =
    Map("a" -> "a")

  /**
   * Returns the frequency of the variant described by `vc` as found in `source`.
   *
   * A source record matches when it has the same reference allele and the same set of
   * alternate alleles as `vc`. The first matching record whose `column` attribute can be
   * read as a number wins.
   *
   * @param vc input VariantContext
   * @param source VCFFileReader source
   * @param column name of the attribute in source containing the allele frequency
   * @return the frequency, or 0.0 when `vc` is not a variant or no usable value was found
   */
  def fetchVCFFrequency(vc: VariantContext, source: VCFFileReader, column: String): Double =
    if (vc.isVariant) firstMatchingAttribute[Double](vc, source, column)(toFrequency).getOrElse(0.0)
    else 0.0

  /**
   * Returns the ID found for the variant described by `vc` in `source`.
   *
   * This is useful for VCFs that provide IDs instead of, or in addition to, frequency data.
   * A source record matches when it has the same reference allele and the same set of
   * alternate alleles as `vc`. The first matching record that carries the `column`
   * attribute wins; its value is rendered with `toString`.
   *
   * @param vc input VariantContext
   * @param source source VCFFileReader
   * @param column name of the attribute containing the ID
   * @return the ID as a String (e.g. rs000001), or "unknown" when `vc` is not a variant or
   *         no matching record carries the attribute
   */
  def fetchVCFIDs(vc: VariantContext, source: VCFFileReader, column: String): String =
    if (vc.isVariant) {
      firstMatchingAttribute[String](vc, source, column)(value => Some(value.toString))
        .getOrElse("unknown")
    } else {
      "unknown"
    }

  /**
   * Queries `source` for the region spanned by `vc` and returns the converted `column`
   * attribute of the first record that matches `vc` and yields a value.
   *
   * The iterator returned by the query is closed before this method returns.
   *
   * @param vc variant to look up
   * @param source reader that is queried
   * @param column name of the attribute to read from matching records
   * @param convert turns a non-null attribute value into a result; returning None skips the record
   * @return the first converted value, or None when nothing usable was found
   */
  private def firstMatchingAttribute[T](vc: VariantContext,
                                        source: VCFFileReader,
                                        column: String)(convert: AnyRef => Option[T]): Option[T] = {
    val refAllele = vc.getReference.toString
    // A real set of allele strings, so that the comparison does not depend on allele order.
    val altAlleles = vc.getAlternateAlleles.map(_.toString).toSet

    val records = source.query(vc.getChr, vc.getStart, vc.getEnd)
    try {
      // The type ascription selects the Java-to-Scala iterator view explicitly.
      val candidates: Iterator[VariantContext] = records
      candidates
        .filter { svc =>
          svc.getReference.toString == refAllele &&
            svc.getAlternateAlleles.map(_.toString).toSet == altAlleles
        }
        // Option(...) turns a null attribute value into None, so such records are skipped.
        .map(svc => Option(svc.getAttribute(column)).flatMap(convert))
        .collectFirst { case Some(value) => value }
    } finally {
      records.close()
    }
  }

  /**
   * Interprets an attribute value as a frequency.
   *
   * @param value non-null attribute value
   * @return the numeric value for numbers and numeric strings, None for anything else
   */
  private def toFrequency(value: AnyRef): Option[Double] = value match {
    case number: java.lang.Number => Some(number.doubleValue)
    case text: String             => scala.util.Try(text.trim.toDouble).toOption
    case _                        => None
  }

  /**
   * Command line arguments of this tool.
   *
   * @param inputVCF VCF file to annotate
   * @param outputVCF VCF file to write
   * @param sourcesJSON JSON file describing the annotation sources
   */
  case class Args(inputVCF: File = null,
                  outputVCF: File = null,
                  sourcesJSON: File = null) extends AbstractArgs

  /** Command line parser: all three options are required. */
  class OptParser extends AbstractOptParser {
    head(s"$commandName - Annotate input VCF with frequency information from various sources")

    opt[File]('I', "InputFile")
      .required()
      .valueName("<vcf>")
      .action { (x, c) => c.copy(inputVCF = x) }
      .validate { x => if (x.exists) success else failure("Input VCF not found") }
      .text("Input VCF file")

    opt[File]('j', "json")
      .required()
      .valueName("<json>")
      .action { (x, c) => c.copy(sourcesJSON = x) }
      .validate { x => if (x.exists) success else failure("Sources JSON not found") }
      .text("Sources JSON")

    // No validation: the output file is accepted whether or not it already exists.
    opt[File]('O', "OutputFile")
      .required()
      .valueName("<vcf>")
      .action { (x, c) => c.copy(outputVCF = x) }
      .text("Output VCF")
  }
}
......@@ -45,6 +45,5 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.tools.MergeAlleles,
nl.lumc.sasc.biopet.tools.SamplesTsvToJson,
nl.lumc.sasc.biopet.tools.VEPNormalizer,
nl.lumc.sasc.biopet.tools.VCFFreqAnnotator,
nl.lumc.sasc.biopet.tools.AnnotateVcfWithBed)
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment