Skip to content
Snippets Groups Projects
Commit 03770ca1 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added vep and multiple dbsnp files

parent f19ee689
No related branches found
No related tags found
No related merge requests found
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
 * Command line wrapper that runs `tar -x -f <inputTar>`.
 *
 * The executable is read from the config key "exe" and falls back to "tar".
 *
 * Created by pjvan_thof on 8/11/15.
 *
 * @param root parent configurable passed in by the caller
 */
class TarExtract(val root: Configurable) extends BiopetCommandLineFunction {

  /** Tar archive handed to the `-f` option. */
  @Input(required = true)
  var inputTar: File = _

  /** Directory used as the job's local directory and as the parent of the job output file. */
  @Argument(required = true)
  var outputDir: File = _

  executable = config("exe", default = "tar", freeVar = false)

  /** Command used to query the version of the tar executable. */
  override def versionCommand: String = s"$executable --version"

  /** Pattern that captures the version from the first line GNU tar prints. */
  override def versionRegex: scala.util.matching.Regex = """tar \(GNU tar\) (.*)""".r

  /**
   * Points the job at `outputDir` and names the job output file after the archive.
   *
   * NOTE(review): no target directory is passed to tar itself, so extraction
   * presumably relies on `jobLocalDir` being the working directory of the job - confirm.
   */
  override def beforeGraph: Unit = {
    super.beforeGraph
    jobLocalDir = outputDir
    // Hidden file inside outputDir, e.g. ".<archive name>.tar.out".
    jobOutputFile = new File(outputDir, s".${inputTar.getName}.tar.out")
  }

  /** Assembles the command line: executable, extract flag, then the archive file. */
  def cmdLine: String = {
    val parts = List(
      required(executable),
      required("-x"),
      required("-f", inputTar))
    parts.mkString
  }
}
......@@ -16,16 +16,19 @@
package nl.lumc.sasc.biopet.pipelines
import java.io.PrintWriter
import java.util
import nl.lumc.sasc.biopet.core.{ PipelineCommand, BiopetQScript }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.bwa.BwaIndex
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.gmap.GmapBuild
import nl.lumc.sasc.biopet.extensions.picard.CreateSequenceDictionary
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFaidx
import nl.lumc.sasc.biopet.extensions._
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QScript
import scala.collection.JavaConversions._
class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null)
......@@ -95,20 +98,52 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript
val annotationDir = new File(genomeDir, "annotation")
genomeConfig.get("vep_cache_uri").foreach { vepCacheUri =>
//TODO: add VEP download and extraction
}
genomeConfig.get("dbsnp_uri").foreach { dbsnpUri =>
val vepDir = new File(annotationDir, "vep")
val curl = new Curl(this)
curl.url = dbsnpUri.toString
curl.output = new File(annotationDir, new File(dbsnpUri.toString).getName)
curl.url = vepCacheUri.toString
curl.output = new File(vepDir, new File(curl.url).getName)
add(curl)
outputConfig += "dbsnp" -> curl.output.getAbsolutePath
val tabix = new Tabix(this)
tabix.input = curl.output
tabix.p = Some("vcf")
add(tabix)
val tar = new TarExtract(this)
tar.inputTar = curl.output
tar.outputDir = vepDir
add(tar)
val regex = """.*\/(.*)_vep_(\d*)_(.*)\.tar\.gz""".r
vepCacheUri.toString match {
case regex(species, version, assembly) if (version.forall(_.isDigit)) => {
outputConfig ++= Map("varianteffectpredictor" -> Map(
"species" -> species,
"assembly" -> assembly,
"cache_version" -> version.toInt,
"cache" -> vepDir,
"fasta" -> createLinks(vepDir)))
}
case _ => throw new IllegalArgumentException("Cache found but no version was found")
}
}
genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri =>
val cv = new CombineVariants(this)
cv.reference = fastaFile
cv.deps ::= createDict.output
// Schedules a Curl job for one URI and registers its output as an input of `cv`.
def addDownload(uri: String): Unit = {
  val download = new Curl(this)
  download.url = uri
  // The file name is taken from the last path element of the URL.
  download.output = new File(annotationDir, new File(download.url).getName)
  download.isIntermediate = true
  add(download)
  // Prepend, so the most recently added download comes first in the input list.
  cv.inputFiles = download.output :: cv.inputFiles
}
dbsnpUri match {
case l: Traversable[_] => l.foreach(x => addDownload(x.toString))
case l: util.ArrayList[_] => l.foreach(x => addDownload(x.toString))
case _ => addDownload(dbsnpUri.toString)
}
cv.outputFile = new File(annotationDir, "dbsnp.vcf.gz")
add(cv)
}
// Bwa index
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment