Commit 8ecb7f41 authored by Peter van 't Hof

Added contig map for dbsnp

parent 6ae5046c
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/**
 * Wrapper around the `sed` stream editor.
 *
 * The generated command has the shape `sed -e <expr> ... [input] [> output]`:
 * the input file is left out when input arrives over stdin, and the output
 * redirect is left out when output is written to stdout.
 *
 * Created by pjvanthof on 18/05/16.
 */
class Sed(val root: Configurable) extends BiopetCommandLineFunction with Version {
  executable = config("exe", default = "sed", freeVar = false)

  /** Command to get version of executable */
  override def versionCommand: String = executable + " --version"

  /**
   * Regex to get version from version command output.
   *
   * The parentheses and dots of the GNU banner (e.g. `sed (GNU sed) 4.2.2`) are
   * escaped so they match literally instead of acting as a group / wildcard.
   * The single capture group holds the version number; the patch component is
   * optional because some releases print only `major.minor`.
   */
  override def versionRegex: Regex = """sed \(GNU sed\) (\d+\.\d+(?:\.\d+)?)""".r

  /** File to edit; not used on the command line when input is read from stdin. */
  @Input(required = false)
  var inputFile: File = _

  /** File receiving the edited stream; not used when output goes to stdout. */
  @Output
  var outputFile: File = _

  /** Sed scripts, each passed to the executable with its own `-e` flag. */
  var expressions: List[String] = Nil

  /** Assembles the full command line from the executable, expressions and files. */
  def cmdLine: String = executable +
    repeat("-e", expressions) +
    (if (inputAsStdin) "" else required(inputFile)) +
    (if (outputAsStsout) "" else " > " + required(outputFile))
}
......@@ -15,7 +15,6 @@
*/
package nl.lumc.sasc.biopet.pipelines.generateindexes
import java.io.{ File, PrintWriter }
import java.util
import nl.lumc.sasc.biopet.core.extensions.Md5sum
......@@ -164,25 +163,41 @@ class GenerateIndexes(val root: Configurable) extends QScript with BiopetQScript
}
genomeConfig.get("dbsnp_vcf_uri").foreach { dbsnpUri =>
val contigMap = genomeConfig.get("dbsnp_contig_map").map(_.asInstanceOf[Map[String, Any]])
val contigSed = contigMap.map { map =>
val sed = new Sed(this)
sed.expressions = map.map(x => s"""s/^${x._1}\t/${x._2}\t/""").toList
sed
}
val cv = new CombineVariants(this)
cv.reference_sequence = fastaFile
cv.deps ::= createDict.output
/**
 * Schedules the download of one dbSNP file and registers it as an input of `cv`.
 *
 * NOTE(review): this span looks like it interleaves both sides of a diff — `curl`
 * is added as a standalone job and is also the head of `downloadCmd`, `tabix.input`
 * is assigned twice, and `cv.variant` receives both `curl.output` and `output`.
 * Confirm which statements are meant to be live before relying on this method.
 *
 * @param uri location of the file to download; a `.gz` suffix marks it as already compressed
 */
def addDownload(uri: String): Unit = {
val isZipped = uri.endsWith(".gz")
// The target file name always ends in .gz: the suffix is appended when the uri lacks it.
val output = new File(annotationDir, new File(uri).getName + (if (isZipped) "" else ".gz"))
val curl = new Curl(this)
curl.url = uri
// Raw download target, named after the last path element of the uri.
curl.output = new File(annotationDir, new File(curl.url).getName)
curl.isIntermediate = true
add(curl)
cv.variant :+= curl.output
// Pick the pipe by (input already gzipped?, contig-renaming sed configured?).
val downloadCmd = (isZipped, contigSed) match {
// Gzipped input with a contig map: curl piped through Zcat, sed and Bgzip into output.
case (true, Some(sed)) => curl | Zcat(this) | sed | new Bgzip(this) > output
// Plain input with a contig map: curl piped through sed and Bgzip into output.
case (false, Some(sed)) => curl | sed | new Bgzip(this) > output
// Gzipped input, no contig map: the download is written to output unchanged.
case (true, None) => curl > output
// Plain input, no contig map: curl piped through Bgzip into output.
case (false, None) => curl | new Bgzip(this) > output
}
downloadCmd.isIntermediate = true
add(downloadCmd)
// Only files whose downloaded name ends in .vcf.gz get a tabix index.
// NOTE(review): the check reads curl.output, not output — confirm this is intended.
if (curl.output.getName.endsWith(".vcf.gz")) {
val tabix = new Tabix(this)
tabix.input = curl.output
// This second assignment overrides the one above, so tabix runs on `output`.
tabix.input = output
tabix.p = Some("vcf")
tabix.isIntermediate = true
add(tabix)
// The index is appended to configDeps.
configDeps :+= tabix.outputIndex
}
cv.variant :+= output
}
dbsnpUri match {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment