Commit b6fe015a authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added conifer as method

parent 95536e65
......@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
abstract class Conifer extends PythonCommandLineFunction with Version {
override def subPath = "conifer" :: super.subPath
// executable = config("exe", default = "conifer")
setPythonScript(config("script", default = "conifer"))
setPythonScript(config("script", default = "conifer.py", namespace = "conifer"))
def versionRegex = """(.*)""".r
override def versionExitcode = List(0)
def versionCommand = executable + " " + pythonScript + " --version"
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu
import java.io.File
import nl.lumc.sasc.biopet.utils.config._
import nl.lumc.sasc.biopet.core.{ PipelineCommand, _ }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM }
import org.broadinstitute.gatk.queue.QScript
class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript {
//*
// Kopisu - Coniferpipeline is a pipeline that can run standalone
// */
def this() = this(null)
/** Input bamfile */
@Input(doc = "Bamfile to start from", fullName = "bam", shortName = "bam", required = true)
var inputBam: File = _
@Argument(doc = "Label this sample with a name/ID [0-9a-zA-Z] and [-_]",
fullName = "label",
shortName = "label", required = false)
var sampleLabel: String = _
/** Exon definitions in bed format */
@Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = false)
var probeFile: File = config("probeFile")
@Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = false)
var controlsDir: File = config("controlsDir")
@Argument(doc = "Enable RPKM only mode, generate files for reference db", shortName = "rpkmonly", required = false)
var RPKMonly: Boolean = false
val summary = new ConiferSummary(this)
def init() {
}
def input2RPKM(inputBam: File): String = {
if (!sampleLabel.isEmpty) sampleLabel ++ ".txt"
else swapExt(inputBam.getName, ".bam", ".txt")
}
def input2HDF5(inputBam: File): String = {
if (!sampleLabel.isEmpty) sampleLabel ++ ".hdf5"
else swapExt(inputBam.getName, ".bam", ".hdf5")
}
def input2Calls(inputBam: File): String = {
if (!sampleLabel.isEmpty) sampleLabel ++ ".calls.txt"
else swapExt(inputBam.getName, ".bam", "calls.txt")
}
def biopetScript(): Unit = {
/** Setup RPKM directory */
val sampleDir: String = outputDir
val RPKMdir: File = new File(sampleDir + File.separator + "RPKM" + File.separator)
RPKMdir.mkdir()
val coniferRPKM = new ConiferRPKM(this)
coniferRPKM.bamFile = this.inputBam.getAbsoluteFile
coniferRPKM.probes = this.probeFile
coniferRPKM.output = new File(RPKMdir, input2RPKM(inputBam))
add(coniferRPKM)
if (!RPKMonly) {
/** Collect the rpkm_output to a temp directory, where we merge with the control files */
var refRPKMlist: List[File] = Nil
// Sync the .txt only, these files contain the RPKM Values
for (controlRPKMfile <- controlsDir.list.filter(_.toLowerCase.endsWith(".txt"))) {
val target = new File(RPKMdir, controlRPKMfile)
val source = new File(controlsDir, controlRPKMfile)
if (!target.exists) {
add(Ln(this, source, target, relative = false))
refRPKMlist :+= target
} else if (!target.equals(source)) {
target.delete()
add(Ln(this, source, target, relative = false))
refRPKMlist :+= target
}
}
val coniferAnalyze = new ConiferAnalyze(this)
coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist
coniferAnalyze.probes = this.probeFile
coniferAnalyze.rpkmDir = RPKMdir
coniferAnalyze.output = new File(sampleDir, input2HDF5(inputBam))
add(coniferAnalyze)
val coniferCall = new ConiferCall(this)
coniferCall.input = coniferAnalyze.output
coniferCall.output = new File(sampleDir, "calls.txt")
add(coniferCall)
summary.deps = List(coniferCall.output)
summary.label = sampleLabel
summary.calls = coniferCall.output
summary.out = new File(sampleDir, input2Calls(inputBam))
add(summary)
}
}
}
object ConiferPipeline extends PipelineCommand
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu
import java.io.{ BufferedWriter, File, FileWriter }
import argonaut._
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source
class ConiferSummary(val root: Configurable) extends InProcessFunction with Configurable {
def filterCalls(callFile: File, outFile: File, sampleName: String): Unit = {
// val filename = callFile.getAbsolutePath
val writer = new BufferedWriter(new FileWriter(outFile))
for (line <- Source.fromFile(callFile).getLines()) {
line.startsWith(sampleName) || line.startsWith("sampleID") match {
case true => writer.write(line + "\n");
case _ =>
}
}
writer.close()
}
this.analysisName = getClass.getSimpleName
@Input(doc = "deps")
var deps: List[File] = Nil
@Output(doc = "Summary output", required = true)
var out: File = _
@Input(doc = "calls")
var calls: File = _
var label: String = _
var coniferPipeline: ConiferPipeline = root match {
case pipeline: ConiferPipeline => pipeline
case _ =>
throw new IllegalStateException("Root is no instance of ConiferPipeline")
}
var resources: Map[String, Json] = Map()
override def run() {
logger.debug("Start")
filterCalls(calls, out, label)
logger.debug("Stop")
}
}
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.kopisu
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{PipelineCommand, Reference}
import nl.lumc.sasc.biopet.pipelines.kopisu.methods.FreecMethod
import nl.lumc.sasc.biopet.pipelines.kopisu.methods.{ConiferMethod, FreecMethod}
import nl.lumc.sasc.biopet.utils.{BamUtils, Logging}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
......@@ -42,9 +42,13 @@ class Kopisu(val root: Configurable) extends QScript with SummaryQScript with Re
Some(new FreecMethod(this))
} else None
lazy val coniferMethod = if (config("use_conifer_method", default = false)) {
Some(new ConiferMethod(this))
} else None
// This script is in fact FreeC only.
def biopetScript() {
if (freecMethod.isEmpty) Logging.addError("No method selected")
if (freecMethod.isEmpty && coniferMethod.isEmpty) Logging.addError("No method selected")
freecMethod.foreach { method =>
method.inputBams = inputBams
......@@ -52,6 +56,12 @@ class Kopisu(val root: Configurable) extends QScript with SummaryQScript with Re
add(method)
}
coniferMethod.foreach { method =>
method.inputBams = inputBams
method.outputDir = new File(outputDir, "conifer_method")
add(method)
}
addSummaryJobs()
}
......
package nl.lumc.sasc.biopet.pipelines.kopisu.methods
import java.io.File
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.conifer.{ConiferAnalyze, ConiferCall, ConiferRPKM}
import nl.lumc.sasc.biopet.utils.config.Configurable
/**
* Created by pjvanthof on 10/05/16.
*/
class ConiferMethod(val root: Configurable) extends CnvMethod {
def name = "conifer"
/** Exon definitions in bed format */
var probeFile: File = config("probe_file")
var controlsDir: File = config("controls_dir")
/**Enable RPKM only mode, generate files for reference db */
var RPKMonly: Boolean = false
def biopetScript: Unit = {
val RPKMdir = new File(outputDir, "rpkm")
val rpkmFiles: List[File] = inputBams.map { case (sampleName, bamFile) =>
val coniferRPKM = new ConiferRPKM(this)
coniferRPKM.bamFile = bamFile.getAbsoluteFile
coniferRPKM.probes = this.probeFile
coniferRPKM.output = new File(RPKMdir, s"$sampleName.rpkm.txt")
add(coniferRPKM)
coniferRPKM.output
}.toList ++ controlsDir.list.filter(_.toLowerCase.endsWith(".txt")).map { path =>
val oldFile = new File(path)
val newFile = new File(RPKMdir, s"control.${oldFile.getName}")
add(Ln(this, oldFile, newFile))
newFile
}
inputBams.foreach { case (sampleName, bamFile) =>
val sampleDir = new File(outputDir, "samples" + File.separator + sampleName)
val coniferAnalyze = new ConiferAnalyze(this)
coniferAnalyze.deps ++= rpkmFiles
coniferAnalyze.probes = probeFile
coniferAnalyze.rpkmDir = RPKMdir
coniferAnalyze.output = new File(sampleDir, s"$sampleName.hdf5")
add(coniferAnalyze)
val coniferCall = new ConiferCall(this)
coniferCall.input = coniferAnalyze.output
coniferCall.output = new File(sampleDir, s"${sampleName}.calls.txt")
add(coniferCall)
}
addSummaryJobs()
}
override def summaryFiles = super.summaryFiles ++ Map("probe_file" -> probeFile)
}
......@@ -48,20 +48,25 @@ class KopisuTest extends TestNGSuite with Matchers {
val bool = Array(true, false)
(for (
bams <- 0 to 3;
freec <- bool
) yield Array(bams, freec)).toArray
freec <- bool;
conifer <- bool
) yield Array(bams, freec, conifer)).toArray
}
@Test(dataProvider = "shivaSvCallingOptions")
def testShivaSvCalling(bams: Int,
freec: Boolean) = {
freec: Boolean,
conifer: Boolean) = {
val callers: ListBuffer[String] = ListBuffer()
val map = Map("sv_callers" -> callers.toList)
val pipeline = initPipeline(map ++ Map("use_freec_method" -> freec))
val pipeline = initPipeline(map ++ Map(
"use_freec_method" -> freec,
"use_conifer_method" -> conifer
))
pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> KopisuTest.inputTouch("bam_" + n + ".bam")).toMap
val illegalArgumentException = pipeline.inputBams.isEmpty || (!freec)
val illegalArgumentException = pipeline.inputBams.isEmpty || (!freec && !conifer)
if (illegalArgumentException) intercept[IllegalStateException] {
pipeline.init()
......@@ -103,6 +108,10 @@ object KopisuTest {
copyFile("ref.dict")
copyFile("ref.fa.fai")
val controlDir = Files.createTempDir()
controlDir.deleteOnExit()
Files.touch(new File(controlDir, "test.txt"))
val config = Map(
"name_prefix" -> "test",
"output_dir" -> outputDir,
......@@ -112,6 +121,10 @@ object KopisuTest {
"md5sum" -> Map("exe" -> "test"),
"bgzip" -> Map("exe" -> "test"),
"tabix" -> Map("exe" -> "test"),
"freec" -> Map("exe" -> "test", "chrFiles" -> "test", "chrLenFile" ->"test")
"freec" -> Map("exe" -> "test", "chrFiles" -> "test", "chrLenFile" ->"test"),
"controls_dir" -> controlDir.getAbsolutePath,
"conifer" -> Map("script" -> "/usr/bin/test"),
"probe_file" -> "test",
"rscript" -> Map("exe" -> "test")
)
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment