Commit 45776fbc authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

CNV pipeline for Exome

parent 136e28ce
......@@ -28,7 +28,14 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
executable = config("exe", default = "python", submodule = "python")
protected var python_script_name: String = _
def setPythonScript(script: String) { setPythonScript(script, "") }
/**
 * Selects the python script to run.
 *
 * `script` is first interpreted as a path on disk. When a file exists at that
 * path it is used as-is and its path is recorded as the script name; otherwise
 * the lookup is delegated to the two-argument overload with an empty subpackage.
 *
 * @param script path to, or name of, the python script
 */
def setPythonScript(script: String): Unit = {
  val candidate = new File(script)
  python_script = candidate
  if (candidate.exists()) {
    python_script_name = script
  } else {
    // Not a file on disk: let the (script, subpackage) overload resolve it.
    setPythonScript(script, "")
  }
}
def setPythonScript(script: String, subpackage: String) {
python_script_name = script
python_script = new File(".queue/tmp/" + subpackage + python_script_name)
......
......@@ -16,11 +16,19 @@
package nl.lumc.sasc.biopet.extensions.conifer
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction
abstract class Conifer extends BiopetCommandLineFunction {
abstract class Conifer extends PythonCommandLineFunction {
override def subPath = "conifer" :: super.subPath
executable = config("exe", default = "conifer")
// executable = config("exe", default = "conifer")
setPythonScript(config("script", default = "conifer"))
override val versionRegex = """(.*)""".r
override val versionExitcode = List(0)
override def versionCommand = executable + " --version"
override def versionCommand = executable + " " + python_script + " --version"
override val defaultVmem = "8G"
override val defaultThreads = 1
def cmdLine = getPythonCommand
}
......@@ -18,35 +18,56 @@ package nl.lumc.sasc.biopet.extensions.conifer
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
import nl.lumc.sasc.biopet.extensions.Ln
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
class ConiferAnalyze(val root: Configurable) extends Conifer {
@Input(doc = "Probes / capture kit definition as bed file: chr,start,stop,gene-annot", required = true)
var probes: File = _
@Input(doc = "Path to Conifer RPKM files", required = true)
var rpkm_dir: File = _
// @Input(doc = "Path to Conifer RPKM files", required = true)
var rpkmDir: File = _
@Output(doc = "Output RPKM.txt", shortName = "out")
@Output(doc = "Output analyse.hdf5", shortName = "out")
var output: File = _
@Argument(doc = "SVD, number of components to remove", minRecommendedValue = 2, maxRecommendedValue = 5,
minValue = 2, maxValue = 20)
var svd: Option[Int] = config("svd")
minValue = 2, maxValue = 20, required = false)
var svd: Option[Int] = config("svd", default = 1)
@Argument(doc="Minimum population median RPKM per probe")
@Argument(doc = "Minimum population median RPKM per probe", required = false)
var min_rpkm: Option[Double] = config("min_rpkm")
override def afterGraph {
this.checkExecutable
override def afterGraph: Unit = {
super.afterGraph
//
// // create new RPKM_dir with the controls in it together with the subject
// new_rpkm_dir = new File(this.output.getParent() + File.separator + "rpkm_tmp")
// logger.info("Creating " + new_rpkm_dir.getAbsolutePath)
// new_rpkm_dir.mkdir()
//
// for (f <- rpkm_dir.listFiles()) {
// var target = new File(new_rpkm_dir + File.separator + f.getName)
// if (!target.exists()) {
// logger.info("Creating " + target.getAbsolutePath)
// Ln(this, f, target, true).run
// }
// }
// for (f <- rpkm_refdir.listFiles()) {
// var target = new File(new_rpkm_dir + File.separator + f.getName)
// if (!target.exists()) {
// logger.info("Creating " + target.getAbsolutePath)
// Ln(this, f, target, true).run
// }
// }
}
def cmdLine = required(executable) +
required("rpkm")+
override def cmdLine = super.cmdLine +
" analyze " +
" --probes" + required(probes) +
" --rpkm_dir" + required(rpkm_dir) +
" --rpkm_dir" + required(rpkmDir) +
" --output" + required(output) +
optional("--svd",svd) +
optional("--svd", svd) +
optional("--min_rpkm", min_rpkm)
}
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions.conifer
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
class ConiferCall(val root: Configurable) extends Conifer {
......@@ -32,8 +32,8 @@ class ConiferCall(val root: Configurable) extends Conifer {
this.checkExecutable
}
def cmdLine = required(executable) +
required("call")+
override def cmdLine = super.cmdLine +
" call " +
" --input" + required(input) +
" --output" + required(output)
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.conifer
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
 * Wrapper around the `export` sub-command of Conifer.
 *
 * The command line is the base Conifer command followed by `export`,
 * `--input <input>` and `--output <output>`.
 */
class ConiferExport(val root: Configurable) extends Conifer {

  /** Analysis file consumed by the export step. */
  @Input(doc = "Input analysis.hdf5", required = true)
  var input: File = _

  /** Destination handed to `--output`. */
  @Output(doc = "Output <sample>.svdzrpkm.bed", shortName = "out", required = true)
  var output: File = _

  /** Verifies the configured executable once the job graph has been built. */
  override def afterGraph: Unit = {
    checkExecutable
  }

  /** Assembles the full `export` invocation on top of the shared Conifer command. */
  override def cmdLine = {
    val baseCommand = super.cmdLine
    val inputArgument = " --input" + required(input)
    val outputArgument = " --output" + required(output)
    baseCommand + " export " + inputArgument + outputArgument
  }
}
......@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.extensions.conifer
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Output, Input}
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
class ConiferRPKM(val root: Configurable) extends Conifer {
......@@ -28,17 +28,12 @@ class ConiferRPKM(val root: Configurable) extends Conifer {
@Input(doc = "Probes / capture kit definition as bed file: chr,start,stop,gene-annot", required = true)
var probes: File = _
/** The output RPKM file should be written to a directory that contains all the RPKM files from previous experiments. */
@Output(doc = "Output RPKM.txt", shortName = "out")
var output: File = _
private var config_file: File = _
override def afterGraph {
this.checkExecutable
}
def cmdLine = required(executable) +
required("rpkm")+
override def cmdLine = super.cmdLine +
" rpkm " +
" --probes" + required(probes) +
" --input" + required(bamFile) +
" --output" + required(output)
......
......@@ -75,6 +75,11 @@
<artifactId>Yamsvp</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Kopisu</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
......
......@@ -22,7 +22,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap,
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics,
nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp,
nl.lumc.sasc.biopet.pipelines.sage.Sage
nl.lumc.sasc.biopet.pipelines.sage.Sage,
nl.lumc.sasc.biopet.pipelines.kopisu.ConiferPipeline
)
def tools: List[MainCommand] = List(
......
......@@ -30,7 +30,7 @@
<relativePath>../</relativePath>
</parent>
<inceptionYear>2014</inceptionYear>
<inceptionYear>2015</inceptionYear>
<name>Kopisu</name>
<dependencies>
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu
import java.io.{ BufferedWriter, FileWriter, File }
import nl.lumc.sasc.biopet.core.{ PipelineCommand, _ }
import nl.lumc.sasc.biopet.core.config._
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM }
import org.broadinstitute.gatk.queue.QScript
import scala.io.Source
/**
 * Queue script that runs Conifer on a single BAM file.
 *
 * Steps added to the job graph, in order:
 *  1. [[ConiferRPKM]] on the input BAM, written into `<outputDir>/RPKM/`;
 *  2. one `Ln` job per control RPKM file that is not yet present in that directory;
 *  3. [[ConiferAnalyze]] on the combined RPKM directory;
 *  4. [[ConiferCall]] on the analysis output;
 *  5. [[ConiferSummary]] to filter the calls for this sample.
 */
class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript {
  def this() = this(null)

  /** Input bamfile */
  @Input(doc = "Bamfile to start from", fullName = "bam", shortName = "bam", required = true)
  var inputBam: File = _

  /** Optional sample label; stays `null` when the argument is not supplied. */
  @Argument(doc = "Label this sample with a name/ID [0-9a-zA-Z] and [-_]",
    fullName = "label",
    shortName = "label", required = false)
  var sampleLabel: String = _

  /** Exon definitions in bed format */
  @Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = true)
  var probeFile: File = _

  /** Directory whose files are linked next to the sample RPKM file as controls. */
  @Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = true)
  var rpkmControls: File = _

  val summary = new ConiferSummary(this)

  def init() {
  }

  /** True when a non-empty label was supplied; guards against the unset (`null`) argument. */
  private def hasSampleLabel: Boolean = sampleLabel != null && !sampleLabel.isEmpty

  /**
   * Builds an output file name: `<label><suffix>` when a label is set, otherwise
   * the BAM file name with its `.bam` extension swapped for `suffix`.
   */
  private def outputName(bam: File, suffix: String): String = {
    if (hasSampleLabel) sampleLabel + suffix
    else swapExt(bam.getName, ".bam", suffix)
  }

  /** Name of the RPKM file for `inputBam`. */
  def input2RPKM(inputBam: File): String = outputName(inputBam, ".txt")

  /** Name of the HDF5 analysis file for `inputBam`. */
  def input2HDF5(inputBam: File): String = outputName(inputBam, ".hdf5")

  /**
   * Name of the filtered calls file for `inputBam`.
   *
   * The suffix carries its leading dot in both branches, so the unlabeled case
   * yields `sample.calls.txt` rather than `samplecalls.txt`.
   */
  def input2Calls(inputBam: File): String = outputName(inputBam, ".calls.txt")

  def biopetScript(): Unit = {
    /** Setup RPKM directory */
    val sampleDir: String = outputDir
    val RPKMdir: File = new File(sampleDir + File.separator + "RPKM" + File.separator)
    // mkdirs: the output directory itself may not exist yet when the script is built.
    RPKMdir.mkdirs()

    val coniferRPKM = new ConiferRPKM(this)
    coniferRPKM.bamFile = this.inputBam.getAbsoluteFile
    coniferRPKM.probes = this.probeFile
    coniferRPKM.output = new File(RPKMdir, input2RPKM(inputBam))
    add(coniferRPKM)

    /** Collect the rpkm_output to a temp directory, where we merge with the control files */
    // listFiles() returns null when the path is not a readable directory.
    val controlFiles: List[File] = Option(rpkmControls.listFiles()) match {
      case Some(files) => files.toList
      case None =>
        logger.warn("Cannot list RPKM control files in " + rpkmControls)
        Nil
    }

    // Only controls that are not yet present get a link job and become a dependency.
    val refRPKMlist: List[File] = for {
      control <- controlFiles
      target = new File(RPKMdir, control.getName)
      if !target.exists()
    } yield {
      logger.info("Creating " + target.getAbsolutePath)
      add(Ln(this, control, target, false))
      target
    }

    val coniferAnalyze = new ConiferAnalyze(this)
    coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist
    coniferAnalyze.probes = this.probeFile
    coniferAnalyze.rpkmDir = RPKMdir
    coniferAnalyze.output = new File(sampleDir + File.separator + input2HDF5(inputBam))
    add(coniferAnalyze)

    val coniferCall = new ConiferCall(this)
    coniferCall.input = coniferAnalyze.output
    coniferCall.output = new File(sampleDir + File.separator + "calls.txt")
    add(coniferCall)

    summary.deps = List(coniferCall.output)
    summary.label = sampleLabel
    summary.calls = coniferCall.output
    // NOTE(review): this is a bare file name, not prefixed with sampleDir like the
    // other outputs — confirm whether the summary should live in the output directory.
    summary.out = input2Calls(inputBam)
    add(summary)
  }
}

object ConiferPipeline extends PipelineCommand
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu
import java.io.{ FileWriter, BufferedWriter, File, PrintWriter }
import argonaut._
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source
/**
 * In-process Queue function that filters a Conifer calls file down to the
 * lines of one sample and writes them to the summary output.
 *
 * Must be constructed with a [[ConiferPipeline]] as root; any other root
 * raises an `IllegalStateException`.
 */
class ConiferSummary(val root: Configurable) extends InProcessFunction with Configurable {

  /**
   * Copies the matching lines of `callFile` to `outFile`, one per line.
   *
   * A line is kept when it starts with `sampleName` or with the literal
   * `sampleID` (presumably the header row — confirm against Conifer output).
   * A `null` sample name is treated like the empty string, which matches
   * every line.
   *
   * @param callFile   calls file to read
   * @param outFile    file to (over)write with the kept lines
   * @param sampleName prefix selecting the lines of interest
   */
  def filterCalls(callFile: File, outFile: File, sampleName: String): Unit = {
    val samplePrefix = Option(sampleName).getOrElse("")
    val source = Source.fromFile(callFile.getAbsolutePath)
    try {
      val writer = new BufferedWriter(new FileWriter(outFile))
      try {
        for {
          line <- source.getLines()
          if line.startsWith(samplePrefix) || line.startsWith("sampleID")
        } {
          writer.write(line)
          // getLines() strips the terminator; put one back so records stay separated.
          writer.newLine()
        }
      } finally {
        // Closing flushes the buffer; without it the output may stay empty.
        writer.close()
      }
    } finally {
      source.close()
    }
  }

  this.analysisName = getClass.getSimpleName

  @Input(doc = "deps")
  var deps: List[File] = Nil

  @Output(doc = "Summary output", required = true)
  var out: File = _

  @Input(doc = "calls")
  var calls: File = _

  /** Sample label used as the line prefix when filtering the calls. */
  var label: String = _

  var coniferPipeline: ConiferPipeline = root match {
    case pipeline: ConiferPipeline => pipeline
    case _                         => throw new IllegalStateException("Root is no instance of ConiferPipeline")
  }

  var resources: Map[String, Json] = Map()

  /** Reads `calls` and writes the filtered result to `out`. */
  override def run(): Unit = {
    logger.debug("Start")
    // Argument order is (input, output, sample): read the calls, write the summary.
    filterCalls(calls, out, label)
    logger.debug("Stop")
  }
}
......@@ -13,21 +13,10 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.sage
package nl.lumc.sasc.biopet.pipelines.kopisu
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Cat
import nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsCoverage
import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.tools.PrefixFastq
import nl.lumc.sasc.biopet.tools.BedtoolsCoverageToCounts
import nl.lumc.sasc.biopet.scripts.SquishBed
import nl.lumc.sasc.biopet.tools.SageCountFastq
import nl.lumc.sasc.biopet.tools.SageCreateLibrary
import nl.lumc.sasc.biopet.tools.SageCreateTagCounts
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
import org.broadinstitute.gatk.queue.QScript
class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript {
......@@ -53,13 +42,6 @@ class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript {
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
var libraryFastqFiles: List[File] = List()
val sampleID: String = sampleConfig("ID").toString
val sampleDir: String = globalSampleDir + sampleID + "/"
for ((library, libraryFiles) <- runLibraryJobs(sampleConfig)) {
}
return sampleOutput
}
......@@ -67,11 +49,6 @@ class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript {
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
if (runConfig.contains("bam")) {
} else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
return libraryOutput
}
}
......
......@@ -32,6 +32,7 @@
<module>gentrap</module>
<module>mapping</module>
<module>sage</module>
<module>kopisu</module>
<module>yamsvp</module>
</modules>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment