Commit c874be08 authored by bow

Merge branch 'feature-cnvpipeline' into 'develop'

Feature cnvpipeline - includes a change to PythonCommandLineFunction

CNV pipeline - starts from a BAM file

Tickets:

#106
#107

Work for KG

See merge request !76
parents 4e1cada3 ccf09c1a
@@ -28,7 +28,14 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
   executable = config("exe", default = "python", submodule = "python")
   protected var python_script_name: String = _
-  def setPythonScript(script: String) { setPythonScript(script, "") }
+  def setPythonScript(script: String) {
+    python_script = new File(script)
+    if (!python_script.exists()) {
+      setPythonScript(script, "")
+    } else {
+      python_script_name = script
+    }
+  }
   def setPythonScript(script: String, subpackage: String) {
     python_script_name = script
     python_script = new File(".queue/tmp/" + subpackage + python_script_name)
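With this change, the single-argument overload first tries its argument as a concrete path and only falls back to the packaged-script lookup when no such file exists. A minimal sketch of both call styles, using a hypothetical wrapper (class and script names are illustrative, not part of this commit):

import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction

// Hypothetical wrapper, for illustration only.
class MyPythonTool(val root: Configurable) extends PythonCommandLineFunction {
  // An existing file on disk is now used directly:
  setPythonScript("/opt/scripts/my_tool.py")
  // A bare name with no matching file falls back to
  // setPythonScript(script, ""), which extracts the packaged copy
  // into .queue/tmp/:
  // setPythonScript("my_tool.py")
  def cmdLine = getPythonCommand
}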
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.conifer

import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.extensions.PythonCommandLineFunction

abstract class Conifer extends PythonCommandLineFunction {
  override def subPath = "conifer" :: super.subPath
  //  executable = config("exe", default = "conifer")
  setPythonScript(config("script", default = "conifer"))

  override val versionRegex = """(.*)""".r
  override val versionExitcode = List(0)
  override def versionCommand = executable + " " + python_script + " --version"

  override val defaultVmem = "8G"
  override val defaultThreads = 1

  def cmdLine = getPythonCommand
}
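For orientation, cmdLine = getPythonCommand renders "<python executable> <script>", and each wrapper that follows appends its CONIFER subcommand and flags to it. A hedged illustration of the composed commands, assuming the config defaults executable = "python" and script = "conifer":

// Illustrative composition only; exact spacing and quoting are added
// by GATK Queue's required()/optional() helpers.
//   getPythonCommand        ->  python conifer
//   ConiferRPKM.cmdLine     ->  python conifer rpkm --probes <bed> --input <bam> --output <rpkm.txt>
//   ConiferAnalyze.cmdLine  ->  python conifer analyze --probes <bed> --rpkm_dir <dir> --output <hdf5> [--svd N]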
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.conifer

import java.io.File

import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Ln
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }

class ConiferAnalyze(val root: Configurable) extends Conifer {

  @Input(doc = "Probes / capture kit definition as bed file: chr,start,stop,gene-annot", required = true)
  var probes: File = _

  //  @Input(doc = "Path to Conifer RPKM files", required = true)
  var rpkmDir: File = _

  @Output(doc = "Output analysis.hdf5", shortName = "out")
  var output: File = _

  @Argument(doc = "SVD, number of components to remove", minRecommendedValue = 2, maxRecommendedValue = 5,
    minValue = 2, maxValue = 20, required = false)
  var svd: Option[Int] = config("svd", default = 2) // default must respect minValue = 2

  @Argument(doc = "Minimum population median RPKM per probe", required = false)
  var min_rpkm: Option[Double] = config("min_rpkm")

  override def cmdLine = super.cmdLine +
    " analyze " +
    " --probes" + required(probes) +
    " --rpkm_dir" + required(rpkmDir) +
    " --output" + required(output) +
    optional("--svd", svd) +
    optional("--min_rpkm", min_rpkm)
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.conifer

import java.io.File

import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }

class ConiferCall(val root: Configurable) extends Conifer {

  @Input(doc = "Input analysis.hdf5", required = true)
  var input: File = _

  @Output(doc = "Output calls.txt", shortName = "out")
  var output: File = _

  override def cmdLine = super.cmdLine +
    " call " +
    " --input" + required(input) +
    " --output" + required(output)
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.conifer

import java.io.File

import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

class ConiferExport(val root: Configurable) extends Conifer {

  @Input(doc = "Input analysis.hdf5", required = true)
  var input: File = _

  @Output(doc = "Output <sample>.svdzrpkm.bed", shortName = "out", required = true)
  var output: File = _

  override def afterGraph {
    this.checkExecutable
  }

  override def cmdLine = super.cmdLine +
    " export " +
    " --input" + required(input) +
    " --output" + required(output)
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.conifer

import java.io.File

import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }

class ConiferRPKM(val root: Configurable) extends Conifer {

  @Input(doc = "Bam file", required = true)
  var bamFile: File = _

  @Input(doc = "Probes / capture kit definition as bed file: chr,start,stop,gene-annot", required = true)
  var probes: File = _

  /** The output RPKM should be written to a directory that holds the RPKM files from previous experiments */
  @Output(doc = "Output RPKM.txt", shortName = "out")
  var output: File = _

  override def cmdLine = super.cmdLine +
    " rpkm " +
    " --probes" + required(probes) +
    " --input" + required(bamFile) +
    " --output" + required(output)
}
@@ -75,6 +75,11 @@
       <artifactId>Yamsvp</artifactId>
       <version>${project.version}</version>
     </dependency>
+    <dependency>
+      <groupId>nl.lumc.sasc</groupId>
+      <artifactId>Kopisu</artifactId>
+      <version>${project.version}</version>
+    </dependency>
   </dependencies>
   <build>
     <plugins>
@@ -22,7 +22,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
     nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap,
     nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics,
     nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp,
-    nl.lumc.sasc.biopet.pipelines.sage.Sage
+    nl.lumc.sasc.biopet.pipelines.sage.Sage,
+    nl.lumc.sasc.biopet.pipelines.kopisu.ConiferPipeline
   )
   def tools: List[MainCommand] = List(
/target/
\ No newline at end of file
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>nl.lumc.sasc</groupId>
  <artifactId>Kopisu</artifactId>
  <packaging>jar</packaging>

  <parent>
    <groupId>nl.lumc.sasc</groupId>
    <artifactId>Biopet</artifactId>
    <version>0.3.0-DEV</version>
    <relativePath>../</relativePath>
  </parent>

  <inceptionYear>2015</inceptionYear>
  <name>Kopisu</name>

  <dependencies>
    <dependency>
      <groupId>nl.lumc.sasc</groupId>
      <artifactId>BiopetFramework</artifactId>
      <version>${project.version}</version>
    </dependency>
  </dependencies>
</project>
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu

import java.io.{ BufferedWriter, FileWriter, File }

import nl.lumc.sasc.biopet.core.{ PipelineCommand, _ }
import nl.lumc.sasc.biopet.core.config._
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM }
import org.broadinstitute.gatk.queue.QScript

import scala.io.Source

class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript {
  /** Kopisu - ConiferPipeline, a pipeline that can run standalone */
  def this() = this(null)

  /** Input bamfile */
  @Input(doc = "Bamfile to start from", fullName = "bam", shortName = "bam", required = true)
  var inputBam: File = _

  @Argument(doc = "Label this sample with a name/ID [0-9a-zA-Z] and [-_]",
    fullName = "label",
    shortName = "label", required = false)
  var sampleLabel: String = _

  /** Exon definitions in bed format */
  @Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = false)
  var probeFile: File = config("probeFile")

  @Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = false)
  var controlsDir: File = config("controlsDir")

  @Argument(doc = "Enable RPKM only mode, generate files for reference db", shortName = "rpkmonly", required = false)
  var RPKMonly: Boolean = false

  val summary = new ConiferSummary(this)

  def init() {
  }

  // sampleLabel is optional, so guard against null before using it
  private def hasLabel = sampleLabel != null && !sampleLabel.isEmpty

  def input2RPKM(inputBam: File): String = {
    if (hasLabel) sampleLabel ++ ".txt"
    else swapExt(inputBam.getName, ".bam", ".txt")
  }

  def input2HDF5(inputBam: File): String = {
    if (hasLabel) sampleLabel ++ ".hdf5"
    else swapExt(inputBam.getName, ".bam", ".hdf5")
  }

  def input2Calls(inputBam: File): String = {
    if (hasLabel) sampleLabel ++ ".calls.txt"
    else swapExt(inputBam.getName, ".bam", ".calls.txt")
  }

  def biopetScript(): Unit = {

    /** Setup RPKM directory */
    val sampleDir: String = outputDir
    val RPKMdir: File = new File(sampleDir + File.separator + "RPKM" + File.separator)
    RPKMdir.mkdirs() // also create missing parent directories

    val coniferRPKM = new ConiferRPKM(this)
    coniferRPKM.bamFile = this.inputBam.getAbsoluteFile
    coniferRPKM.probes = this.probeFile
    coniferRPKM.output = new File(RPKMdir + File.separator + input2RPKM(inputBam))
    add(coniferRPKM)

    if (!RPKMonly) {
      /** Collect the rpkm_output to a temp directory, where we merge with the control files */
      var refRPKMlist: List[File] = Nil
      for (f <- controlsDir.listFiles()) {
        var target = new File(RPKMdir + File.separator + f.getName)
        if (!target.exists()) {
          logger.info("Creating " + target.getAbsolutePath)
          add(Ln(this, f, target, true))
          refRPKMlist :+= target
        }
      }

      val coniferAnalyze = new ConiferAnalyze(this)
      coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist
      coniferAnalyze.probes = this.probeFile
      coniferAnalyze.rpkmDir = RPKMdir
      coniferAnalyze.output = new File(sampleDir + File.separator + input2HDF5(inputBam))
      add(coniferAnalyze)

      val coniferCall = new ConiferCall(this)
      coniferCall.input = coniferAnalyze.output
      coniferCall.output = new File(sampleDir + File.separator + "calls.txt")
      add(coniferCall)

      summary.deps = List(coniferCall.output)
      summary.label = sampleLabel
      summary.calls = coniferCall.output
      summary.out = new File(sampleDir + File.separator + input2Calls(inputBam))
      add(summary)
    }
  }
}

object ConiferPipeline extends PipelineCommand
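For orientation, the dataflow assembled by biopetScript, sketched with illustrative file names:

// sample.bam   --ConiferRPKM-->     RPKM/sample.txt
// RPKM/*       (sample RPKM plus symlinked control RPKMs)
//              --ConiferAnalyze-->  sample.hdf5
// sample.hdf5  --ConiferCall-->     calls.txt
// calls.txt    --ConiferSummary-->  sample.calls.txt (this sample's rows only)
// With -rpkmonly, the pipeline stops after ConiferRPKM, producing
// RPKM files for the reference db used by later runs.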
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu

import java.io.{ FileWriter, BufferedWriter, File, PrintWriter }

import argonaut._
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }

import scala.io.Source

class ConiferSummary(val root: Configurable) extends InProcessFunction with Configurable {
  /** Keep only the header line and the rows belonging to one sample */
  def filterCalls(callFile: File, outFile: File, sampleName: String): Unit = {
    val writer = new BufferedWriter(new FileWriter(outFile))
    for (line <- Source.fromFile(callFile).getLines()
         if line.startsWith(sampleName) || line.startsWith("sampleID")) {
      writer.write(line + "\n")
    }
    writer.close()
  }

  this.analysisName = getClass.getSimpleName

  @Input(doc = "deps")
  var deps: List[File] = Nil

  @Output(doc = "Summary output", required = true)
  var out: File = _

  @Input(doc = "calls")
  var calls: File = _

  var label: String = _

  var coniferPipeline: ConiferPipeline = if (root.isInstanceOf[ConiferPipeline]) root.asInstanceOf[ConiferPipeline] else {
    throw new IllegalStateException("Root is not an instance of ConiferPipeline")
  }

  var resources: Map[String, Json] = Map()

  override def run {
    logger.debug("Start")
    // label is optional upstream; an empty label keeps all lines
    filterCalls(calls, out, Option(label).getOrElse(""))
    logger.debug("Stop")
  }
}
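To make the filter concrete, a made-up example (CONIFER's exact column layout is assumed here, not taken from this commit):

// calls.txt (hypothetical):
//   sampleID   chromosome  start    stop     state
//   NA12878    chr1        1569000  1910000  del
//   NA12891    chr2        2300000  2420000  dup
// filterCalls(calls, out, "NA12878") keeps the "sampleID" header line
// and the NA12878 row; rows belonging to other samples are dropped.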
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu

import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
import org.broadinstitute.gatk.queue.QScript

class Kopisu(val root: Configurable) extends QScript with MultiSampleQScript {
  def this() = this(null)

  @Input(doc = "Input bamfile", required = true)
  var bamFile: File = config("bam")

  class LibraryOutput extends AbstractLibraryOutput {
  }

  class SampleOutput extends AbstractSampleOutput {
  }

  def init() {
    if (!outputDir.endsWith("/")) outputDir += "/"
  }

  def biopetScript() {
    runSamplesJobs
  }

  // Called for each sample
  def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
    val sampleOutput = new SampleOutput
    sampleOutput
  }

  // Called for each run from a sample
  def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
    val libraryOutput = new LibraryOutput
    libraryOutput
  }
}

object Kopisu extends PipelineCommand
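The multi-sample class is still a skeleton; its per-sample hooks return empty outputs. Purely as an illustrative sketch, one way the standalone ConiferPipeline could later be wired into runSingleSampleJobs (the "ID" config key and the addAll(... .functions) embedding are assumptions based on common Biopet/Queue patterns, not part of this commit):

// Sketch only, not part of this commit.
def runSingleSampleJobsSketch(sampleConfig: Map[String, Any]): SampleOutput = {
  val sampleOutput = new SampleOutput
  val sampleID = sampleConfig("ID").toString // assumed sample-id key

  val conifer = new ConiferPipeline(this)
  conifer.inputBam = bamFile
  conifer.sampleLabel = sampleID
  conifer.outputDir = outputDir + sampleID + "/"
  conifer.init()
  conifer.biopetScript()
  addAll(conifer.functions) // hand the sub-pipeline's jobs to Queue (assumed helper)

  sampleOutput
}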
@@ -32,6 +32,7 @@
     <module>gentrap</module>
     <module>mapping</module>
     <module>sage</module>
+    <module>kopisu</module>
     <module>yamsvp</module>
   </modules>
@@ -182,4 +183,4 @@
       </plugin>
     </plugins>
   </build>
-</project>
\ No newline at end of file
+</project>