Commit 046b719e authored by Peter van 't Hof, committed by GitHub

Merge pull request #169 from biopet/fix-BIOPET-719

Adding Stringtie to gentrap
parents 702a85e8 90d86e6c
package nl.lumc.sasc.biopet.extensions.stringtie
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Reference, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/**
 * Command line wrapper for running `stringtie` on a single alignment file.
 *
 * Every tool option is exposed as a field named after its flag. Unless noted otherwise the value
 * is read from the config under a key equal to the flag name. See the StringTie manual for the
 * meaning of the individual flags.
 */
class Stringtie(val parent: Configurable)
    extends BiopetCommandLineFunction
    with Reference
    with Version {

  executable = config("exe", "stringtie")

  // ---- Input and output files ----

  /** Alignment file, placed on the command line directly after the executable */
  @Input(required = true)
  var inputBam: File = _

  /** Optional annotation, passed with `-G` */
  @Input(required = false)
  var referenceGtf: Option[File] = None

  /** Main output, passed with `-o` unless the output is written to stdout */
  @Output
  var outputGtf: File = _

  /** Optional output file, passed with `-A` */
  @Output
  var geneAbundances: Option[File] = None

  /** Optional output file, passed with `-C` */
  @Output
  var referenceCoverage: Option[File] = None

  // ---- Switches and values, each one maps onto the flag of the same name ----

  var rf: Boolean = config("rf", default = false)
  var fr: Boolean = config("fr", default = false)

  /** `-v` is switched on by default when debug logging is enabled */
  var v: Boolean = config("v", default = logger.isDebugEnabled)

  /** Value for `-l`; not read from the config, the caller has to set this */
  var l: Option[String] = None

  var f: Option[Double] = config("f")
  var m: Option[Int] = config("m")
  var a: Option[Int] = config("a")
  var j: Option[Float] = config("j")
  var t: Boolean = config("t", default = false)
  var c: Option[Float] = config("c")
  var g: Option[Int] = config("g")
  var B: Boolean = config("B", default = false)
  var b: Option[String] = config("b")
  var e: Boolean = config("e", default = false)
  var M: Option[Float] = config("M")

  /** Values for `-x`, joined with a comma into a single argument; left out when empty */
  var x: List[String] = config("x", default = Nil)

  /** Command used to retrieve the version of the executable */
  def versionCommand: String = executable + " --version"

  /** Regex applied on the output of the version command, captures the complete output */
  def versionRegex: Regex = "(.*)".r

  /**
   * Builds the command line out of the configured fields.
   *
   * The fragments are listed in the order in which they end up on the command line.
   */
  def cmdLine: String =
    Seq[String](
      required(executable),
      required(inputBam),
      conditional(v, "-v"),
      required("-p", threads),
      conditional(rf, "--rf"),
      conditional(fr, "--fr"),
      optional("-l", l),
      optional("-f", f),
      optional("-m", m),
      optional("-A", geneAbundances),
      optional("-C", referenceCoverage),
      optional("-a", a),
      optional("-j", j),
      conditional(t, "-t"),
      optional("-c", c),
      optional("-g", g),
      conditional(B, "-B"),
      optional("-b", b),
      conditional(e, "-e"),
      optional("-M", M),
      optional("-G", referenceGtf),
      // An empty list would otherwise result in an empty "-x" argument
      if (x.isEmpty) "" else optional("-x", x.mkString(",")),
      // No "-o" when the output is written to stdout
      if (outputAsStdout) "" else required("-o", outputGtf)
    ).mkString
}
package nl.lumc.sasc.biopet.extensions.stringtie
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Reference, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/**
 * Command line wrapper for `stringtie --merge`, which combines multiple GTF files into a single
 * GTF file.
 *
 * Every tool option is exposed as a field named after its flag. Unless noted otherwise the value
 * is read from the config under a key equal to the flag name. See the StringTie manual for the
 * meaning of the individual flags.
 */
class StringtieMerge(val parent: Configurable)
    extends BiopetCommandLineFunction
    with Reference
    with Version {

  executable = config("exe", "stringtie")

  /** GTF files to merge, appended at the end of the command line */
  @Input(required = true)
  var inputGtfs: List[File] = Nil

  /** Optional annotation, passed with `-G` */
  @Input(required = false)
  var referenceGtf: Option[File] = None

  /** Merged output, passed with `-o` unless the output is written to stdout */
  @Output
  var outputGtf: File = _

  /** `-v` is switched on by default when debug logging is enabled */
  var v: Boolean = config("v", default = logger.isDebugEnabled)

  /** Value for `-l`; not read from the config, the caller has to set this */
  var l: Option[String] = None

  var f: Option[Double] = config("f")
  var m: Option[Int] = config("m")
  var c: Option[Float] = config("c")
  var F: Option[Double] = config("F")
  var T: Option[Double] = config("T")
  var i: Boolean = config("i", default = false)

  /** Command to get version of executable */
  def versionCommand: String = executable + " --version"

  /** Regex to get version from version command output, captures the complete output */
  def versionRegex: Regex = "(.*)".r

  /**
   * Builds the command line out of the configured fields.
   *
   * `-T` is emitted next to `-F`; without it a configured `T` value never reached the tool.
   */
  def cmdLine: String =
    required(executable) +
      required("--merge") +
      conditional(v, "-v") +
      required("-p", threads) +
      optional("-l", l) +
      optional("-f", f) +
      optional("-m", m) +
      optional("-c", c) +
      optional("-F", F) +
      optional("-T", T) +
      conditional(i, "-i") +
      optional("-G", referenceGtf) +
      // No "-o" when the output is written to stdout
      (if (outputAsStdout) "" else required("-o", outputGtf)) +
      repeat(inputGtfs)
}
......@@ -148,6 +148,7 @@ class Gentrap(val parent: Configurable)
cufflinksBlind.foreach(validate.gtfFile :+= _.annotationGtf)
cufflinksGuided.foreach(validate.gtfFile :+= _.annotationGtf)
cufflinksStrict.foreach(validate.gtfFile :+= _.annotationGtf)
stringtie.foreach(validate.gtfFile :+= _.annotationGtf)
validate.jobOutputFile = new File(outputDir, ".validate.annotation.out")
add(validate)
......@@ -169,6 +170,11 @@ class Gentrap(val parent: Configurable)
Some(new BaseCounts(this))
else None
lazy val stringtie: Option[Stringtie] =
if (expMeasures().contains(ExpMeasures.Stringtie))
Some(new Stringtie(this))
else None
lazy val cufflinksBlind: Option[CufflinksBlind] =
if (expMeasures().contains(ExpMeasures.CufflinksBlind))
Some(new CufflinksBlind(this))
......@@ -186,7 +192,7 @@ class Gentrap(val parent: Configurable)
def executedMeasures: List[QScript with Measurement] =
(fragmentsPerGene :: baseCounts :: cufflinksBlind ::
cufflinksGuided :: cufflinksStrict :: Nil).flatten
cufflinksGuided :: cufflinksStrict :: stringtie :: Nil).flatten
/** Whether to do simple variant calling on RNA or not */
lazy val shivaVariantcalling: Option[ShivaVariantcalling] =
......@@ -299,7 +305,8 @@ object Gentrap extends PipelineCommand {
/** Enumeration of available expression measures */
object ExpMeasures extends Enumeration {
val FragmentsPerGene, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value
val FragmentsPerGene, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind, Stringtie =
Value
}
/** Enumeration of available strandedness */
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationGtf
import nl.lumc.sasc.biopet.extensions.stringtie.{StringtieMerge, Stringtie => StringtieTool}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
 * Gentrap measurement that runs StringTie on the BAM file of every sample and merges the
 * resulting per-sample GTF files into a single GTF file with `stringtie --merge`.
 *
 * Created by pjvan_thof on 1/12/16.
 */
class Stringtie(val parent: Configurable) extends QScript with Measurement with AnnotationGtf {

  // NOTE(review): presumably consumed by the table merging in `Measurement` - not visible here
  def mergeArgs = MergeArgs(idCols = List(1), valCol = 2, fallback = "0")

  /** Pipeline itself: one StringTie job per sample, followed by a single merge job */
  def biopetScript(): Unit = {
    val perSampleGtfs: List[File] =
      bamFiles.map { case (sampleId, bam) => addSampleJob(sampleId, bam) }.toList

    val mergeJob = new StringtieMerge(this)
    mergeJob.inputGtfs = perSampleGtfs
    mergeJob.referenceGtf = Some(annotationGtf)
    mergeJob.outputGtf = stringtieMergeOutput
    add(mergeJob)

    addSummaryJobs()
  }

  /**
   * Configures and adds the StringTie job of a single sample.
   *
   * All output files are placed in a directory named after the sample inside `outputDir`.
   *
   * @param sampleId id of the sample, also used as the value of `-l` and as output file prefix
   * @param bam alignment file of the sample
   * @return the GTF file that the added job writes
   */
  private def addSampleJob(sampleId: String, bam: File): File = {
    val sampleDir = new File(outputDir, sampleId)
    val job = new StringtieTool(this)
    job.inputBam = bam
    job.l = Some(sampleId)
    job.referenceGtf = Some(annotationGtf)
    job.outputGtf = new File(sampleDir, s"$sampleId.gtf")
    job.geneAbundances = Some(new File(sampleDir, s"$sampleId.gene_abund.tab"))
    job.referenceCoverage = Some(new File(sampleDir, s"$sampleId.cov_refs.gtf"))
    add(job)
    job.outputGtf
  }

  /** Location of the merged GTF file */
  def stringtieMergeOutput: File = new File(outputDir, "stringtie.merged.gtf")

  /** Inherited summary files, extended with the annotation and the merged GTF file */
  override def summaryFiles: Map[String, File] = {
    val inherited = super.summaryFiles
    inherited ++ Map(
      "annotation_gtf" -> annotationGtf,
      "stringtie_merged" -> stringtieMergeOutput
    )
  }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment