Commit d9383457 authored by rhpvorderman's avatar rhpvorderman

Merge branch 'develop' into fix-BIOPET-724

parents 901d5f35 f5a3634a
...@@ -284,14 +284,13 @@ object BammetricsReport extends ReportBuilder { ...@@ -284,14 +284,13 @@ object BammetricsReport extends ReportBuilder {
.getOrElse(throw new IllegalStateException("Sample must be there")) .getOrElse(throw new IllegalStateException("Sample must be there"))
val libraryName = val libraryName =
library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf)) library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
if (yKeyList.find(x => map.contains(x) && map(x).isDefined).isEmpty) { val yKey = yKeyList.find(x => map.contains(x) && map(x).isDefined).getOrElse("none")
"" val xKey = xKeyList.find(x => map.contains(x) && map(x).isDefined).getOrElse("none")
}
val yKey = yKeyList.find(x => map.contains(x) && map(x).isDefined).get
val xKey = xKeyList.find(x => map.contains(x) && map(x).isDefined).get
Map( Map(
yKeyList.head -> map(yKey).getOrElse(Array()), yKeyList.head -> map.getOrElse(yKey, None).getOrElse(Array()),
(sampleName + libraryName.map("-" + _).getOrElse("")) -> map(xKey).getOrElse(Array()) (sampleName + libraryName.map("-" + _).getOrElse("")) -> map
.getOrElse(xKey, None)
.getOrElse(Array())
) )
}.toArray }.toArray
......
...@@ -31,7 +31,7 @@ ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60 ...@@ -31,7 +31,7 @@ ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
# Helper function to aggregate all of the jobs in the report across all tables # Helper function to aggregate all of the jobs in the report across all tables
# #
allJobsFromReport <- function(report) { allJobsFromReport <- function(report) {
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime") names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime", "cores")
sub <- lapply(report, function(table) table[,names]) sub <- lapply(report, function(table) table[,names])
do.call("rbind", sub) do.call("rbind", sub)
} }
...@@ -102,6 +102,43 @@ plotProgressByTime <- function(gatkReport) { ...@@ -102,6 +102,43 @@ plotProgressByTime <- function(gatkReport) {
print(p) print(p)
} }
#
# Plots the number of cores in use over time.
#
# For every event time (a job start or a job completion) we compute how many
# cores are consumed by jobs running at that moment and draw that as a line.
# Relies on allJobsFromReport() (defined elsewhere in this file, and which
# must supply a "cores" column) and on ggplot2/reshape being attached by the
# surrounding script.
#
plotCoresByTime <- function(gatkReport) {
  allJobs <- allJobsFromReport(gatkReport)
  totalCores <- sum(allJobs$cores)  # cores requested by all jobs combined
  allJobs <- allJobs[order(allJobs$startTime, decreasing = FALSE), ]
  allJobs$index <- seq_len(nrow(allJobs))

  # Express start/done times relative to the start of the first job
  minTime <- min(allJobs$startTime)
  allJobs$relStartTime <- allJobs$startTime - minTime
  allJobs$relDoneTime <- allJobs$doneTime - minTime

  # Every start and completion is an event time at which usage can change
  times <- sort(c(allJobs$relStartTime, allJobs$relDoneTime))

  # Sum the cores of all jobs matching predicate p at each event time.
  # vapply replaces the original grow-by-c() loop; same values, O(n) appends.
  countCores <- function(p) {
    s <- allJobs$relStartTime
    e <- allJobs$relDoneTime
    cpu <- allJobs$cores
    vapply(times, function(time) sum(p(s, e, time) * cpu), numeric(1))
  }

  pending <- countCores(function(s, e, t) s > t)  # not yet started
  done <- countCores(function(s, e, t) e < t)     # already finished
  running <- totalCores - pending - done          # currently in use

  d <- data.frame(times = times, running = running)
  p <- ggplot(data = melt(d, id.vars = c("times")), aes(x = times, y = value, color = variable))
  p <- p + facet_grid(variable ~ ., scales = "free")
  p <- p + geom_line(size = 2)
  p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
  p <- p + ggtitle("Cores used in time")
  print(p)
}
# #
# Creates tables for each job in this group # Creates tables for each job in this group
# #
...@@ -113,13 +150,13 @@ plotGroup <- function(groupTable) { ...@@ -113,13 +150,13 @@ plotGroup <- function(groupTable) {
sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ] sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
# create a table showing each job and all annotations # create a table showing each job and all annotations
textplot(sub, show.rownames=F) # textplot(sub, show.rownames=F)
title(paste("Job summary for", name, "full itemization"), cex=3) # title(paste("Job summary for", name, "full itemization"), cex=3)
# create the table for each combination of values in the group, listing iterations in the columns # create the table for each combination of values in the group, listing iterations in the columns
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean) # sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
textplot(as.data.frame(sum), show.rownames=F) # textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "itemizing each iteration"), cex=3) # title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# histogram of job times by groupAnnotations # histogram of job times by groupAnnotations
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) { if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
...@@ -131,14 +168,14 @@ plotGroup <- function(groupTable) { ...@@ -131,14 +168,14 @@ plotGroup <- function(groupTable) {
} }
# as above, but averaging over all iterations # as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration") # groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
if ( dim(sub)[1] > 1 ) { # if ( dim(sub)[1] > 1 ) {
try({ # need a try here because we will fail to reduce when there's just a single iteration # try({ # need a try here because we will fail to reduce when there's just a single iteration
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd)) # sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot(as.data.frame(sum), show.rownames=F) # textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3) # title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}, silent=T) # }, silent=T)
} # }
} }
# print out some useful basic information # print out some useful basic information
...@@ -147,6 +184,7 @@ print(paste("Project :", inputFileName)) ...@@ -147,6 +184,7 @@ print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) { convertUnits <- function(gatkReportData) {
convertGroup <- function(g) { convertGroup <- function(g) {
if (is.null(g$cores)) {g$cores = 1}
g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
...@@ -195,7 +233,8 @@ mergeScattersForAnalysis <- function(table) { ...@@ -195,7 +233,8 @@ mergeScattersForAnalysis <- function(table) {
intermediate = intermediate[1], intermediate = intermediate[1],
startTime = min(startTime), startTime = min(startTime),
doneTime = min(startTime) + sum(runtime), doneTime = min(startTime) + sum(runtime),
runtime = sum(runtime)) runtime = sum(runtime),
cores = min(cores))
} }
mergeScatters <- function(report) { mergeScatters <- function(report) {
...@@ -218,18 +257,28 @@ if ( ! is.na(outputPDF) ) { ...@@ -218,18 +257,28 @@ if ( ! is.na(outputPDF) ) {
plotJobsGantt(gatkReportData, T, "All jobs, by analysis, by start time", F) plotJobsGantt(gatkReportData, T, "All jobs, by analysis, by start time", F)
plotJobsGantt(gatkReportData, F, "All jobs, sorted by start time", F) plotJobsGantt(gatkReportData, F, "All jobs, sorted by start time", F)
plotProgressByTime(gatkReportData) plotProgressByTime(gatkReportData)
plotCoresByTime(gatkReportData)
# plots summarizing overall costs, merging scattered counts # plots summarizing overall costs, merging scattered counts
merged.by.scatter = mergeScatters(gatkReportData) merged.by.scatter = mergeScatters(gatkReportData)
plotJobsGantt(merged.by.scatter, F, "Jobs merged by scatter by start time", T) plotJobsGantt(merged.by.scatter, F, "Jobs merged by scatter by start time", T)
merged.as.df = do.call(rbind.data.frame, merged.by.scatter)[,c("analysisName", "runtime")] merged.as.df = do.call(rbind.data.frame, merged.by.scatter)[,c("analysisName", "runtime", "cores")]
merged.as.df$cputime = merged.as.df$runtime * merged.as.df$cores
merged.as.df$percent = merged.as.df$runtime / sum(merged.as.df$runtime) * 100 merged.as.df$percent = merged.as.df$runtime / sum(merged.as.df$runtime) * 100
merged.as.df.formatted = data.frame(analysisName=merged.as.df$analysisName,runtime=prettyNum(merged.as.df$runtime), percent=prettyNum(merged.as.df$percent,digits=2)) merged.as.df$percentCpu = merged.as.df$cputime / sum(merged.as.df$cputime) * 100
textplot(merged.as.df.formatted[order(merged.as.df$runtime),], show.rownames=F) merged.as.df.formatted = data.frame(
analysisName=merged.as.df$analysisName,
walltime=prettyNum(merged.as.df$runtime),
percent=prettyNum(merged.as.df$percent,digits=2),
cores=merged.as.df$cores,
cputime=prettyNum(merged.as.df$cputime),
percentCpu=prettyNum(merged.as.df$percentCpu,digits=2))
textplot(merged.as.df.formatted[order(merged.as.df$percentCpu),], show.rownames=F)
title("Total runtime for each analysis") title("Total runtime for each analysis")
plotTimeByHost(gatkReportData) #plotTimeByHost(gatkReportData)
for ( group in gatkReportData ) { for ( group in gatkReportData ) {
#print(group) #print(group)
plotGroup(group) plotGroup(group)
......
package nl.lumc.sasc.biopet.core.jobreport
import java.io.{File, PrintStream}
import nl.lumc.sasc.biopet.utils.{AbstractOptParser, ToolCommand}
import org.broadinstitute.gatk.utils.report.GATKReport
import scala.collection.JavaConversions._
/**
* Created by pjvanthof on 25/07/2017.
*/
/** Commandline tool that merges multiple GATK Queue job reports into one. */
object MergeQueueJobReports extends ToolCommand {

  /** Commandline arguments: one or more input reports and a single output file. */
  case class Args(inputFiles: List[File] = Nil, outputfile: File = null)

  class OptParser extends AbstractOptParser[Args](commandName) {
    opt[File]('I', "inputFile") unbounded () required () valueName "<file>" action { (x, c) =>
      c.copy(inputFiles = x :: c.inputFiles)
    } text "Input GATK job report file, may be given multiple times"
    opt[File]('o', "outputFile") unbounded () required () maxOccurs 1 valueName "<file>" action {
      (x, c) =>
        c.copy(outputfile = x)
    } text "Output file for the merged report"
  }

  /** Reads all input reports, concatenates tables that share a name and writes the merged report. */
  def main(args: Array[String]): Unit = {
    logger.info("Start")
    val argsParser = new OptParser
    val cmdArgs
      : Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    val newReport = new GATKReport
    val reports = cmdArgs.inputFiles.map(new GATKReport(_))

    // Distinct table names across all reports; a table occurring in several
    // reports is concatenated into one table of the merged report.
    val tableNames = reports.flatMap(_.getTables.map(_.getTableName)).distinct

    for (name <- tableNames; report <- reports if report.hasTable(name)) {
      val table = report.getTable(name)
      if (newReport.hasTable(name)) newReport.getTable(name).concat(table)
      else newReport.addTable(table)
    }

    // Close the stream even when printing fails (was leaked on exception before)
    val writer = new PrintStream(cmdArgs.outputfile)
    try newReport.print(writer)
    finally writer.close()

    logger.info("Done")
  }
}
package nl.lumc.sasc.biopet.extensions.stringtie
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Reference, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/** Wrapper around the `stringtie` transcript assembler executable.
  *
  * The option fields mirror stringtie's command-line flags and are read from
  * the biopet config. Flag semantics below are per the StringTie manual —
  * verify against the installed stringtie version.
  */
class Stringtie(val parent: Configurable)
    extends BiopetCommandLineFunction
    with Reference
    with Version {

  executable = config("exe", "stringtie")

  /** BAM file with the aligned reads to assemble. */
  @Input(required = true)
  var inputBam: File = _

  /** Optional reference annotation GTF, passed as -G. */
  @Input(required = false)
  var referenceGtf: Option[File] = None

  /** Assembled transcripts GTF, passed as -o (omitted when writing to stdout). */
  @Output
  var outputGtf: File = _

  /** Gene abundance estimates output file, passed as -A. */
  @Output
  var geneAbundances: Option[File] = None

  /** Fully-covered reference transcripts output file, passed as -C. */
  @Output
  var referenceCoverage: Option[File] = None

  // Library strandedness flags, passed through as --rf / --fr
  var rf: Boolean = config("rf", default = false)
  var fr: Boolean = config("fr", default = false)

  // Verbose logging (-v); follows the pipeline's debug setting by default
  var v: Boolean = config("v", default = logger.isDebugEnabled)

  // The fields below map one-to-one onto stringtie flags of the same name
  // (e.g. f -> -f, m -> -m). None / false means "use stringtie's default".
  var l: Option[String] = None
  var f: Option[Double] = config("f")
  var m: Option[Int] = config("m")
  var a: Option[Int] = config("a")
  var j: Option[Float] = config("j")
  var t: Boolean = config("t", default = false)
  var c: Option[Float] = config("c")
  var g: Option[Int] = config("g")
  var B: Boolean = config("B", default = false)
  var b: Option[String] = config("b")
  var e: Boolean = config("e", default = false)
  var M: Option[Float] = config("M")
  var x: List[String] = config("x", default = Nil)

  /** Command to get version of executable */
  def versionCommand: String = executable + " --version"

  /** Regex to get version from version command output */
  def versionRegex: Regex = "(.*)".r

  /** Assembles the full stringtie command line from the fields above. */
  def cmdLine: String =
    required(executable) +
      required(inputBam) +
      conditional(v, "-v") +
      required("-p", threads) +
      conditional(rf, "--rf") +
      conditional(fr, "--fr") +
      optional("-l", l) +
      optional("-f", f) +
      optional("-m", m) +
      optional("-A", geneAbundances) +
      optional("-C", referenceCoverage) +
      optional("-a", a) +
      optional("-j", j) +
      conditional(t, "-t") +
      optional("-c", c) +
      optional("-g", g) +
      conditional(B, "-B") +
      optional("-b", b) +
      conditional(e, "-e") +
      optional("-M", M) +
      optional("-G", referenceGtf) +
      // -x takes a comma-separated list of sequence names to exclude
      (if (x.nonEmpty) optional("-x", x.mkString(",")) else "") +
      // no -o when the pipeline streams the output to stdout
      (if (outputAsStdout) "" else required("-o", outputGtf))
}
package nl.lumc.sasc.biopet.extensions.stringtie
import java.io.File
import nl.lumc.sasc.biopet.core.{BiopetCommandLineFunction, Reference, Version}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.util.matching.Regex
/** Wrapper around `stringtie --merge`, which merges per-sample GTF assemblies.
  *
  * The option fields mirror the tool's command-line flags and are read from
  * the biopet config. Flag semantics are per the StringTie manual — verify
  * against the installed stringtie version.
  */
class StringtieMerge(val parent: Configurable)
    extends BiopetCommandLineFunction
    with Reference
    with Version {

  executable = config("exe", "stringtie")

  /** Per-sample assembled GTF files to merge (repeated positional args). */
  @Input(required = true)
  var inputGtfs: List[File] = Nil

  /** Optional reference annotation GTF, passed as -G. */
  @Input(required = false)
  var referenceGtf: Option[File] = None

  /** Merged output GTF, passed as -o (omitted when writing to stdout). */
  @Output
  var outputGtf: File = _

  // Verbose logging (-v); follows the pipeline's debug setting by default
  var v: Boolean = config("v", default = logger.isDebugEnabled)

  // The fields below map one-to-one onto `stringtie --merge` flags of the
  // same name. None / false means "use stringtie's default".
  var l: Option[String] = None
  var f: Option[Double] = config("f")
  var m: Option[Int] = config("m")
  var c: Option[Float] = config("c")
  var F: Option[Double] = config("F")
  var T: Option[Double] = config("T")
  var i: Boolean = config("i", default = false)

  /** Command to get version of executable */
  def versionCommand: String = executable + " --version"

  /** Regex to get version from version command output */
  def versionRegex: Regex = "(.*)".r

  /** Assembles the `stringtie --merge` command line from the fields above. */
  def cmdLine: String =
    required(executable) +
      required("--merge") +
      conditional(v, "-v") +
      required("-p", threads) +
      optional("-l", l) +
      optional("-f", f) +
      optional("-m", m) +
      optional("-c", c) +
      optional("-F", F) +
      // BUGFIX: `T` was read from config but never emitted; pass it as -T so
      // a configured value is no longer silently ignored.
      optional("-T", T) +
      conditional(i, "-i") +
      optional("-G", referenceGtf) +
      (if (outputAsStdout) "" else required("-o", outputGtf)) +
      repeat(inputGtfs)
}
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
*/ */
package nl.lumc.sasc.biopet package nl.lumc.sasc.biopet
import nl.lumc.sasc.biopet.core.jobreport.MergeQueueJobReports
import nl.lumc.sasc.biopet.core.pipelinestatus.PipelineStatus import nl.lumc.sasc.biopet.core.pipelinestatus.PipelineStatus
import nl.lumc.sasc.biopet.utils.{BiopetExecutable, MainCommand} import nl.lumc.sasc.biopet.utils.{BiopetExecutable, MainCommand}
...@@ -46,7 +47,8 @@ object BiopetExecutableMain extends BiopetExecutable { ...@@ -46,7 +47,8 @@ object BiopetExecutableMain extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.generateindexes.ValidateAnnotations nl.lumc.sasc.biopet.pipelines.generateindexes.ValidateAnnotations
) )
def tools: List[MainCommand] = PipelineStatus :: BiopetToolsExecutable.tools def tools: List[MainCommand] =
MergeQueueJobReports :: PipelineStatus :: BiopetToolsExecutable.tools
def templates: List[MainCommand] = List( def templates: List[MainCommand] = List(
nl.lumc.sasc.biopet.pipelines.mapping.template.MultiSampleMapping, nl.lumc.sasc.biopet.pipelines.mapping.template.MultiSampleMapping,
......
...@@ -165,7 +165,7 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = muta ...@@ -165,7 +165,7 @@ case class Stats(generalStats: mutable.Map[String, mutable.Map[Any, Int]] = muta
sampleDistributions: List[String], sampleDistributions: List[String],
contig: Option[String]): Unit = { contig: Option[String]): Unit = {
outputDir.mkdirs() outputDir.mkdirs()
this.writeToFile(new File(outputDir, "stats.json"), this.writeToFile(new File(outputDir, s"${contig.getOrElse("total")}.json"),
samples, samples,
genotypeFields, genotypeFields,
infoFields, infoFields,
......
...@@ -6,7 +6,7 @@ import java.net.URLClassLoader ...@@ -6,7 +6,7 @@ import java.net.URLClassLoader
import htsjdk.variant.variantcontext.{Genotype, VariantContext} import htsjdk.variant.variantcontext.{Genotype, VariantContext}
import htsjdk.variant.vcf.VCFFileReader import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList} import nl.lumc.sasc.biopet.utils.intervals.{BedRecord, BedRecordList}
import nl.lumc.sasc.biopet.utils.{FastaUtils, ToolCommand, VcfUtils} import nl.lumc.sasc.biopet.utils._
import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.JavaConversions._ import scala.collection.JavaConversions._
...@@ -108,6 +108,19 @@ object VcfStats extends ToolCommand { ...@@ -108,6 +108,19 @@ object VcfStats extends ToolCommand {
Await.result(totalStats, Duration.Inf) Await.result(totalStats, Duration.Inf)
val completeStatsJson = regions
.flatMap(_.map(_.chr))
.foldLeft(ConfigUtils.fileToConfigMap(new File(cmdArgs.outputDir, "total.json"))) {
case (map, contig) =>
val contigMap = ConfigUtils.fileToConfigMap(
new File(cmdArgs.outputDir,
"contigs" + File.separator + contig + File.separator + s"$contig.json"))
ConfigUtils.mergeMaps(map, contigMap)
}
IoUtils.writeLinesToFile(new File(cmdArgs.outputDir, "stats.json"),
ConfigUtils.mapToJson(completeStatsJson).nospaces :: Nil)
sc.stop sc.stop
logger.info("Done") logger.info("Done")
} }
......
...@@ -284,44 +284,6 @@ class VcfStatsTest extends TestNGSuite with Matchers { ...@@ -284,44 +284,6 @@ class VcfStatsTest extends TestNGSuite with Matchers {
general.get("NotFiltered") shouldEqual Some(1) general.get("NotFiltered") shouldEqual Some(1)
general.get("Symbolic") shouldEqual Some(0) general.get("Symbolic") shouldEqual Some(0)
general.get("SimpleInsertion") shouldEqual Some(1) general.get("SimpleInsertion") shouldEqual Some(1)
val total = generalStats
total.get("SampleDistribution-NonInformative") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-Called") shouldEqual Some(Map(3 -> 1))
total.get("SampleDistribution-Mixed") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-Hom") shouldEqual Some(Map(1 -> 1))
total.get("SampleDistribution-HomRef") shouldEqual Some(Map(1 -> 1))
total.get("SampleDistribution-Available") shouldEqual Some(Map(3 -> 1))
total.get("QUAL") shouldEqual Some(Map(1541 -> 1))
total.get("SampleDistribution-HetNonRef") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-Het") shouldEqual Some(Map(2 -> 1))
total.get("SampleDistribution-NoCall") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-Filtered") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-HomVar") shouldEqual Some(Map(0 -> 1))
total.get("SampleDistribution-Variant") shouldEqual Some(Map(2 -> 1))
generalStats.get("general") should not be empty
val totGeneral = generalStats("general")
totGeneral.get("PolymorphicInSamples") shouldEqual Some(1)
totGeneral.get("ComplexIndel") shouldEqual Some(0)
totGeneral.get("FullyDecoded") shouldEqual Some(0)
totGeneral.get("PointEvent") shouldEqual Some(0)
totGeneral.get("MNP") shouldEqual Some(0)
totGeneral.get("Indel") shouldEqual Some(1)
totGeneral.get("Biallelic") shouldEqual Some(1)
totGeneral.get("SimpleDeletion") shouldEqual Some(0)
totGeneral.get("Variant") shouldEqual Some(1)
totGeneral.get("SymbolicOrSV") shouldEqual Some(0)
totGeneral.get("MonomorphicInSamples") shouldEqual Some(0)
totGeneral.get("SNP") shouldEqual Some(0)
totGeneral.get("Filtered") shouldEqual Some(0)
totGeneral.get("StructuralIndel") shouldEqual Some(0)
totGeneral.get("Total") shouldEqual Some(1)
totGeneral.get("Mixed") shouldEqual Some(0)
totGeneral.get("NotFiltered") shouldEqual Some(1)
totGeneral.get("Symbolic") shouldEqual Some(0)
totGeneral.get("SimpleInsertion") shouldEqual Some(1)
} }
@Test @Test
...@@ -354,29 +316,5 @@ class VcfStatsTest extends TestNGSuite with Matchers { ...@@ -354,29 +316,5 @@ class VcfStatsTest extends TestNGSuite with Matchers {
general.get("Available") shouldEqual Some(1) general.get("Available") shouldEqual Some(1)
general.get("Het") shouldEqual Some(1) general.get("Het") shouldEqual Some(1)
general.get("HetNonRef") shouldEqual Some(0) general.get("HetNonRef") shouldEqual Some(0)
val total = genotypeStats
total.get("GQ") shouldEqual Some(Map(99 -> 1))
total.get("AD") shouldEqual Some(Map(24 -> 1, 21 -> 1))
total.get("AD-used") shouldEqual Some(Map(24 -> 1, 21 -> 1))
total.get("DP") shouldEqual Some(Map(45 -> 1))
total.get("AD-alt") shouldEqual Some(Map(21 -> 1))
total.get("AD-ref") shouldEqual Some(Map(24 -> 1))
total.get("general") should not be empty
val totGeneral = genotypeStats("general")
totGeneral.get("Hom") shouldEqual Some(0)
totGeneral.get("NoCall") shouldEqual Some(0)
totGeneral.get("Variant") shouldEqual Some(1)
totGeneral.get("Filtered") shouldEqual Some(0)
totGeneral.get("NonInformative") shouldEqual Some(0)
totGeneral.get("Called") shouldEqual Some(1)
totGeneral.get("Total") shouldEqual Some(1)
totGeneral.get("HomVar") shouldEqual Some(0)
totGeneral.get("HomRef") shouldEqual Some(0)
totGeneral.get("Mixed") shouldEqual Some(0)
totGeneral.get("Available") shouldEqual Some(1)
totGeneral.get("Het") shouldEqual Some(1)
totGeneral.get("HetNonRef") shouldEqual Some(0)
} }
} }
...@@ -148,6 +148,7 @@ class Gentrap(val parent: Configurable) ...@@ -148,6 +148,7 @@ class Gentrap(val parent: Configurable)
cufflinksBlind.foreach(validate.gtfFile :+= _.annotationGtf) cufflinksBlind.foreach(validate.gtfFile :+= _.annotationGtf)
cufflinksGuided.foreach(validate.gtfFile :+= _.annotationGtf) cufflinksGuided.foreach(validate.gtfFile :+= _.annotationGtf)
cufflinksStrict.foreach(validate.gtfFile :+= _.annotationGtf) cufflinksStrict.foreach(validate.gtfFile :+= _.annotationGtf)
stringtie.foreach(validate.gtfFile :+= _.annotationGtf)
validate.jobOutputFile = new File(outputDir, ".validate.annotation.out") validate.jobOutputFile = new File(outputDir, ".validate.annotation.out")
add(validate) add(validate)
...@@ -169,6 +170,11 @@ class Gentrap(val parent: Configurable) ...@@ -169,6 +170,11 @@ class Gentrap(val parent: Configurable)
Some(new BaseCounts(this)) Some(new BaseCounts(this))
else None else None
lazy val stringtie: Option[Stringtie] =
if (expMeasures().contains(ExpMeasures.Stringtie))
Some(new Stringtie(this))
else None
lazy val cufflinksBlind: Option[CufflinksBlind] = lazy val cufflinksBlind: Option[CufflinksBlind] =
if (expMeasures().contains(ExpMeasures.CufflinksBlind)) if (expMeasures().contains(ExpMeasures.CufflinksBlind))
Some(new CufflinksBlind(this)) Some(new CufflinksBlind(this))
...@@ -186,7 +192,7 @@ class Gentrap(val parent: Configurable) ...@@ -186,7 +192,7 @@ class Gentrap(val parent: Configurable)
def executedMeasures: List[QScript with Measurement] = def executedMeasures: List[QScript with Measurement] =
(fragmentsPerGene :: baseCounts :: cufflinksBlind :: (fragmentsPerGene :: baseCounts :: cufflinksBlind ::
cufflinksGuided :: cufflinksStrict :: Nil).flatten cufflinksGuided :: cufflinksStrict :: stringtie :: Nil).flatten
/** Whether to do simple variant calling on RNA or not */ /** Whether to do simple variant calling on RNA or not */
lazy val shivaVariantcalling: Option[ShivaVariantcalling] = lazy val shivaVariantcalling: Option[ShivaVariantcalling] =
...@@ -299,7 +305,8 @@ object Gentrap extends PipelineCommand { ...@@ -299,7 +305,8 @@ object Gentrap extends PipelineCommand {
/** Enumeration of available expression measures */ /** Enumeration of available expression measures */
object ExpMeasures extends Enumeration { object ExpMeasures extends Enumeration {
val FragmentsPerGene, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind = Value val FragmentsPerGene, BaseCounts, CufflinksStrict, CufflinksGuided, CufflinksBlind, Stringtie =
Value
} }
/** Enumeration of available strandedness */ /** Enumeration of available strandedness */
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.gentrap.measures
import nl.lumc.sasc.biopet.core.annotations.AnnotationGtf
import nl.lumc.sasc.biopet.extensions.stringtie.{StringtieMerge, Stringtie => StringtieTool}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
/**
* Created by pjvan_thof on 1/12/16.
*/