Commit a464f561 authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Merge branch 'develop' of git.lumc.nl:biopet/biopet into feature-gears

parents 41eade8b 4258ac1b
......@@ -5,13 +5,15 @@
*/
package nl.lumc.sasc.biopet.extensions.gatk.broad
import nl.lumc.sasc.biopet.core.{ Version, CommandLineResources, Reference, BiopetJavaCommandLineFunction }
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version {
memoryLimit = Option(3)
var executable: String = config("java", default = "java", submodule = "java", freeVar = false)
override def subPath = "gatk" :: super.subPath
jarFile = config("gatk_jar")
......@@ -20,6 +22,7 @@ trait GatkGeneral extends CommandLineGATK with CommandLineResources with Referen
override def defaultCoreMemory = 4.0
override def faiRequired = true
override def dictRequired = true
if (config.contains("intervals")) intervals = config("intervals").asFileList
if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList
......@@ -39,5 +42,8 @@ trait GatkGeneral extends CommandLineGATK with CommandLineResources with Referen
override def versionExitcode = List(0, 1)
def versionCommand = "java" + " -jar " + jarFile + " -version"
override def getVersion = super.getVersion.collect { case v => "Gatk " + v }
override def getVersion = {
BiopetCommandLineFunction.preProcessExecutable(executable).path.foreach(executable = _)
super.getVersion.collect { case v => "Gatk " + v }
}
}
......@@ -10,7 +10,7 @@ import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral {
annotation ++= config("annotation", default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")).asStringList
annotation ++= config("annotation", default = Seq(), freeVar = false).asStringList
if (config.contains("dbsnp")) dbsnp = config("dbsnp")
if (config.contains("scattercount", "genotypegvcfs")) scatterCount = config("scattercount")
......
......@@ -44,12 +44,13 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
bams <- 0 to 2;
raw <- bool;
bcftools <- bool;
bcftools_singlesample <- bool;
haplotypeCallerGvcf <- bool;
haplotypeCallerAllele <- bool;
unifiedGenotyperAllele <- bool;
unifiedGenotyper <- bool;
haplotypeCaller <- bool
) yield Array[Any](bams, raw, bcftools, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele)
) yield Array[Any](bams, raw, bcftools, bcftools_singlesample, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele)
).toArray
}
......@@ -57,6 +58,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
def testShivaVariantcalling(bams: Int,
raw: Boolean,
bcftools: Boolean,
bcftools_singlesample: Boolean,
unifiedGenotyper: Boolean,
haplotypeCaller: Boolean,
haplotypeCallerGvcf: Boolean,
......@@ -65,6 +67,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
val callers: ListBuffer[String] = ListBuffer()
if (raw) callers.append("raw")
if (bcftools) callers.append("bcftools")
if (bcftools_singlesample) callers.append("bcftools_singlesample")
if (unifiedGenotyper) callers.append("unifiedgenotyper")
if (haplotypeCallerGvcf) callers.append("haplotypecaller_gvcf")
if (haplotypeCallerAllele) callers.append("haplotypecaller_allele")
......@@ -78,7 +81,8 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
val illegalArgumentException = pipeline.inputBams.isEmpty ||
(!raw && !bcftools &&
!haplotypeCaller && !unifiedGenotyper &&
!haplotypeCallerGvcf && !haplotypeCallerAllele && !unifiedGenotyperAllele)
!haplotypeCallerGvcf && !haplotypeCallerAllele && !unifiedGenotyperAllele &&
!bcftools_singlesample)
if (illegalArgumentException) intercept[IllegalArgumentException] {
pipeline.script()
......@@ -90,7 +94,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe 1 + (if (raw) 1 else 0)
//pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0)
//FIXME: Can not check for bcftools because of piping
pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
//pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0)
pipeline.functions.count(_.isInstanceOf[HaplotypeCaller]) shouldBe (if (haplotypeCaller) 1 else 0) +
(if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0)
......
......@@ -26,7 +26,6 @@ import org.ggf.drmaa.JobTemplate
import scala.collection.mutable
import scala.io.Source
import scala.sys.process.{ Process, ProcessLogger }
import scala.util.matching.Regex
import scala.collection.JavaConversions._
/** Biopet command line trait to auto check executable and cluster values */
......@@ -50,20 +49,18 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
val writer = new PrintWriter(file)
writer.println("set -eubf")
writer.println("set -o pipefail")
lines.foreach(writer.println(_))
lines.foreach(writer.println)
writer.close()
}
// This overrides the default "sh" from queue. For Biopet the default is "bash"
updateJobRun = {
case jt: JobTemplate => {
case jt: JobTemplate =>
changeScript(new File(jt.getArgs.head.toString))
jt.setRemoteCommand(remoteCommand)
}
case ps: ProcessSettings => {
case ps: ProcessSettings =>
changeScript(new File(ps.getCommand.tail.head))
ps.setCommand(Array(remoteCommand) ++ ps.getCommand.tail)
}
}
/**
......@@ -91,50 +88,19 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
}
/** can override this value is executable may not be converted to CanonicalPath */
/**
* Can override this value is executable may not be converted to CanonicalPath
* @deprecated
*/
val executableToCanonicalPath = true
/**
* Checks executable. Follow full CanonicalPath, checks if it is existing and do a md5sum on it to store in job report
*/
protected[core] def preProcessExecutable() {
if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) {
if (executable != null) {
if (!BiopetCommandLineFunction.executableCache.contains(executable)) {
try {
val oldExecutable = executable
val buffer = new StringBuffer()
val cmd = Seq("which", executable)
val process = Process(cmd).run(ProcessLogger(buffer.append(_)))
if (process.exitValue == 0) {
executable = buffer.toString
val file = new File(executable)
if (executableToCanonicalPath) executable = file.getCanonicalPath
else executable = file.getAbsolutePath
} else Logging.addError("executable: '" + executable + "' not found, please check config")
BiopetCommandLineFunction.executableCache += oldExecutable -> executable
BiopetCommandLineFunction.executableCache += executable -> executable
} catch {
case ioe: java.io.IOException =>
logger.warn(s"Could not use 'which' on '$executable', check on executable skipped: " + ioe)
}
} else executable = BiopetCommandLineFunction.executableCache(executable)
if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) {
if (new File(executable).exists()) {
val is = new FileInputStream(executable)
val cnt = is.available
val bytes = Array.ofDim[Byte](cnt)
is.read(bytes)
is.close()
val temp = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase
BiopetCommandLineFunction.executableMd5Cache += executable -> temp
} else BiopetCommandLineFunction.executableMd5Cache += executable -> "file_does_not_exist"
}
}
}
val md5 = BiopetCommandLineFunction.executableMd5Cache.get(executable)
addJobReportBinding("md5sum_exe", md5.getOrElse("None"))
val exe = BiopetCommandLineFunction.preProcessExecutable(executable)
exe.path.foreach(executable = _)
addJobReportBinding("md5sum_exe", exe.md5.getOrElse("N/A"))
}
/** executes checkExecutable method and fill job report */
......@@ -166,10 +132,9 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
that.beforeGraph()
that.internalBeforeGraph()
this match {
case p: BiopetPipe => {
case p: BiopetPipe =>
p.commands.last._outputAsStdout = true
new BiopetPipe(p.commands ::: that :: Nil)
}
case _ => new BiopetPipe(List(this, that))
}
}
......@@ -230,7 +195,48 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
}
/** stores global caches */
object BiopetCommandLineFunction {
object BiopetCommandLineFunction extends Logging {
private[core] val executableMd5Cache: mutable.Map[String, String] = mutable.Map()
private[core] val executableCache: mutable.Map[String, String] = mutable.Map()
case class Executable(path: Option[String], md5: Option[String])
def preProcessExecutable(executable: String): Executable = {
if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) {
if (executable != null) {
if (!BiopetCommandLineFunction.executableCache.contains(executable)) {
try {
val buffer = new StringBuffer()
val cmd = Seq("which", executable)
val process = Process(cmd).run(ProcessLogger(buffer.append(_)))
if (process.exitValue == 0) {
val file = new File(buffer.toString)
BiopetCommandLineFunction.executableCache += executable -> file.getAbsolutePath
} else {
Logging.addError("executable: '" + executable + "' not found, please check config")
BiopetCommandLineFunction.executableCache += executable -> executable
}
} catch {
case ioe: java.io.IOException =>
logger.warn(s"Could not use 'which' on '$executable', check on executable skipped: " + ioe)
BiopetCommandLineFunction.executableCache += executable -> executable
}
}
if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) {
val newExe = BiopetCommandLineFunction.executableCache(executable)
if (new File(newExe).exists()) {
val is = new FileInputStream(newExe)
val cnt = is.available
val bytes = Array.ofDim[Byte](cnt)
is.read(bytes)
is.close()
val temp = MessageDigest.getInstance("MD5").digest(bytes).map("%02X".format(_)).mkString.toLowerCase
BiopetCommandLineFunction.executableMd5Cache += newExe -> temp
} else BiopetCommandLineFunction.executableMd5Cache += newExe -> "file_does_not_exist"
}
}
}
Executable(BiopetCommandLineFunction.executableCache.get(executable),
BiopetCommandLineFunction.executableMd5Cache.get(executable))
}
}
......@@ -21,9 +21,9 @@ import org.broadinstitute.gatk.queue.function.JavaCommandLineFunction
trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetCommandLineFunction {
executable = config("java", default = "java", submodule = "java", freeVar = false)
javaGCThreads = config("java_gc_threads")
javaGCHeapFreeLimit = config("java_gc_heap_freelimit")
javaGCTimeLimit = config("java_gc_timelimit")
javaGCThreads = config("java_gc_threads", default = 4)
javaGCHeapFreeLimit = config("java_gc_heap_freelimit", default = 10)
javaGCTimeLimit = config("java_gc_timelimit", default = 50)
override def defaultVmemFactor: Double = 2.0
......@@ -38,8 +38,6 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC
if (javaMainClass != null && javaClasspath.isEmpty)
javaClasspath = JavaCommandLineFunction.currentClasspath
//threads = getThreads(defaultThreads)
}
/** Creates command to execute extension */
......
......@@ -28,7 +28,7 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
var residentFactor: Double = config("resident_factor", default = 1.2)
private var _coreMemory: Double = 2.0
def coreMemeory = _coreMemory
def coreMemory = _coreMemory
var retry = 0
......@@ -91,7 +91,7 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
commands.foreach(_.setResources())
nCoresRequest = Some(commands.map(_.threads).sum + threadsCorrection)
_coreMemory = commands.map(cmd => cmd.coreMemeory * (cmd.threads.toDouble / threads.toDouble)).sum
_coreMemory = commands.map(cmd => cmd.coreMemory * (cmd.threads.toDouble / threads.toDouble)).sum
memoryLimit = Some(_coreMemory * threads)
residentLimit = Some((_coreMemory + (0.5 * retry)) * residentFactor)
vmem = Some((_coreMemory * (vmemFactor + (0.5 * retry))) + "G")
......
......@@ -54,8 +54,6 @@ object Version extends Logging {
if (versionCache.contains(versionCommand)) return versionCache.get(versionCommand)
else if (versionCommand == null || versionRegex == null) return None
else {
val exe = new File(versionCommand.trim.split(" ")(0))
if (!exe.exists()) return None
val stdout = new StringBuffer()
val stderr = new StringBuffer()
def outputLog = "Version command: \n" + versionCommand +
......
......@@ -35,10 +35,9 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi
@Output(doc = "Output (for region query)", required = false)
var outputQuery: File = null
@Output(doc = "Output (for indexing)", required = false) // NOTE: it's a def since we can't change the index name ~ it's always input_name + .tbi
lazy val outputIndex: File = {
require(input != null, "Input must be defined")
new File(input.toString + ".tbi")
def outputIndex: File = {
require(input != null, "Input should be defined")
new File(input.getAbsolutePath + ".tbi")
}
@Argument(doc = "Regions to query", required = false)
......@@ -70,7 +69,8 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi
p match {
case Some(fmt) =>
require(validFormats.contains(fmt), "-p flag must be one of " + validFormats.mkString(", "))
case None => ;
outputFiles :+= outputIndex
case None =>
}
}
......@@ -96,3 +96,19 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi
else baseCommand
}
}
object Tabix {
def apply(root: Configurable, input: File) = {
val tabix = new Tabix(root)
tabix.input = input
tabix.p = tabix.input.getName match {
case s if s.endsWith(".vcf.gz") => Some("vcf")
case s if s.endsWith(".bed.gz") => Some("bed")
case s if s.endsWith(".sam.gz") => Some("sam")
case s if s.endsWith(".gff.gz") => Some("gff")
case s if s.endsWith(".psltbl.gz") => Some("psltbl")
case _ => throw new IllegalArgumentException("Unknown file type")
}
tabix
}
}
......@@ -29,6 +29,8 @@ class WigToBigWig(val root: Configurable) extends BiopetCommandLineFunction {
@Input(doc = "Input wig file")
var inputWigFile: File = _
override def defaultCoreMemory = 3.0
@Input(doc = "Input chrom sizes file", required = true)
var inputChromSizesFile: File = _
......
......@@ -22,28 +22,66 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** This extension is based on bcftools 1.1-134 */
class BcftoolsCall(val root: Configurable) extends Bcftools {
@Input(doc = "Input File")
@Input(doc = "Input File", required = false)
var input: File = _
@Output(doc = "output File")
@Output(doc = "output File", required = false)
var output: File = _
var O: String = null
var v: Boolean = config("v", default = true)
var O: Option[String] = None
var v: Boolean = config("v", default = false)
var c: Boolean = config("c", default = false)
var m: Boolean = config("m", default = false)
var r: Option[String] = config("r")
@Input(required = false)
var R: Option[File] = config("R")
var s: Option[String] = config("s")
@Input(required = false)
var S: Option[File] = config("S")
var t: Option[String] = config("t")
@Input(required = false)
var T: Option[File] = config("T")
var A: Boolean = config("A", default = false)
var f: List[String] = config("f", default = Nil)
var g: Option[Int] = config("g")
var i: Boolean = config("i", default = false)
var M: Boolean = config("M", default = false)
var V: Option[String] = config("V")
var C: Option[String] = config("C")
var n: Option[Float] = config("n")
var p: Option[Float] = config("p")
var P: Option[Float] = config("P")
var X: Boolean = config("X", default = false)
var Y: Boolean = config("Y", default = false)
override def beforeGraph(): Unit = {
require(c != m)
}
def cmdBase = required(executable) +
def cmdLine = required(executable) +
required("call") +
optional("-O", O) +
conditional(v, "-v") +
conditional(c, "-c") +
conditional(m, "-m")
def cmdPipeInput = cmdBase + "-"
def cmdPipe = cmdBase + input
def cmdLine = cmdPipe + " > " + required(output)
conditional(m, "-m") +
optional("-r", r) +
optional("-R", R) +
optional("-s", s) +
optional("-S", S) +
optional("-t", t) +
optional("-T", T) +
conditional(A, "-A") +
repeat("-f", f) +
optional("-g", g) +
conditional(i, "-i") +
conditional(M, "-M") +
optional("-V", V) +
optional("-C", C) +
optional("-n", n) +
optional("-p", p) +
optional("-P", P) +
conditional(X, "-X") +
conditional(Y, "-Y") +
(if (outputAsStsout) "" else required("-o", output)) +
(if (inputAsStdin) "-" else required(input))
}
package nl.lumc.sasc.biopet.extensions.bcftools
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
/**
* Created by sajvanderzeeuw on 16-10-15.
*/
class BcftoolsMerge(val root: Configurable) extends Bcftools {
@Input(doc = "Input File", required = true)
var input: List[File] = Nil
@Output(doc = "output File", required = false)
var output: File = _
@Input(required = false)
var R: Option[File] = config("R")
@Input(required = false)
var useheader: Option[File] = config("useheader")
@Input(required = false)
var l: Option[File] = config("l")
var forcesamples: Boolean = config("forcesamples", default = false)
var printheader: Boolean = config("printheader", default = false)
var f: List[String] = config("f", default = Nil)
var i: List[String] = config("i", default = Nil)
var m: Option[String] = config("m")
var O: Option[String] = config("O")
var r: List[String] = config("r", default = Nil)
def cmdLine = required(executable) +
required("merge") +
(if (outputAsStsout) "" else required("-o", output)) +
conditional(forcesamples, "--force-samples") +
conditional(printheader, "--print-header") +
optional("--use-header", useheader) +
optional("-f", f) +
optional("-i", i) +
optional("-l", l) +
optional("-m", m) +
optional("-O", O) +
optional("-r", r) +
optional("-R", R) +
repeat(input)
}
......@@ -53,6 +53,8 @@ class CombineVariants(val root: Configurable) extends Gatk {
case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None =>
case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions")
}
deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
deps = deps.distinct
}
override def cmdLine = super.cmdLine +
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.report.{ GATKReportTable, GATKReport }
/**
* Extension for CombineVariants from GATK
*
* Created by pjvan_thof on 2/26/15.
*/
class GenotypeConcordance(val root: Configurable) extends Gatk with Summarizable {
val analysisType = "GenotypeConcordance"
@Input(required = true)
var evalFile: File = null
@Input(required = true)
var compFile: File = null
@Output(required = true)
var outputFile: File = null
var moltenize = true
def summaryFiles = Map("output" -> outputFile)
def summaryStats = {
val report = new GATKReport(outputFile)
val compProportions = report.getTable("GenotypeConcordance_CompProportions")
val counts = report.getTable("GenotypeConcordance_Counts")
val evalProportions = report.getTable("GenotypeConcordance_EvalProportions")
val genotypeSummary = report.getTable("GenotypeConcordance_Summary")
val siteSummary = report.getTable("SiteConcordance_Summary")
val samples = for (i <- 0 until genotypeSummary.getNumRows) yield genotypeSummary.get(i, "Sample").toString
def getMap(table: GATKReportTable, column: String) = samples.distinct.map(sample => sample -> {
(for (i <- 0 until table.getNumRows if table.get(i, "Sample") == sample) yield s"${table.get(i, "Eval_Genotype")}__${table.get(i, "Comp_Genotype")}" -> table.get(i, column)).toMap
}).toMap
Map(
"compProportions" -> getMap(compProportions, "Proportion"),
"counts" -> getMap(counts, "Count"),