Commit 4bb140dd authored by Sander Bollen's avatar Sander Bollen
Browse files

Merge branch 'develop' into add-cnmops-kopisu

parents 150690ae 3d9a0df1
......@@ -26,7 +26,7 @@
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.7.0-SNAPSHOT</version>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
......
......@@ -24,7 +24,7 @@
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.7.0-SNAPSHOT</version>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
......
......@@ -31,7 +31,7 @@
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.7.0-SNAPSHOT</version>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
......@@ -54,5 +54,17 @@
<artifactId>Mapping</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.10</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
......@@ -33,10 +33,18 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
def this() = this(null)
case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
case class FastaOutput(variants: File, consensus: File, consensusVariants: File) {
def summaryFiles(prefix: Option[String] = None) = Map(
s"${prefix.map(_ + "_").getOrElse("")}variants_fasta" -> variants,
s"${prefix.map(_ + "_").getOrElse("")}consensus_fasta" -> consensus,
s"${prefix.map(_ + "_").getOrElse("")}consensus_variants_fasta" -> consensusVariants
)
}
def variantcallers = List("unifiedgenotyper")
val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt
override def defaults = Map(
"ploidy" -> 1,
"variantcallers" -> variantcallers
......@@ -46,28 +54,26 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
def summaryFile: File = new File(outputDir, "Basty.summary.json")
//TODO: Add summary
def summaryFiles: Map[String, File] = Map()
//TODO: Add summary
def summarySettings: Map[String, Any] = Map()
def summarySettings: Map[String, Any] = Map("boot_runs" -> numBoot)
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
//TODO: Add summary
def summaryFiles: Map[String, File] = Map()
def summaryFiles: Map[String, File] = output.summaryFiles() ++ outputSnps.summaryFiles(Some("snps_only"))
//TODO: Add summary
def summaryStats: Map[String, Any] = Map()
override def summarySettings: Map[String, Any] = Map()
def makeLibrary(id: String) = new Library(id)
class Library(libId: String) extends AbstractLibrary(libId) {
//TODO: Add summary
def summaryFiles: Map[String, File] = Map()
//TODO: Add summary
def summaryStats: Map[String, Any] = Map()
override def summarySettings: Map[String, Any] = Map()
protected def addJobs(): Unit = {}
}
......@@ -136,7 +142,6 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
add(raxmlMl)
val r = new scala.util.Random(seed)
val numBoot = config("boot_runs", default = 100, namespace = "raxml").asInt
val bootList = for (t <- 0 until numBoot) yield {
val raxmlBoot = new Raxml(this)
raxmlBoot.input = variants
......
@HD VN:1.4 SO:unsorted
@SQ SN:chr1 LN:9 UR:file:/home/pjvan_thof/pipelines/biopet/public/mapping/src/test/resources/ref.fa M5:fe15dbbd0900310caf32827f6da57550
package nl.lumc.sasc.biopet.pipelines.basty
import java.io.{ File, FileOutputStream }
import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.{ Raxml, RunGubbins }
import nl.lumc.sasc.biopet.extensions.gatk.{ BaseRecalibrator, IndelRealigner, PrintReads, RealignerTargetCreator }
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import nl.lumc.sasc.biopet.extensions.tools.{ BastyGenerateFasta, VcfStats }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
/**
* Created by pjvanthof on 27/09/16.
*/
class BastyTest extends TestNGSuite with Matchers {
def initPipeline(map: Map[String, Any]): Basty = {
new Basty() {
override def configNamespace = "shiva"
override def globalConfig = new Config(ConfigUtils.mergeMaps(map, BastyTest.config))
qSettings = new QSettings
qSettings.runName = "test"
}
}
@DataProvider(name = "bastyOptions")
def bastyOptions = {
for (
s1 <- sample1; s2 <- sample2
) yield Array("", s1, s2)
}
def sample1 = Array(false, true)
def sample2 = Array(false, true)
def realign = true
def baseRecalibration = true
def multisampleCalling: Boolean = true
def sampleCalling = false
def libraryCalling = false
def dbsnp = false
def svCalling = false
def cnvCalling = false
def annotation = false
def bootRuns: Option[Int] = None
@Test(dataProvider = "bastyOptions")
def testBasty(f: String, sample1: Boolean, sample2: Boolean): Unit = {
val map = {
var m: Map[String, Any] = BastyTest.config
if (sample1) m = ConfigUtils.mergeMaps(BastyTest.sample1, m)
if (sample2) m = ConfigUtils.mergeMaps(BastyTest.sample2, m)
if (dbsnp) m = ConfigUtils.mergeMaps(Map("dbsnp" -> "test.vcf.gz"), m)
ConfigUtils.mergeMaps(Map(
"multisample_variantcalling" -> multisampleCalling,
"single_sample_variantcalling" -> sampleCalling,
"library_variantcalling" -> libraryCalling,
"use_indel_realigner" -> realign,
"use_base_recalibration" -> baseRecalibration,
"sv_calling" -> svCalling,
"cnv_calling" -> cnvCalling,
"annotation" -> annotation,
"boot_runs" -> bootRuns), m)
}
if (!sample1 && !sample2) { // When no samples
intercept[IllegalArgumentException] {
initPipeline(map).script()
}
Logging.errors.clear()
} else {
val pipeline = initPipeline(map)
pipeline.script()
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0)
pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0)
pipeline.summarySettings.get("boot_runs") shouldBe Some(bootRuns.getOrElse(100))
pipeline.summaryFile shouldBe new File(BastyTest.outputDir, "Basty.summary.json")
pipeline.summaryFiles shouldBe Map()
pipeline.samples foreach {
case (sampleId, sample) =>
sample.summarySettings shouldBe Map()
sample.summaryFiles.get("variants_fasta") should not be None
sample.summaryFiles.get("consensus_fasta") should not be None
sample.summaryFiles.get("consensus_variants_fasta") should not be None
sample.summaryFiles.get("snps_only_variants_fasta") should not be None
sample.summaryFiles.get("snps_only_consensus_fasta") should not be None
sample.summaryFiles.get("snps_only_consensus_variants_fasta") should not be None
sample.summaryStats shouldBe Map()
sample.libraries.foreach {
case (libId, lib) =>
lib.summarySettings shouldBe Map()
lib.summaryFiles shouldBe Map()
lib.summaryStats shouldBe Map()
}
}
pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (
(if (multisampleCalling) 2 else 0) +
(if (sampleCalling) numberSamples * 2 else 0) +
(if (libraryCalling) numberLibs * 2 else 0))
pipeline.functions.count(_.isInstanceOf[BastyGenerateFasta]) shouldBe 2 + (2 * numberSamples)
pipeline.functions.count(_.isInstanceOf[Raxml]) shouldBe (2 * (2 + bootRuns.getOrElse(100)))
pipeline.functions.count(_.isInstanceOf[RunGubbins]) shouldBe 2
}
}
}
object BastyTest {
val outputDir = Files.createTempDir()
outputDir.deleteOnExit()
new File(outputDir, "input").mkdirs()
def inputTouch(name: String): String = {
val file = new File(outputDir, "input" + File.separator + name)
Files.touch(file)
file.getAbsolutePath
}
private def copyFile(name: String): Unit = {
val is = getClass.getResourceAsStream("/" + name)
val os = new FileOutputStream(new File(outputDir, name))
org.apache.commons.io.IOUtils.copy(is, os)
os.close()
}
copyFile("ref.fa")
copyFile("ref.dict")
copyFile("ref.fa.fai")
val config = Map(
"name_prefix" -> "test",
"cache" -> true,
"dir" -> "test",
"vep_script" -> "test",
"output_dir" -> outputDir,
"reference_fasta" -> (outputDir + File.separator + "ref.fa"),
"gatk_jar" -> "test",
"samtools" -> Map("exe" -> "test"),
"bcftools" -> Map("exe" -> "test"),
"fastqc" -> Map("exe" -> "test"),
"input_alleles" -> "test",
"variantcallers" -> "raw",
"fastqc" -> Map("exe" -> "test"),
"seqtk" -> Map("exe" -> "test"),
"sickle" -> Map("exe" -> "test"),
"cutadapt" -> Map("exe" -> "test"),
"bwa" -> Map("exe" -> "test"),
"samtools" -> Map("exe" -> "test"),
"macs2" -> Map("exe" -> "test"),
"igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
"wigtobigwig" -> Map("exe" -> "test"),
"md5sum" -> Map("exe" -> "test"),
"bgzip" -> Map("exe" -> "test"),
"tabix" -> Map("exe" -> "test"),
"breakdancerconfig" -> Map("exe" -> "test"),
"breakdancercaller" -> Map("exe" -> "test"),
"pindelconfig" -> Map("exe" -> "test"),
"pindelcaller" -> Map("exe" -> "test"),
"pindelvcf" -> Map("exe" -> "test"),
"clever" -> Map("exe" -> "test"),
"delly" -> Map("exe" -> "test"),
"rungubbins" -> Map("exe" -> "test"),
"raxml" -> Map("exe" -> "test"),
"pysvtools" -> Map(
"exe" -> "test",
"exclusion_regions" -> "test",
"translocations_only" -> false),
"freec" -> Map(
"exe" -> "test",
"chrFiles" -> "test",
"chrLenFile" -> "test"
)
)
val sample1 = Map(
"samples" -> Map("sample1" -> Map("libraries" -> Map(
"lib1" -> Map(
"R1" -> inputTouch("1_1_R1.fq"),
"R2" -> inputTouch("1_1_R2.fq")
)
)
)))
val sample2 = Map(
"samples" -> Map("sample3" -> Map("libraries" -> Map(
"lib1" -> Map(
"R1" -> inputTouch("2_1_R1.fq"),
"R2" -> inputTouch("2_1_R2.fq")
),
"lib2" -> Map(
"R1" -> inputTouch("2_2_R1.fq"),
"R2" -> inputTouch("2_2_R2.fq")
)
)
)))
}
\ No newline at end of file
......@@ -21,7 +21,7 @@
<parent>
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.7.0-SNAPSHOT</version>
<version>0.8.0-SNAPSHOT</version>
<relativePath>../</relativePath>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -18,7 +18,7 @@ import java.io.{ File, FileInputStream, PrintWriter }
import java.security.MessageDigest
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import org.broadinstitute.gatk.utils.runtime.ProcessSettings
import org.ggf.drmaa.JobTemplate
......@@ -40,6 +40,8 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
def defaultRemoteCommand = "bash"
private val remoteCommand: String = config("remote_command", default = defaultRemoteCommand)
val preCommands: List[String] = config("pre_commands", default = Nil, freeVar = false)
private def changeScript(file: File): Unit = {
val lines = Source.fromFile(file).getLines().toList
val writer = new PrintWriter(file)
......@@ -111,7 +113,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
* Checks executable. Follow full CanonicalPath, checks if it is existing and do a md5sum on it to store in job report
*/
protected[core] def preProcessExecutable() {
val exe = BiopetCommandLineFunction.preProcessExecutable(executable)
val exe = BiopetCommandLineFunction.preProcessExecutable(executable, preCommands)
exe.path.foreach(executable = _)
addJobReportBinding("md5sum_exe", exe.md5.getOrElse("N/A"))
}
......@@ -219,7 +221,8 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
*/
override final def commandLine: String = {
preCmdInternal()
val cmd = cmdLine +
val cmd = preCommands.mkString("\n", "\n", "\n") +
cmdLine +
stdinFile.map(file => " < " + required(file.getAbsoluteFile)).getOrElse("") +
stdoutFile.map(file => " > " + required(file.getAbsoluteFile)).getOrElse("")
addJobReportBinding("command", cmd)
......@@ -240,13 +243,18 @@ object BiopetCommandLineFunction extends Logging {
private[core] val executableCache: mutable.Map[String, String] = mutable.Map()
case class Executable(path: Option[String], md5: Option[String])
def preProcessExecutable(executable: String): Executable = {
def preProcessExecutable(executable: String, pre_commands: List[String] = Nil): Executable = {
if (!BiopetCommandLineFunction.executableMd5Cache.contains(executable)) {
if (executable != null) {
if (!BiopetCommandLineFunction.executableCache.contains(executable)) {
try {
val buffer = new StringBuffer()
val cmd = Seq("which", executable)
val tempFile = File.createTempFile("which.", ".sh")
val writer = new PrintWriter(tempFile)
pre_commands.foreach(cmd => writer.println(cmd + " > /dev/null 2> /dev/null"))
writer.println(s"which $executable")
writer.close()
val cmd = Seq("bash", tempFile.getAbsolutePath)
val process = Process(cmd).run(ProcessLogger(buffer.append(_)))
if (process.exitValue == 0) {
val file = new File(buffer.toString)
......
......@@ -24,6 +24,7 @@ trait BiopetJavaCommandLineFunction extends JavaCommandLineFunction with BiopetC
javaGCHeapFreeLimit = config("java_gc_heap_freelimit", default = 10)
javaGCTimeLimit = config("java_gc_timelimit", default = 50)
override def defaultResidentFactor: Double = 1.5
override def defaultVmemFactor: Double = 2.0
/** Constructs java opts, this adds scala threads */
......
......@@ -37,9 +37,10 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
var vmem: Option[String] = config("vmem")
def defaultCoreMemory: Double = 2.0
def defaultVmemFactor: Double = 1.4
def defaultResidentFactor: Double = 1.2
var vmemFactor: Double = config("vmem_factor", default = defaultVmemFactor)
var residentFactor: Double = config("resident_factor", default = 1.2)
var residentFactor: Double = config("resident_factor", default = defaultResidentFactor)
private var _coreMemory: Double = 2.0
def coreMemory = _coreMemory
......
......@@ -21,7 +21,7 @@
<parent>
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.7.0-SNAPSHOT</version>
<version>0.8.0-SNAPSHOT</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......
......@@ -30,7 +30,7 @@ class Bgzip(val root: Configurable) extends BiopetCommandLineFunction {
var output: File = null
var f: Boolean = config("f", default = false)
executable = config("exe", default = "bgzip")
executable = config("exe", default = "bgzip", freeVar = false)
def cmdLine = required(executable) +
conditional(f, "-f") +
......
......@@ -275,12 +275,12 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
else Map()
}
protected val removeOnConflict = Set("Output_file", "Command_line_options", "Run_time", "Start_time", "End_time", "Input_file_(format)", "Novel_/_existing_variants")
protected val nonNumber = Set("VEP_version_(API)", "Cache/Database", "Species")
protected val removeOnConflict = Set("Output_file", "Run_time", "Start_time", "End_time", "Novel_/_existing_variants", "Input_file_(format)")
protected val nonNumber = Set("VEP_version_(API)", "Cache/Database", "Species", "Command_line_options")
override def resolveSummaryConflict(v1: Any, v2: Any, key: String): Any = {
if (removeOnConflict.contains(key)) None
else if (nonNumber.contains(key)) v1
else if (nonNumber.contains(key)) v2
else {
(v1, v2) match {
case (x1: Int, x2: Int) => x1 + x2
......@@ -301,9 +301,10 @@ class VariantEffectPredictor(val root: Configurable) extends BiopetCommandLineFu
(for ((header, headerIndex) <- headers) yield {
val name = header.stripPrefix("[").stripSuffix("]")
name.replaceAll(" ", "_") -> (contents.drop(headerIndex + 1).takeWhile(!isHeader(_)).map { line =>
name.replaceAll(" ", "_") -> (contents.drop(headerIndex + 1).takeWhile(!isHeader(_)).flatMap { line =>
val values = line.split("\t", 2)
values.head.replaceAll(" ", "_") -> tryToParseNumber(values.last).getOrElse(0)
if (values.last.isEmpty || values.last == "-") None
else Some(values.head.replaceAll(" ", "_") -> tryToParseNumber(values.last).getOrElse(values.last))
}.toMap)
}).toMap
}
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.bedtools
import java.io.File
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.hisat
import java.io.File
......
......@@ -14,16 +14,17 @@
*/
package nl.lumc.sasc.biopet.extensions.igvtools
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, Version }
/**
* General igvtools extension
*
* Created by wyleung on 5-1-15
*/
abstract class IGVTools extends BiopetCommandLineFunction with Version {
executable = config("exe", default = "igvtools", namespace = "igvtools", freeVar = false)
def versionCommand = executable + " version"
abstract class IGVTools extends BiopetJavaCommandLineFunction with Version {
jarFile = config("igvtools_jar", namespace = "igvtools")
def versionCommand = executable + s" -jar ${jarFile.getAbsolutePath} version"
def versionRegex = """IGV Version:? ([\w\.]*) .*""".r
override def versionExitcode = List(0)
}
\ No newline at end of file
......@@ -76,25 +76,23 @@ class IGVToolsCount(val root: Configurable) extends IGVTools {
}
/** Returns command to execute */
def cmdLine = {
required(executable) +
required("count") +
optional("--maxZoom", maxZoom) +
optional("--windowSize", windowSize) +
optional("--extFactor", extFactor) +
optional("--preExtFactor", preExtFactor) +
optional("--postExtFactor", postExtFactor) +
optional("--windowFunctions", windowFunctions) +
optional("--strands", strands) +
conditional(bases, "--bases") +
optional("--query", query) +
optional("--minMapQuality", minMapQuality) +
conditional(includeDuplicates, "--includeDuplicates") +
conditional(pairs, "--pairs") +
required(input) +
required(outputArg) +
required(genomeChromSizes)
}
override def cmdLine = super.cmdLine +
required("count") +
optional("--maxZoom", maxZoom) +
optional("--windowSize", windowSize) +
optional("--extFactor", extFactor) +
optional("--preExtFactor", preExtFactor) +
optional("--postExtFactor", postExtFactor) +
optional("--windowFunctions", windowFunctions) +
optional("--strands", strands) +
conditional(bases, "--bases") +
optional("--query", query) +
optional("--minMapQuality", minMapQuality) +