Commit 5e2ceed3 authored by Peter van 't Hof, committed by GitHub

Merge pull request #75 from biopet/fix-BIOPET-650

Adding Sambamba as merge step
parents 02292fea b9cb7c04
......@@ -98,7 +98,7 @@ class BastyTest extends TestNGSuite with Matchers {
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
......
......@@ -25,6 +25,6 @@ abstract class Sambamba extends BiopetCommandLineFunction with Version {
executable = config("exe", default = "sambamba", namespace = "sambamba", freeVar = false)
def versionCommand = executable
def versionRegex = """sambamba v(.*)""".r
def versionRegex = """sambamba v?(.*)""".r
override def versionExitcode = List(0, 1)
}
\ No newline at end of file
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambamba markdup */
class SambambaMarkdup(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File")
var input: File = _
......@@ -32,17 +31,29 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
var removeDuplicates: Boolean = config("remove_duplicates", default = false)
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val hashTableSize: Option[Int] = config("hash-table-size", default = 262144)
val overflowListSize: Option[Int] = config("overflow-list-size", default = 200000)
val ioBufferSize: Option[Int] = config("io-buffer-size", default = 128)
val compressionLevel: Option[Int] = config("compression_level")
val hashTableSize: Option[Int] = config("hash-table-size")
val overflowListSize: Option[Int] = config("overflow-list-size")
val ioBufferSize: Option[Int] = config("io-buffer-size")
val showProgress: Boolean = config("show-progress", default = true)
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
/** Sets the `@Output` field `indexOutput` to the path of `output` with ".bai" appended. */
override def beforeGraph(): Unit = {
val indexPath = s"$output.bai"
indexOutput = new File(indexPath)
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("markdup") +
conditional(removeDuplicates, "--remove-duplicates") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
conditional(showProgress, "--show-progress") +
optional("--hash-table-size=", hashTableSize, spaceSeparated = false) +
optional("--overflow-list-size=", overflowListSize, spaceSeparated = false) +
optional("--io-buffer-size=", ioBufferSize, spaceSeparated = false) +
......@@ -51,10 +62,11 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
}
object SambambaMarkdup {
def apply(root: Configurable, input: File, output: File): SambambaMarkdup = {
def apply(root: Configurable, input: File, output: File, isIntermediate: Boolean = false): SambambaMarkdup = {
val markdup = new SambambaMarkdup(root)
markdup.input = input
markdup.output = output
markdup.isIntermediate = isIntermediate
markdup
}
......
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambamba merge */
class SambambaMerge(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File[s]")
var input: List[File] = Nil
......@@ -30,13 +29,29 @@ class SambambaMerge(val parent: Configurable) extends Sambamba {
var output: File = _
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val compressionLevel: Option[Int] = config("compression_level")
val header: Boolean = config("header", default = false)
val showProgress: Boolean = config("show-progress", default = true)
val filter: Option[String] = config("filter")
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
/** Derives the index file from `output` by appending ".bai" and stores it in the `@Output` field `indexOutput`. */
override def beforeGraph(): Unit = {
val baiPath = s"$output.bai"
indexOutput = new File(baiPath)
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("merge") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
optional("-F", filter) +
conditional(header, "--header") +
conditional(showProgress, "--show-progress") +
required(output) +
repeat("", input)
repeat(input)
}
......@@ -19,7 +19,7 @@ import java.io.File
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript }
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
......@@ -28,8 +28,8 @@ import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import MultisampleMapping.MergeStrategy
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge }
import scala.collection.JavaConversions._
......@@ -125,7 +125,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript
} else None
def bamFile: Option[File] = mapping match {
case Some(m) => Some(m.finalBamFile)
case Some(m) => Some(m.mergedBamFile)
case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
case _ => None
}
......@@ -247,9 +247,9 @@ trait MultisampleMappingTrait extends MultiSampleQScript
mergeStrategy match {
case MergeStrategy.None =>
case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 =>
case (MergeStrategy.MergeSam) if libraries.flatMap(_._2.bamFile).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*)
case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
case (MergeStrategy.PreProcessMergeSam) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*)
case MergeStrategy.MergeSam =>
add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
......@@ -259,6 +259,20 @@ trait MultisampleMappingTrait extends MultiSampleQScript
add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessMarkDuplicates =>
add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessSambambaMarkdup =>
// Select the BAM handed to SambambaMarkdup: with one library a link job is added, otherwise the
// library BAMs are merged with SambambaMerge first.
// NOTE(review): the condition counts `bamFile` entries, but both branches consume `preProcessBam`.
// Confirm the two always have the same number of entries; otherwise `.head` may hit the wrong or an empty list.
val mergedBam = if (libraries.flatMap(_._2.bamFile).size == 1) {
// Adds link job(s) for the first pre-processed BAM and <sampleDir>/merged.bam (presumably source -> target; confirm against Ln.linkBamFile).
// The value of this branch is the original pre-processed BAM, not the merged.bam link.
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, new File(sampleDir, "merged.bam")): _*)
libraries.flatMap(_._2.preProcessBam).head
} else {
// Merge every pre-processed library BAM into <sampleDir>/merged.bam; the merge job is flagged as intermediate.
val merge = new SambambaMerge(qscript)
merge.input = libraries.flatMap(_._2.preProcessBam).toList
merge.output = new File(sampleDir, "merged.bam")
merge.isIntermediate = true
add(merge)
merge.output
}
// Run markdup from the selected BAM to the sample's bamFile; the job is intermediate unless keepMergedFiles is set.
add(SambambaMarkdup(qscript, mergedBam, bamFile.get, isIntermediate = !keepMergedFiles))
// Adds a link job between "<bamFile>.bai" and the same path with the ".bam" suffix replaced by ".bai"
// (presumably source first, then target; confirm against Ln.apply).
add(Ln(qscript, bamFile.get + ".bai", bamFile.get.getAbsolutePath.stripSuffix(".bam") + ".bai"))
case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?")
}
......@@ -301,7 +315,7 @@ class MultisampleMapping(val parent: Configurable) extends QScript with Multisam
object MultisampleMapping extends PipelineCommand {
object MergeStrategy extends Enumeration {
val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value
val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates, PreProcessSambambaMarkdup = Value
}
/** When a file is not absolute, an error is raised at the end of the pipeline script */
......
......@@ -20,6 +20,7 @@ import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.extensions.centrifuge.Centrifuge
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles }
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaMarkdup
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils
......@@ -91,16 +92,22 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
pipeline.script()
val numberFastqLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0) + (if (sample3 && bamToFastq) 1 else 0) + (if (sample4 && bamToFastq) 1 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0) + (if (sample4) 1 else 0)
val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetCommandLineFunction])
.flatMap(_.asInstanceOf[BiopetCommandLineFunction].pipesJobs)
if (merge == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates) {
""
}
import MultisampleMapping.MergeStrategy
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberFastqLibs +
(if (sample2 && (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates)) 1 else 0))
(if (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates) numberSamples else 0))
pipeline.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (
(if (sample2 && (merge == MergeStrategy.MergeSam || merge == MergeStrategy.PreProcessMergeSam)) 1 else 0))
pipeline.functions.count(_.isInstanceOf[SambambaMarkdup]) shouldBe
(if (merge == MergeStrategy.PreProcessSambambaMarkdup) numberSamples else 0)
pipeline.samples.foreach {
case (sampleName, sample) =>
if (merge == MergeStrategy.None) sample.bamFile shouldBe None
......@@ -211,6 +218,7 @@ object MultisampleMappingTestTrait {
"sickle" -> Map("exe" -> "test"),
"cutadapt" -> Map("exe" -> "test"),
"bwa" -> Map("exe" -> "test"),
"sambamba" -> Map("exe" -> "test"),
"samtools" -> Map("exe" -> "test"),
"igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
"wigtobigwig" -> Map("exe" -> "test"),
......@@ -232,7 +240,7 @@ object MultisampleMappingTestTrait {
)))
val sample2 = Map(
"samples" -> Map("sample3" -> Map("libraries" -> Map(
"samples" -> Map("sample2" -> Map("libraries" -> Map(
"lib1" -> Map(
"R1" -> inputTouch("2_1_R1.fq"),
"R2" -> inputTouch("2_1_R2.fq")
......
......@@ -99,7 +99,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers {
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment