Skip to content
Snippets Groups Projects
Commit 755c54b0 authored by Sander Bollen's avatar Sander Bollen
Browse files

Merge branch 'fix-chunking' into 'develop'

Fix chunking

Chunking was broken in develop, this should fix it

See merge request !250
parents 98bc93d9 df83160f
No related branches found
No related tags found
No related merge requests found
......@@ -85,9 +85,11 @@ trait CommandLineResources extends CommandLineFunction with Configurable {
this.freeze()
}
var threadsCorrection = 0
protected def combineResources(commands: List[CommandLineResources]): Unit = {
commands.foreach(_.setResources())
nCoresRequest = Some(commands.map(_.threads).sum)
nCoresRequest = Some(commands.map(_.threads).sum + threadsCorrection)
_coreMemory = commands.map(cmd => cmd.coreMemeory * (cmd.threads.toDouble / threads.toDouble)).sum
memoryLimit = Some(_coreMemory * threads)
......
......@@ -36,7 +36,7 @@ class Zcat(val root: Configurable) extends BiopetCommandLineFunction {
/** Returns command to execute */
def cmdLine = required(executable) +
(if (inputAsStdin) "" else required(input)) +
(if (inputAsStdin) "" else repeat(input)) +
(if (outputAsStsout) "" else " > " + required(output))
}
......
......@@ -39,10 +39,10 @@ trait Configurable extends ImplicitConversions {
def defaults: Map[String, Any] = Map()
/** This method merge defaults from the root to it's own */
protected def internalDefaults: Map[String, Any] = {
protected[config] def internalDefaults: Map[String, Any] = {
(root != null, defaults.isEmpty) match {
case (true, true) => root.defaults
case (true, false) => ConfigUtils.mergeMaps(defaults, root.defaults)
case (true, true) => root.internalDefaults
case (true, false) => ConfigUtils.mergeMaps(defaults, root.internalDefaults)
case (false, true) => globalConfig.defaults
case (false, false) => ConfigUtils.mergeMaps(defaults, globalConfig.defaults)
}
......
......@@ -153,29 +153,31 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
else None
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File], List[File]) =
def runTrimClip(R1_in: File, outDir: File, chunk: String): (File, Option[File]) =
runTrimClip(R1_in, None, outDir, chunk)
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, outDir: File): (File, Option[File], List[File]) =
def runTrimClip(R1_in: File, outDir: File): (File, Option[File]) =
runTrimClip(R1_in, None, outDir, "")
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File], List[File]) =
def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File): (File, Option[File]) =
runTrimClip(R1_in, R2_in, outDir, "")
/** Adds all chunkable jobs of flexiprep */
def runTrimClip(R1_in: File, R2_in: Option[File], outDir: File, chunkarg: String): (File, Option[File], List[File]) = {
def runTrimClip(R1_in: File,
R2_in: Option[File],
outDir: File,
chunkarg: String): (File, Option[File]) = {
val chunk = if (chunkarg.isEmpty || chunkarg.endsWith("_")) chunkarg else chunkarg + "_"
var R1 = R1_in
var R2 = R2_in
def deps: List[File] = Nil
val qcCmdR1 = new QcCommand(this, fastqc_R1)
qcCmdR1.input = R1_in
qcCmdR1.read = "R1"
qcCmdR1.output = if (paired) new File(fastqR1Qc.getAbsolutePath.stripSuffix(".gz"))
qcCmdR1.output = if (paired) new File(outDir, fastqR1Qc.getName.stripSuffix(".gz"))
else fastqR1Qc
qcCmdR1.deps :+= fastqc_R1.output
qcCmdR1.isIntermediate = paired || !keepQcFastqFiles
......@@ -184,7 +186,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
if (paired) {
val qcCmdR2 = new QcCommand(this, fastqc_R2)
qcCmdR2.input = R2_in.get
qcCmdR2.output = new File(fastqR2Qc.get.getAbsolutePath.stripSuffix(".gz"))
qcCmdR2.output = new File(outDir, fastqR2Qc.get.getName.stripSuffix(".gz"))
qcCmdR2.read = "R2"
addSummarizable(qcCmdR2, "qc_command_R2")
......@@ -195,8 +197,8 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
fqSync.refFastq = R1_in
fqSync.inputFastq1 = qcCmdR1.output
fqSync.inputFastq2 = qcCmdR2.output
fqSync.outputFastq1 = fastqR1Qc
fqSync.outputFastq2 = fastqR2Qc.get
fqSync.outputFastq1 = new File(outDir, fastqR1Qc.getName)
fqSync.outputFastq2 = new File(outDir, fastqR2Qc.get.getName)
fqSync.outputStats = new File(outDir, s"${sampleId.getOrElse("x")}-${libId.getOrElse("x")}.sync.stats")
val pipe = new BiopetFifoPipe(this, fqSync :: Nil) {
......@@ -242,7 +244,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
outputFiles += (chunk + "output_R1" -> R1)
if (paired) outputFiles += (chunk + "output_R2" -> R2.get)
(R1, R2, deps)
(R1, R2)
}
/** Adds last non chunkable jobs */
......@@ -251,8 +253,14 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with
throw new IllegalStateException("R1 and R2 file number is not the same")
if (fastq_R1.length > 1) {
add(Zcat(this, fastq_R1, fastqR1Qc) | new Gzip(this) > fastqR1Qc)
if (paired) add(Zcat(this, fastq_R2, fastqR2Qc.get) | new Gzip(this) > fastqR2Qc.get)
val zcat = new Zcat(this)
zcat.input = fastq_R1
add(zcat | new Gzip(this) > fastqR1Qc)
if (paired) {
val zcat = new Zcat(this)
zcat.input = fastq_R2
add(zcat | new Gzip(this) > fastqR2Qc.get)
}
}
outputFiles += ("output_R1_gzip" -> fastqR1Qc)
......
......@@ -118,8 +118,6 @@ class GentrapTest extends TestNGSuite with Matchers {
val functions = gentrap.functions.groupBy(_.getClass)
val numSamples = sampleConfig("samples").size
functions(classOf[Gsnap]).size should be >= 1
if (expMeasures.contains("fragments_per_gene")) {
gentrap.functions
.collect { case x: HtseqCount => x.output.toString.endsWith(".fragments_per_gene") }.size shouldBe numSamples
......
......@@ -116,7 +116,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
"skip_markduplicates" -> skipMarkduplicates,
"aligner" -> aligner,
"chunking" -> chunking,
"numberChunks" -> numberChunks.getOrElse(1)
"numberChunks" -> (if (chunking) numberChunks.getOrElse(1) else None)
) ++ (if (root == null) Map("reference" -> referenceSummary) else Map())
override def reportClass = {
......@@ -204,13 +204,11 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
for ((chunkDir, fastqfile) <- chunks) {
var R1 = fastqfile._1
var R2 = fastqfile._2
var deps: List[File] = Nil
if (!skipFlexiprep) {
val flexiout = flexiprep.runTrimClip(R1, R2, new File(chunkDir, "flexiprep"), chunkDir)
logger.debug(chunkDir + " - " + flexiout)
R1 = flexiout._1
if (paired) R2 = flexiout._2
deps = flexiout._3
fastq_R1_output :+= R1
R2.foreach(R2 => fastq_R2_output :+= R2)
}
......@@ -218,15 +216,15 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val outputBam = new File(chunkDir, outputName + ".bam")
bamFiles :+= outputBam
aligner match {
case "bwa-mem" => addBwaMem(R1, R2, outputBam, deps)
case "bwa-aln" => addBwaAln(R1, R2, outputBam, deps)
case "bowtie" => addBowtie(R1, R2, outputBam, deps)
case "gsnap" => addGsnap(R1, R2, outputBam, deps)
case "bwa-mem" => addBwaMem(R1, R2, outputBam)
case "bwa-aln" => addBwaAln(R1, R2, outputBam)
case "bowtie" => addBowtie(R1, R2, outputBam)
case "gsnap" => addGsnap(R1, R2, outputBam)
// TODO: make TopHat here accept multiple input files
case "tophat" => addTophat(R1, R2, outputBam, deps)
case "stampy" => addStampy(R1, R2, outputBam, deps)
case "star" => addStar(R1, R2, outputBam, deps)
case "star-2pass" => addStar2pass(R1, R2, outputBam, deps)
case "tophat" => addTophat(R1, R2, outputBam)
case "stampy" => addStampy(R1, R2, outputBam)
case "star" => addStar(R1, R2, outputBam)
case "star-2pass" => addStar2pass(R1, R2, outputBam)
case _ => throw new IllegalStateException("Option aligner: '" + aligner + "' is not valid")
}
if (chunking && numberChunks.getOrElse(1) > 1 && config("chunk_metrics", default = false))
......@@ -267,10 +265,9 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
/** Add bwa aln jobs */
def addBwaAln(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addBwaAln(R1: File, R2: Option[File], output: File): File = {
val bwaAlnR1 = new BwaAln(this)
bwaAlnR1.fastq = R1
bwaAlnR1.deps = deps
bwaAlnR1.output = swapExt(output.getParent, output, ".bam", ".R1.sai")
bwaAlnR1.isIntermediate = true
add(bwaAlnR1)
......@@ -278,7 +275,6 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val samFile: File = if (paired) {
val bwaAlnR2 = new BwaAln(this)
bwaAlnR2.fastq = R2.get
bwaAlnR2.deps = deps
bwaAlnR2.output = swapExt(output.getParent, output, ".bam", ".R2.sai")
bwaAlnR2.isIntermediate = true
add(bwaAlnR2)
......@@ -313,41 +309,47 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
/** Adds bwa mem jobs */
def addBwaMem(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addBwaMem(R1: File, R2: Option[File], output: File): File = {
val bwaCommand = new BwaMem(this)
bwaCommand.R1 = R1
if (paired) bwaCommand.R2 = R2.get
bwaCommand.deps = deps
bwaCommand.R = Some(getReadGroupBwa)
val sortSam = new SortSam(this)
sortSam.output = output
add(bwaCommand | sortSam, chunking || !skipMarkduplicates)
val pipe = bwaCommand | sortSam
pipe.isIntermediate = chunking || !skipMarkduplicates
pipe.threadsCorrection = -1
add(pipe)
output
}
def addGsnap(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addGsnap(R1: File, R2: Option[File], output: File): File = {
val zcatR1 = extractIfNeeded(R1, output.getParentFile)
val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
val gsnapCommand = new Gsnap(this)
gsnapCommand.input = if (paired) List(R1, R2.get) else List(R1)
gsnapCommand.deps = deps
gsnapCommand.output = swapExt(output.getParent, output, ".bam", ".sam")
gsnapCommand.isIntermediate = true
add(gsnapCommand)
gsnapCommand.input = if (paired) List(zcatR1._2, zcatR2.get._2) else List(zcatR1._2)
gsnapCommand.output = swapExt(output.getParentFile, output, ".bam", ".sam")
val reorderSam = new ReorderSam(this)
reorderSam.input = gsnapCommand.output
reorderSam.output = swapExt(output.getParent, output, ".sorted.bam", ".reordered.bam")
add(reorderSam)
addAddOrReplaceReadGroups(reorderSam.output, output)
reorderSam.output = swapExt(output.getParentFile, output, ".sorted.bam", ".reordered.bam")
val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) ::
Some(gsnapCommand) :: Some(ar._1) :: Some(reorderSam) :: Nil).flatten)
pipe.threadsCorrection = -1
zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
add(pipe)
ar._2
}
def addTophat(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addTophat(R1: File, R2: Option[File], output: File): File = {
// TODO: merge mapped and unmapped BAM ~ also dealing with validation errors in the unmapped BAM
val tophat = new Tophat(this)
tophat.R1 = tophat.R1 :+ R1
if (paired) tophat.R2 = tophat.R2 :+ R2.get
tophat.output_dir = new File(outputDir, "tophat_out")
tophat.deps = deps
// always output BAM
tophat.no_convert_bam = false
// and always keep input ordering
......@@ -381,10 +383,12 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
add(reorderSam)
addAddOrReplaceReadGroups(reorderSam.output, output)
val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
add(ar._1)
ar._2
}
/** Adds stampy jobs */
def addStampy(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addStampy(R1: File, R2: Option[File], output: File): File = {
var RG: String = "ID:" + readgroupId + ","
RG += "SM:" + sampleId.get + ","
......@@ -399,10 +403,9 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val stampyCmd = new Stampy(this)
stampyCmd.R1 = R1
if (paired) stampyCmd.R2 = R2.get
stampyCmd.deps = deps
stampyCmd.readgroup = RG
stampyCmd.sanger = true
stampyCmd.output = this.swapExt(output.getParent, output, ".bam", ".sam")
stampyCmd.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
stampyCmd.isIntermediate = true
add(stampyCmd)
val sortSam = SortSam(this, stampyCmd.output, output)
......@@ -412,33 +415,54 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
/** Adds bowtie jobs */
def addBowtie(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
def addBowtie(R1: File, R2: Option[File], output: File): File = {
val zcatR1 = extractIfNeeded(R1, output.getParentFile)
val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
zcatR1._1.foreach(add(_))
zcatR2.foreach(_._1.foreach(add(_)))
val bowtie = new Bowtie(this)
bowtie.R1 = R1
if (paired) bowtie.R2 = R2
bowtie.deps = deps
bowtie.output = this.swapExt(output.getParent, output, ".bam", ".sam")
bowtie.R1 = zcatR1._2
if (paired) bowtie.R2 = Some(zcatR2.get._2)
bowtie.output = this.swapExt(output.getParentFile, output, ".bam", ".sam")
bowtie.isIntermediate = true
add(bowtie)
addAddOrReplaceReadGroups(bowtie.output, output)
val ar = addAddOrReplaceReadGroups(bowtie.output, output)
val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
pipe.threadsCorrection = -1
add(pipe)
ar._2
}
/** Adds Star jobs */
def addStar(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
val starCommand = Star(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
add(starCommand)
addAddOrReplaceReadGroups(starCommand.outputSam, output)
def addStar(R1: File, R2: Option[File], output: File): File = {
val zcatR1 = extractIfNeeded(R1, output.getParentFile)
val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
val starCommand = Star(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
val ar = addAddOrReplaceReadGroups(starCommand.outputSam, output)
val pipe = new BiopetFifoPipe(this, (zcatR1._1 :: (if (paired) zcatR2.get._1 else None) ::
Some(starCommand) :: Some(ar._1) :: Nil).flatten)
pipe.threadsCorrection = -1
zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
add(pipe)
ar._2
}
/** Adds Start 2 pass jobs */
def addStar2pass(R1: File, R2: Option[File], output: File, deps: List[File]): File = {
val starCommand = Star._2pass(this, R1, R2, outputDir, isIntermediate = true, deps = deps)
def addStar2pass(R1: File, R2: Option[File], output: File): File = {
val zcatR1 = extractIfNeeded(R1, output.getParentFile)
val zcatR2 = if (paired) Some(extractIfNeeded(R2.get, output.getParentFile)) else None
zcatR1._1.foreach(add(_))
zcatR2.foreach(_._1.foreach(add(_)))
val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
addAll(starCommand._2)
addAddOrReplaceReadGroups(starCommand._1, output)
val ar = addAddOrReplaceReadGroups(starCommand._1, output)
add(ar._1)
ar._2
}
/** Adds AddOrReplaceReadGroups */
def addAddOrReplaceReadGroups(input: File, output: File): File = {
def addAddOrReplaceReadGroups(input: File, output: File): (AddOrReplaceReadGroups, File) = {
val addOrReplaceReadGroups = AddOrReplaceReadGroups(this, input, output)
addOrReplaceReadGroups.createIndex = true
......@@ -450,9 +474,8 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
add(addOrReplaceReadGroups)
addOrReplaceReadGroups.output
(addOrReplaceReadGroups, addOrReplaceReadGroups.output)
}
/** Returns readgroup for bwa */
......@@ -477,22 +500,18 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
* @param runDir directory to extract when needed
* @return returns extracted file
*/
def extractIfNeeded(file: File, runDir: File): File = {
if (file == null) file
else if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
def extractIfNeeded(file: File, runDir: File): (Option[BiopetCommandLineFunction], File) = {
require(file != null)
if (file.getName.endsWith(".gz") || file.getName.endsWith(".gzip")) {
var newFile: File = swapExt(runDir, file, ".gz", "")
if (file.getName.endsWith(".gzip")) newFile = swapExt(runDir, file, ".gzip", "")
val zcatCommand = Zcat(this, file, newFile)
zcatCommand.isIntermediate = true
add(zcatCommand)
newFile
(Some(zcatCommand), newFile)
} else if (file.getName.endsWith(".bz2")) {
val newFile = swapExt(runDir, file, ".bz2", "")
val pbzip2 = Pbzip2(this, file, newFile)
pbzip2.isIntermediate = true
add(pbzip2)
newFile
} else file
(Some(pbzip2), newFile)
} else (None, file)
}
}
......
......@@ -91,28 +91,6 @@ class MappingTest extends TestNGSuite with Matchers {
//Flexiprep
mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2)
//aligners
mapping.functions.count(_.isInstanceOf[BwaAln]) shouldBe ((if (aligner == "bwa-aln") if (paired) 2 else 1 else 0) * chunks)
mapping.functions.count(_.isInstanceOf[BwaSampe]) shouldBe ((if (aligner == "bwa-aln") if (paired) 1 else 0 else 0) * chunks)
mapping.functions.count(_.isInstanceOf[BwaSamse]) shouldBe ((if (aligner == "bwa-aln") if (paired) 0 else 1 else 0) * chunks)
mapping.functions.count(_.isInstanceOf[Star]) shouldBe ((if (aligner == "star") 1 else if (aligner == "star-2pass") 3 else 0) * chunks)
mapping.functions.count(_.isInstanceOf[Bowtie]) shouldBe ((if (aligner == "bowtie") 1 else 0) * chunks)
mapping.functions.count(_.isInstanceOf[Stampy]) shouldBe ((if (aligner == "stampy") 1 else 0) * chunks)
// Sort sam or replace readgroup
val sort = aligner match {
case "bwa-mem" | "bwa-aln" | "stampy" => "sortsam"
case "star" | "star-2pass" | "bowtie" | "gsnap" | "tophat" => "replacereadgroups"
case _ => throw new IllegalArgumentException("aligner: " + aligner + " does not exist")
}
if (aligner != "tophat") { // FIXME
//mapping.functions.count(_.isInstanceOf[SortSam]) shouldBe ((if (sort == "sortsam") 1 else 0) * chunks)
mapping.functions.count(_.isInstanceOf[AddOrReplaceReadGroups]) shouldBe ((if (sort == "replacereadgroups") 1 else 0) * chunks)
mapping.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (if (skipMarkDuplicate && chunks > 1) 1 else 0)
mapping.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (if (skipMarkDuplicate) 0 else 1)
}
}
// remove temporary run directory all tests in the class have been run
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment