Skip to content
Snippets Groups Projects
Commit bec375ba authored by bow's avatar bow
Browse files

Merge branch 'feature-unittest_core' into 'develop'

Flexiprep unit test + fix for small bugs

See subject

See merge request !94
parents faa0baaf d04c7f03
No related branches found
No related tags found
No related merge requests found
package nl.lumc.sasc.biopet.pipelines.mapping
import java.io.File
import nl.lumc.sasc.biopet.core.config.Config
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem }
import nl.lumc.sasc.biopet.extensions.picard.{ MergeSamFiles, AddOrReplaceReadGroups, MarkDuplicates, SortSam }
import nl.lumc.sasc.biopet.extensions._
import nl.lumc.sasc.biopet.pipelines.flexiprep.Cutadapt
import nl.lumc.sasc.biopet.pipelines.flexiprep.Fastqc
import nl.lumc.sasc.biopet.pipelines.flexiprep.Sickle
import nl.lumc.sasc.biopet.pipelines.flexiprep._
import nl.lumc.sasc.biopet.tools.FastqSync
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }
/**
 * Builds the [[Mapping]] pipeline for every combination of aligner, pairing,
 * chunk count and skip flags, and checks how many jobs of each type end up in
 * `functions` after `script()` has run.
 *
 * Created by pjvan_thof on 2/12/15.
 */
class MappingTest extends TestNGSuite with Matchers {

  /**
   * Creates a [[Mapping]] pipeline whose global config is built from `map`.
   *
   * @param map configuration values handed to the pipeline's [[Config]]
   * @return a pipeline with config name "mapping" and a QSettings run name of "test"
   */
  def initPipeline(map: Map[String, Any]): Mapping = {
    new Mapping {
      override def configName = "mapping"
      override def globalConfig = new Config(map)
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

  /**
   * Cartesian product of all option values under test.
   *
   * @return one row per combination, laid out as
   *         (aligner, paired, chunks, skipMarkDuplicate, skipFlexiprep)
   */
  @DataProvider(name = "mappingOptions", parallel = true)
  def mappingOptions: Array[Array[Any]] = {
    val aligners = Array("bwa", "bwa-aln", "star", "star-2pass", "bowtie", "stampy")
    val paired = Array(true, false)
    val chunks = Array(1, 5, 10, 100)
    val skipMarkDuplicates = Array(true, false)
    val skipFlexipreps = Array(true, false)

    for (
      aligner <- aligners;
      pair <- paired;
      chunk <- chunks;
      skipMarkDuplicate <- skipMarkDuplicates;
      skipFlexiprep <- skipFlexipreps
    ) yield Array(aligner, pair, chunk, skipMarkDuplicate, skipFlexiprep)
  }

  /**
   * Runs the pipeline script for one option combination and asserts the job counts.
   *
   * The result type is declared as Unit on purpose: TestNG ignores `@Test` methods
   * that return a value unless it is configured otherwise, so the result type must
   * not be inferred from the last matcher expression.
   *
   * @param aligner           value for the "aligner" config key
   * @param paired            when true a second input fastq (R2) is set
   * @param chunks            value for the "number_chunks" config key
   * @param skipMarkDuplicate value for the "skip_markduplicates" config key
   * @param skipFlexiprep     value for the "skip_flexiprep" config key
   */
  @Test(dataProvider = "mappingOptions")
  def testMapping(aligner: String, paired: Boolean, chunks: Int, skipMarkDuplicate: Boolean, skipFlexiprep: Boolean): Unit = {
    val map = ConfigUtils.mergeMaps(Map("output_dir" -> MappingTest.outputDir,
      "aligner" -> aligner,
      "number_chunks" -> chunks,
      "skip_markduplicates" -> skipMarkDuplicate,
      "skip_flexiprep" -> skipFlexiprep
    ), Map(MappingTest.excutables.toSeq: _*))
    val mapping: Mapping = initPipeline(map)

    mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq")
    if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq"))
    mapping.sampleId = "1"
    mapping.libId = "1"
    mapping.script()

    // Number of fastq files that pass through Flexiprep: none when it is skipped,
    // otherwise one per read of the pair.
    val flexiprepFiles = if (skipFlexiprep) 0 else if (paired) 2 else 1

    //Flexiprep
    mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (flexiprepFiles * 2)
    mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe 0
    mapping.functions.count(_.isInstanceOf[SeqtkSeq]) shouldBe (flexiprepFiles * chunks)
    mapping.functions.count(_.isInstanceOf[Cutadapt]) shouldBe (flexiprepFiles * chunks)
    // Syncing is only expected for paired input that actually goes through Flexiprep.
    mapping.functions.count(_.isInstanceOf[FastqSync]) shouldBe ((if (!skipFlexiprep && paired) 1 else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[Sickle]) shouldBe ((if (skipFlexiprep) 0 else 1) * chunks)
    mapping.functions.count(_.isInstanceOf[Gzip]) shouldBe flexiprepFiles

    //aligners
    val isBwaAln = aligner == "bwa-aln"
    mapping.functions.count(_.isInstanceOf[BwaMem]) shouldBe ((if (aligner == "bwa") 1 else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[BwaAln]) shouldBe ((if (isBwaAln) (if (paired) 2 else 1) else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[BwaSampe]) shouldBe ((if (isBwaAln && paired) 1 else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[BwaSamse]) shouldBe ((if (isBwaAln && !paired) 1 else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[Star]) shouldBe ((if (aligner == "star") 1 else if (aligner == "star-2pass") 3 else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[Bowtie]) shouldBe ((if (aligner == "bowtie") 1 else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[Stampy]) shouldBe ((if (aligner == "stampy") 1 else 0) * chunks)

    // Sort sam or replace readgroup
    val sort = aligner match {
      case "bwa" | "bwa-aln" | "stampy"     => "sortsam"
      case "star" | "star-2pass" | "bowtie" => "replacereadgroups"
      case _                                => throw new IllegalArgumentException("aligner: " + aligner + " does not exist")
    }
    mapping.functions.count(_.isInstanceOf[SortSam]) shouldBe ((if (sort == "sortsam") 1 else 0) * chunks)
    mapping.functions.count(_.isInstanceOf[AddOrReplaceReadGroups]) shouldBe ((if (sort == "replacereadgroups") 1 else 0) * chunks)

    // A separate merge job is only expected when duplicates are not marked and
    // there is more than one chunk.
    mapping.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (if (skipMarkDuplicate && chunks > 1) 1 else 0)
    mapping.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (if (skipMarkDuplicate) 0 else 1)
  }
}
/** Shared fixtures for [[MappingTest]]: the output directory and stub tool settings. */
object MappingTest {
  /** Directory named "flexiprep" below the JVM temp directory, joined with the platform separator. */
  val outputDir = Seq(System.getProperty("java.io.tmpdir"), "flexiprep").mkString(File.separator)

  // Most tools only need a placeholder "exe" entry; none of them is executed by the test.
  private val stubExe = Map("exe" -> "test")

  /** Config entries for the reference and for every tool the pipeline looks up. */
  val excutables = Map(
    "reference" -> "test",
    "seqstat" -> stubExe,
    "fastqc" -> stubExe,
    "seqtk" -> stubExe,
    "sickle" -> stubExe,
    "bwa" -> stubExe,
    "star" -> stubExe,
    "bowtie" -> stubExe,
    "stampy" -> (stubExe ++ Map("genome" -> "test", "hash" -> "test")),
    "samtools" -> stubExe
  )
}
\ No newline at end of file
......@@ -15,6 +15,8 @@
*/
package nl.lumc.sasc.biopet.pipelines.sage
import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetQScript, MultiSampleQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Cat
......@@ -72,7 +74,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
mapping.sampleId = sampleId
protected def addJobs(): Unit = {
flexiprep.outputDir = libDir + "flexiprep/"
flexiprep.outputDir = new File(libDir, "flexiprep/")
flexiprep.input_R1 = inputFastq
flexiprep.init
flexiprep.biopetScript
......@@ -123,7 +125,6 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
}
def init() {
if (!outputDir.endsWith("/")) outputDir += "/"
if (transcriptome.isEmpty && tagsLibrary.isEmpty)
throw new IllegalStateException("No transcriptome or taglib found")
if (countBed.isEmpty)
......@@ -138,10 +139,10 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
if (tagsLibrary.isEmpty) {
val cdl = new SageCreateLibrary(this)
cdl.input = transcriptome.get
cdl.output = outputDir + "taglib/tag.lib"
cdl.noAntiTagsOutput = outputDir + "taglib/no_antisense_genes.txt"
cdl.noTagsOutput = outputDir + "taglib/no_sense_genes.txt"
cdl.allGenesOutput = outputDir + "taglib/all_genes.txt"
cdl.output = new File(outputDir, "taglib/tag.lib")
cdl.noAntiTagsOutput = new File(outputDir, "taglib/no_antisense_genes.txt")
cdl.noTagsOutput = new File(outputDir, "taglib/no_sense_genes.txt")
cdl.allGenesOutput = new File(outputDir, "taglib/all_genes.txt")
add(cdl)
tagsLibrary = Some(cdl.output)
}
......
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment