From 052f9e1125b9972e1209c934eb3afc3ce5acc96c Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Tue, 17 May 2016 11:11:06 +0200
Subject: [PATCH] Added mapping to gears testing

---
 .../pipelines/mapping/MappingTest.scala       | 33 ++++++++++++-----
 .../mapping/MultisampleMappingTest.scala      | 35 +++++++++++++------
 .../shiva/ShivaVariantcallingTest.scala       | 14 ++++----
 3 files changed, 57 insertions(+), 25 deletions(-)

diff --git a/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala b/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala
index 7b8f289df..cc2ba1b9e 100644
--- a/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala
+++ b/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MappingTest.scala
@@ -18,6 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.mapping
 import java.io.{ File, FileOutputStream }
 
 import com.google.common.io.Files
+import nl.lumc.sasc.biopet.extensions.kraken.Kraken
 import nl.lumc.sasc.biopet.pipelines.flexiprep.Fastqc
 import nl.lumc.sasc.biopet.utils.ConfigUtils
 import nl.lumc.sasc.biopet.utils.config.Config
@@ -25,7 +26,7 @@ import org.apache.commons.io.FileUtils
 import org.broadinstitute.gatk.queue.QSettings
 import org.scalatest.Matchers
 import org.scalatest.testng.TestNGSuite
-import org.testng.annotations.{ BeforeClass, AfterClass, DataProvider, Test }
+import org.testng.annotations.{ AfterClass, BeforeClass, DataProvider, Test }
 
 /**
  * Test class for [[Mapping]]
@@ -42,14 +43,15 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with
     }
   }
 
+  def paired = Array(true, false) // iterated by mappingOptions (`pair <- paired`); a def so subclasses can override it
+  def chunks = Array(1, 5) // chunk counts iterated by mappingOptions (`chunk <- chunks`)
+  def skipMarkDuplicates = Array(true, false) // formerly a local val of mappingOptions, now overridable
+  def skipFlexipreps = Array(true, false) // formerly a local val of mappingOptions, now overridable
+  def zipped = Array(true, false) // formerly a local val of mappingOptions, now overridable
+  def unmappedToGears = false // passed to the config as "unmapped_to_gears"; the Kraken count is asserted as 1 when true, else 0
+
   @DataProvider(name = "mappingOptions")
   def mappingOptions = {
-    val paired = Array(true, false)
-    val chunks = Array(1, 5)
-    val skipMarkDuplicates = Array(true, false)
-    val skipFlexipreps = Array(true, false)
-    val zipped = Array(true, false)
-
     for (
       pair <- paired;
       chunk <- chunks;
@@ -68,7 +70,8 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with
       "aligner" -> aligner,
       "number_chunks" -> chunks,
       "skip_markduplicates" -> skipMarkDuplicate,
-      "skip_flexiprep" -> skipFlexiprep
+      "skip_flexiprep" -> skipFlexiprep,
+      "unmapped_to_gears" -> unmappedToGears
     ), Map(executables.toSeq: _*))
     val mapping: Mapping = initPipeline(map)
 
@@ -85,6 +88,8 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with
 
     //Flexiprep
     mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2)
+
+    mapping.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (unmappedToGears) 1 else 0)
   }
 
   val outputDir = Files.createTempDir()
@@ -133,6 +138,8 @@ abstract class AbstractTestMapping(val aligner: String) extends TestNGSuite with
     "bowtie2" -> Map("exe" -> "test"),
     "stampy" -> Map("exe" -> "test", "genome" -> "test", "hash" -> "test"),
     "samtools" -> Map("exe" -> "test"),
+    "kraken" -> Map("exe" -> "test", "db" -> "test"),
+    "krakenreport" -> Map("exe" -> "test", "db" -> "test"),
     "md5sum" -> Map("exe" -> "test")
   )
 
@@ -151,3 +158,13 @@ class MappingBowtie2Test extends AbstractTestMapping("bowtie2")
 class MappingStampyTest extends AbstractTestMapping("stampy")
 class MappingGsnapTest extends AbstractTestMapping("gsnap")
 class MappingTophatTest extends AbstractTestMapping("tophat")
+
+class MappingGearsTest extends AbstractTestMapping("bwa-mem") { // mapping test with aligner "bwa-mem" and unmapped_to_gears enabled
+  override def unmappedToGears = true // base test then expects exactly one Kraken function
+
+  override def paired = Array(true) // single value per option below, narrowing what mappingOptions iterates over
+  override def chunks = Array(1)
+  override def skipMarkDuplicates = Array(true)
+  override def skipFlexipreps = Array(true)
+  override def zipped = Array(true)
+}
diff --git a/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala b/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala
index abb3bf59e..beac59c9c 100644
--- a/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala
+++ b/mapping/src/test/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingTest.scala
@@ -1,8 +1,9 @@
 package nl.lumc.sasc.biopet.pipelines.mapping
 
-import java.io.{File, FileOutputStream}
+import java.io.{ File, FileOutputStream }
 
 import com.google.common.io.Files
+import nl.lumc.sasc.biopet.extensions.kraken.Kraken
 import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles }
 import nl.lumc.sasc.biopet.utils.ConfigUtils
 import nl.lumc.sasc.biopet.utils.config.Config
@@ -12,8 +13,8 @@ import org.scalatest.testng.TestNGSuite
 import org.testng.annotations.{ DataProvider, Test }
 
 /**
-  * Created by pjvanthof on 15/05/16.
-  */
+ * Created by pjvanthof on 15/05/16.
+ */
 trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
   def initPipeline(map: Map[String, Any]): MultisampleMapping = {
     new MultisampleMapping() {
@@ -36,7 +37,7 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
   @DataProvider(name = "mappingOptions")
   def mappingOptions = {
     for (
-      merge <- mergeStrategies.toArray; s1 <- sample1 ; s2 <- sample2
+      merge <- mergeStrategies.toArray; s1 <- sample1; s2 <- sample2
     ) yield Array(merge, s1, s2)
   }
 
@@ -76,13 +77,18 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
         (if (sample2 && (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates)) 1 else 0))
       pipeline.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (
         (if (sample2 && (merge == MergeStrategy.MergeSam || merge == MergeStrategy.PreProcessMergeSam)) 1 else 0))
-      pipeline.samples.foreach { case (sampleName, sample) =>
-        if (merge == MergeStrategy.None) sample.bamFile shouldBe None
-        sample.summaryStats shouldBe Map()
-        sample.libraries.foreach { case (libraryId, library) =>
-          library.summaryStats shouldBe Map()
-        }
+      pipeline.samples.foreach {
+        case (sampleName, sample) =>
+          if (merge == MergeStrategy.None) sample.bamFile shouldBe None
+          sample.summaryStats shouldBe Map()
+          sample.libraries.foreach {
+            case (libraryId, library) =>
+              library.summaryStats shouldBe Map()
+          }
       }
+
+      pipeline.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (unmappedToGears) (numberFastqLibs + numberSamples) else 0)
+
       pipeline.summarySettings.get("merge_strategy") shouldBe Some(merge.toString)
     }
   }
@@ -98,6 +104,13 @@ class MultisampleMappingNoSamplesTest extends MultisampleMappingTestTrait {
   override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates)
 }
 
+class MultisampleMappingGearsTest extends MultisampleMappingTestTrait { // multisample mapping test with unmappedToGears enabled
+  override def sample1 = Array(true) // values iterated by mappingOptions as `s1 <- sample1`
+  override def sample2 = Array(false) // values iterated by mappingOptions as `s2 <- sample2`
+  override def unmappedToGears = true // trait test then expects numberFastqLibs + numberSamples Kraken functions
+  override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates) // keep only PreProcessMarkDuplicates
+}
+
 class MultisampleMappingBamTest extends MultisampleMappingTestTrait {
   override def sample1 = Array(false)
   override def sample2 = Array(false)
@@ -168,6 +181,8 @@ object MultisampleMappingTestTrait {
     "samtools" -> Map("exe" -> "test"),
     "igvtools" -> Map("exe" -> "test"),
     "wigtobigwig" -> Map("exe" -> "test"),
+    "kraken" -> Map("exe" -> "test", "db" -> "test"),
+    "krakenreport" -> Map("exe" -> "test", "db" -> "test"),
     "md5sum" -> Map("exe" -> "test")
   )
 
diff --git a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala
index 46f3061c4..2904d79a6 100644
--- a/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala
+++ b/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala
@@ -5,21 +5,21 @@
  */
 package nl.lumc.sasc.biopet.pipelines.shiva
 
-import java.io.{File, FileOutputStream}
+import java.io.{ File, FileOutputStream }
 
 import com.google.common.io.Files
 import nl.lumc.sasc.biopet.core.BiopetPipe
 import nl.lumc.sasc.biopet.extensions.Freebayes
-import nl.lumc.sasc.biopet.extensions.bcftools.{BcftoolsCall, BcftoolsMerge}
-import nl.lumc.sasc.biopet.extensions.gatk.{CombineVariants, GenotypeConcordance, HaplotypeCaller, UnifiedGenotyper}
+import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge }
+import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance, HaplotypeCaller, UnifiedGenotyper }
 import nl.lumc.sasc.biopet.utils.config.Config
-import nl.lumc.sasc.biopet.extensions.tools.{MpileupToVcf, VcfFilter, VcfStats}
-import nl.lumc.sasc.biopet.extensions.vt.{VtDecompose, VtNormalize}
+import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats }
+import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize }
 import nl.lumc.sasc.biopet.utils.ConfigUtils
 import org.broadinstitute.gatk.queue.QSettings
 import org.scalatest.Matchers
 import org.scalatest.testng.TestNGSuite
-import org.testng.annotations.{DataProvider, Test}
+import org.testng.annotations.{ DataProvider, Test }
 
 import scala.collection.mutable.ListBuffer
 
@@ -117,7 +117,7 @@ trait ShivaVariantcallingTestTrait extends TestNGSuite with Matchers {
         (if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0)
       pipeline.functions.count(_.isInstanceOf[UnifiedGenotyper]) shouldBe (if (unifiedGenotyper) 1 else 0) +
         (if (unifiedGenotyperAllele) 1 else 0)
-      pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (1 + callers.size + (roiBedFiles ++ ampliconBedFile).length * (1+ callers.size))
+      pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (1 + callers.size + (roiBedFiles ++ ampliconBedFile).length * (1 + callers.size))
       pipeline.functions.count(_.isInstanceOf[VtNormalize]) shouldBe (if (normalize) callers.size else 0)
       pipeline.functions.count(_.isInstanceOf[VtDecompose]) shouldBe (if (decompose) callers.size else 0)
       pipeline.functions.count(_.isInstanceOf[GenotypeConcordance]) shouldBe (if (referenceVcf.isDefined) 1 + callers.size else 0)
-- 
GitLab