From 6a492c14b52302cb4e3ffadcb61606501fd5bb60 Mon Sep 17 00:00:00 2001
From: bow <bow@bow.web.id>
Date: Sat, 25 Oct 2014 23:30:15 +0200
Subject: [PATCH] Use DataProvider for testing multiple combinations

---
 .../biopet/tools/ExtractAlignedFastq.scala    |  19 +--
 .../tools/ExtractAlignedFastqUnitTest.scala   | 148 ++++++++++--------
 2 files changed, 92 insertions(+), 75 deletions(-)

diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
index 3bce35461..df5e20100 100644
--- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
+++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala
@@ -5,13 +5,14 @@
 package nl.lumc.sasc.biopet.tools
 
 import java.io.File
+
 import scala.collection.mutable.{ Set => MSet }
 import scala.collection.JavaConverters._
 
 import htsjdk.samtools.SAMFileReader
 import htsjdk.samtools.SAMFileReader.QueryInterval
 import htsjdk.samtools.SAMRecord
-import htsjdk.samtools.fastq.{BasicFastqWriter, FastqWriter, FastqReader, FastqRecord}
+import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord }
 import htsjdk.tribble.Feature
 import htsjdk.tribble.BasicFeature
 
@@ -19,6 +20,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand
 
 object ExtractAlignedFastq extends ToolCommand {
 
+  type FastqPair = (FastqRecord, FastqRecord)
   /**
    * Function to create iterator over features given input interval string
    *
@@ -72,7 +74,7 @@ object ExtractAlignedFastq extends ToolCommand {
                              inAln: File,
                              commonSuffixLength: Int = 0,
                              readGroupIds: Set[String] = Set.empty[String]
-                            ): ((FastqRecord, FastqRecord) => Boolean) = {
+                            ): (FastqPair => Boolean) = {
 
     /** function to make interval queries for BAM files */
     def makeQueryInterval(aln: SAMFileReader, feat: Feature): QueryInterval =
@@ -119,17 +121,16 @@ object ExtractAlignedFastq extends ToolCommand {
         }
        )
 
-    (rec1: FastqRecord, rec2: FastqRecord) => rec2 match {
-      case null       => selected.contains(rec1.getReadHeader)
+    (pair: FastqPair) => pair._2 match {
+      case null       => selected.contains(pair._1.getReadHeader)
       case otherwise  =>
-        require (commonSuffixLength < rec1.getReadHeader.length)
-        require (commonSuffixLength < rec2.getReadHeader.length)
-        println(rec1.getReadHeader.dropRight(commonSuffixLength))
-        selected.contains(rec1.getReadHeader.dropRight(commonSuffixLength))
+        require(commonSuffixLength < pair._1.getReadHeader.length)
+        require(commonSuffixLength < pair._2.getReadHeader.length)
+        selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength))
     }
   }
 
-  def selectFastqReads(memFunc: (FastqRecord, FastqRecord) => Boolean,
+  def selectFastqReads(memFunc: FastqPair => Boolean,
                        inputFastq1: File,
                        outputFastq1: File,
                        inputFastq2: File = null,
diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala
index fbc639be5..638ca788e 100644
--- a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala
+++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala
@@ -8,7 +8,7 @@ import java.io.File
 import java.nio.file.Paths
 import org.scalatest.Matchers
 import org.scalatest.testng.TestNGSuite
-import org.testng.annotations.Test
+import org.testng.annotations.{ DataProvider, Test }
 
 import htsjdk.samtools.fastq.FastqRecord
 import htsjdk.tribble._
@@ -20,38 +20,20 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with Matchers {
   private def resourceFile(p: String): File =
     new File(Paths.get(getClass.getResource(p).toURI).toString)
 
-  private def makeFeatures(features: (String, Int, Int)*): Seq[Feature] =
-    features.map(x => new BasicFeature(x._1, x._2, x._3))
-
-  private def makeFastqRecords(raws: (String, String, String, String)*): Seq[FastqRecord] =
-    raws.map(x => new FastqRecord(x._1, x._2, x._3, x._4))
-
-  val sBam01 = resourceFile("/single01.bam")
-  val pBam01 = resourceFile("/paired01.bam")
-
-  val sFastq1 = makeFastqRecords(
-    ("r01", "A", "", "H"),
-    ("r02", "T", "", "H"),
-    ("r03", "G", "", "H"),
-    ("r04", "C", "", "H"),
-    ("r05", "AT", "", "HH")
-  )
-
-  val pFastq1a = makeFastqRecords(
-    ("r01/1", "A", "", "H"),
-    ("r02/1", "T", "", "H"),
-    ("r03/1", "G", "", "H"),
-    ("r04/1", "C", "", "H"),
-    ("r05/1", "AT", "", "HH")
-  )
-
-  val pFastq1b = makeFastqRecords(
-    ("r01/2", "A", "", "H"),
-    ("r02/2", "T", "", "H"),
-    ("r03/2", "G", "", "H"),
-    ("r04/2", "C", "", "H"),
-    ("r05/2", "AT", "", "HH")
-  )
+  private def makeFeature(chr: String, start: Int, end: Int): Feature =
+    new BasicFeature(chr, start ,end)
+
+  private def makeRecord(header: String): FastqRecord =
+    new FastqRecord(header, "ATGC", "", "HIHI")
+
+  private def makeSingleRecords(headers: String*): Map[String, FastqPair] =
+    headers.map(x => (x, (makeRecord(x), null))).toMap
+
+  private def makePairRecords(headers: (String, (String, String))*): Map[String, FastqPair] =
+    headers.map(x => (x._1, (makeRecord(x._2._1), makeRecord(x._2._2)))).toMap
+
+  private def makeClue(tName: String, f: File, rName: String): String =
+    tName + " on " + f.getName + ", read " + rName + ": "
 
   @Test def testIntervalStartEnd() = {
     val obs = makeFeatureFromString(List("chr5:1000-1100")).next()
@@ -90,40 +72,74 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with Matchers {
       makeFeatureFromString(List("chr5:1000-")).next()
     }
 
-  @Test def testMembershipSingleBamDefault() = {
-    val feats = makeFeatures(
-      ("chrQ", 30, 49),     // no overlap, adjacent left of read r02
-      ("chrQ", 200, 210),   // no overlap, adjacent right of read r01
-      ("chrQ", 220, 230),   // no overlap
-      ("chrQ", 430, 460),   // overlap, partial on interval and read r04
-      ("chrQ", 693, 698))   // overlap, interval enveloped read r03
-    val memFunc = makeMembershipFunction(feats, sBam01)
-    // r01 is not in the set
-    memFunc(sFastq1(0), null) shouldBe false
-    // r02 is not in the set
-    memFunc(sFastq1(1), null) shouldBe false
-    // r03 is in the set
-    memFunc(sFastq1(2), null) shouldBe true
-    // r04 is in the set
-    memFunc(sFastq1(3), null) shouldBe true
-    // r05 is not in the set
-    memFunc(sFastq1(4), null) shouldBe false
+  @DataProvider(name = "singleAlnProvider1", parallel = true)
+  def singleAlnProvider1() = {
+    val sFastq1 = makeSingleRecords("r01", "r02", "r03", "r04", "r05")
+    val sFastq1Default = sFastq1.keys.map(x => (x, false)).toMap
+    val sBam01 = resourceFile("/single01.bam")
+
+    Array(
+      Array("adjacent left",
+        makeFeature("chrQ", 30, 49), sBam01, sFastq1, sFastq1Default),
+      Array("adjacent right",
+        makeFeature("chrQ", 200, 210), sBam01, sFastq1, sFastq1Default),
+      Array("no overlap",
+        makeFeature("chrQ", 220, 230), sBam01, sFastq1, sFastq1Default),
+      Array("partial overlap",
+        makeFeature("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)),
+      Array("enveloped",
+        makeFeature("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true))
+    )
   }
 
-  @Test def testMembershipPairBamDefault() = {
-    val feats = makeFeatures(
-      ("chrQ", 30, 49),     // no overlap, adjacent left of read r02
-      ("chrQ", 200, 210),   // no overlap, adjacent right of read r01
-      ("chrQ", 220, 230),   // no overlap, middle of read r01
-      ("chrQ", 430, 460),   // overlap, partial on interval and read r04
-      ("chrQ", 693, 698),   // overlap, interval enveloped read r03
-      ("chrQ", 900, 999))   // enveloped inside read r05 split
-    val memFunc = makeMembershipFunction(feats, pBam01, 2)
-    memFunc(pFastq1a(0), pFastq1b(0)) shouldBe false
-    memFunc(pFastq1a(1), pFastq1b(1)) shouldBe false
-    memFunc(pFastq1a(2), pFastq1b(2)) shouldBe true
-    memFunc(pFastq1a(3), pFastq1b(3)) shouldBe true
-    memFunc(pFastq1a(4), pFastq1b(4)) shouldBe true
+  @Test(dataProvider = "singleAlnProvider1")
+  def testSingleBamDefault(name: String, feat: Feature, inAln: File,
+                           fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = {
+    require(resultMap.keySet == fastqMap.keySet)
+    val memFunc = makeMembershipFunction(Iterable(feat), inAln)
+    for ((key, (rec1, rec2)) <- fastqMap) {
+      withClue(makeClue(name, inAln, key)) {
+        memFunc(rec1, rec2) shouldBe resultMap(key)
+      }
+    }
   }
-}
 
+  @DataProvider(name = "pairAlnProvider1", parallel = true)
+  def pairAlnProvider1() = {
+    val pFastq1 = makePairRecords(
+      ("r01", ("r01/1", "r01/2")),
+      ("r02", ("r02/1", "r02/2")),
+      ("r03", ("r03/1", "r03/2")),
+      ("r04", ("r04/1", "r04/2")),
+      ("r05", ("r05/1", "r05/2")))
+    val pFastq1Default = pFastq1.keys.map(x => (x, false)).toMap
+    val pBam01 = resourceFile("/paired01.bam")
+
+    Array(
+      Array("adjacent left",
+        makeFeature("chrQ", 30, 49), pBam01, pFastq1, pFastq1Default),
+      Array("adjacent right",
+        makeFeature("chrQ", 200, 210), pBam01, pFastq1, pFastq1Default),
+      Array("no overlap",
+        makeFeature("chrQ", 220, 230), pBam01, pFastq1, pFastq1Default),
+      Array("partial overlap",
+        makeFeature("chrQ", 430, 460), pBam01, pFastq1, pFastq1Default.updated("r04", true)),
+      Array("enveloped",
+        makeFeature("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)),
+      Array("in intron",
+        makeFeature("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true))
+    )
+  }
+
+  @Test(dataProvider = "pairAlnProvider1")
+  def testPairBamDefault(name: String, feat: Feature, inAln: File,
+                         fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = {
+    require(resultMap.keySet == fastqMap.keySet)
+    val memFunc = makeMembershipFunction(Iterable(feat), inAln, commonSuffixLength = 2)
+    for ((key, (rec1, rec2)) <- fastqMap) {
+      withClue(makeClue(name, inAln, key)) {
+        memFunc(rec1, rec2) shouldBe resultMap(key)
+      }
+    }
+  }
+}
-- 
GitLab