Skip to content
Snippets Groups Projects
Commit 6a492c14 authored by bow's avatar bow
Browse files

Use DataProvider for testing multiple combinations

parent e6b0f47c
No related branches found
No related tags found
No related merge requests found
......@@ -5,13 +5,14 @@
package nl.lumc.sasc.biopet.tools
import java.io.File
import scala.collection.mutable.{ Set => MSet }
import scala.collection.JavaConverters._
import htsjdk.samtools.SAMFileReader
import htsjdk.samtools.SAMFileReader.QueryInterval
import htsjdk.samtools.SAMRecord
import htsjdk.samtools.fastq.{BasicFastqWriter, FastqWriter, FastqReader, FastqRecord}
import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord }
import htsjdk.tribble.Feature
import htsjdk.tribble.BasicFeature
......@@ -19,6 +20,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand
object ExtractAlignedFastq extends ToolCommand {
type FastqPair = (FastqRecord, FastqRecord)
/**
* Function to create iterator over features given input interval string
*
......@@ -72,7 +74,7 @@ object ExtractAlignedFastq extends ToolCommand {
inAln: File,
commonSuffixLength: Int = 0,
readGroupIds: Set[String] = Set.empty[String]
): ((FastqRecord, FastqRecord) => Boolean) = {
): (FastqPair => Boolean) = {
/** function to make interval queries for BAM files */
def makeQueryInterval(aln: SAMFileReader, feat: Feature): QueryInterval =
......@@ -119,17 +121,16 @@ object ExtractAlignedFastq extends ToolCommand {
}
)
(rec1: FastqRecord, rec2: FastqRecord) => rec2 match {
case null => selected.contains(rec1.getReadHeader)
(pair: FastqPair) => pair._2 match {
case null => selected.contains(pair._1.getReadHeader)
case otherwise =>
require (commonSuffixLength < rec1.getReadHeader.length)
require (commonSuffixLength < rec2.getReadHeader.length)
println(rec1.getReadHeader.dropRight(commonSuffixLength))
selected.contains(rec1.getReadHeader.dropRight(commonSuffixLength))
require(commonSuffixLength < pair._1.getReadHeader.length)
require(commonSuffixLength < pair._2.getReadHeader.length)
selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength))
}
}
def selectFastqReads(memFunc: (FastqRecord, FastqRecord) => Boolean,
def selectFastqReads(memFunc: FastqPair => Boolean,
inputFastq1: File,
outputFastq1: File,
inputFastq2: File = null,
......
......@@ -8,7 +8,7 @@ import java.io.File
import java.nio.file.Paths
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import org.testng.annotations.{ DataProvider, Test }
import htsjdk.samtools.fastq.FastqRecord
import htsjdk.tribble._
......@@ -20,38 +20,20 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with Matchers {
private def resourceFile(p: String): File =
new File(Paths.get(getClass.getResource(p).toURI).toString)
private def makeFeatures(features: (String, Int, Int)*): Seq[Feature] =
features.map(x => new BasicFeature(x._1, x._2, x._3))
private def makeFastqRecords(raws: (String, String, String, String)*): Seq[FastqRecord] =
raws.map(x => new FastqRecord(x._1, x._2, x._3, x._4))
val sBam01 = resourceFile("/single01.bam")
val pBam01 = resourceFile("/paired01.bam")
val sFastq1 = makeFastqRecords(
("r01", "A", "", "H"),
("r02", "T", "", "H"),
("r03", "G", "", "H"),
("r04", "C", "", "H"),
("r05", "AT", "", "HH")
)
val pFastq1a = makeFastqRecords(
("r01/1", "A", "", "H"),
("r02/1", "T", "", "H"),
("r03/1", "G", "", "H"),
("r04/1", "C", "", "H"),
("r05/1", "AT", "", "HH")
)
val pFastq1b = makeFastqRecords(
("r01/2", "A", "", "H"),
("r02/2", "T", "", "H"),
("r03/2", "G", "", "H"),
("r04/2", "C", "", "H"),
("r05/2", "AT", "", "HH")
)
private def makeFeature(chr: String, start: Int, end: Int): Feature =
new BasicFeature(chr, start ,end)
private def makeRecord(header: String): FastqRecord =
new FastqRecord(header, "ATGC", "", "HIHI")
private def makeSingleRecords(headers: String*): Map[String, FastqPair] =
headers.map(x => (x, (makeRecord(x), null))).toMap
private def makePairRecords(headers: (String, (String, String))*): Map[String, FastqPair] =
headers.map(x => (x._1, (makeRecord(x._2._1), makeRecord(x._2._2)))).toMap
private def makeClue(tName: String, f: File, rName: String): String =
tName + " on " + f.getName + ", read " + rName + ": "
@Test def testIntervalStartEnd() = {
val obs = makeFeatureFromString(List("chr5:1000-1100")).next()
......@@ -90,40 +72,74 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with Matchers {
makeFeatureFromString(List("chr5:1000-")).next()
}
@Test def testMembershipSingleBamDefault() = {
val feats = makeFeatures(
("chrQ", 30, 49), // no overlap, adjacent left of read r02
("chrQ", 200, 210), // no overlap, adjacent right of read r01
("chrQ", 220, 230), // no overlap
("chrQ", 430, 460), // overlap, partial on interval and read r04
("chrQ", 693, 698)) // overlap, interval enveloped read r03
val memFunc = makeMembershipFunction(feats, sBam01)
// r01 is not in the set
memFunc(sFastq1(0), null) shouldBe false
// r02 is not in the set
memFunc(sFastq1(1), null) shouldBe false
// r03 is in the set
memFunc(sFastq1(2), null) shouldBe true
// r04 is in the set
memFunc(sFastq1(3), null) shouldBe true
// r05 is not in the set
memFunc(sFastq1(4), null) shouldBe false
@DataProvider(name = "singleAlnProvider1", parallel = true)
def singleAlnProvider1() = {
val sFastq1 = makeSingleRecords("r01", "r02", "r03", "r04", "r05")
val sFastq1Default = sFastq1.keys.map(x => (x, false)).toMap
val sBam01 = resourceFile("/single01.bam")
Array(
Array("adjacent left",
makeFeature("chrQ", 30, 49), sBam01, sFastq1, sFastq1Default),
Array("adjacent right",
makeFeature("chrQ", 200, 210), sBam01, sFastq1, sFastq1Default),
Array("no overlap",
makeFeature("chrQ", 220, 230), sBam01, sFastq1, sFastq1Default),
Array("partial overlap",
makeFeature("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)),
Array("enveloped",
makeFeature("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true))
)
}
@Test def testMembershipPairBamDefault() = {
val feats = makeFeatures(
("chrQ", 30, 49), // no overlap, adjacent left of read r02
("chrQ", 200, 210), // no overlap, adjacent right of read r01
("chrQ", 220, 230), // no overlap, middle of read r01
("chrQ", 430, 460), // overlap, partial on interval and read r04
("chrQ", 693, 698), // overlap, interval enveloped read r03
("chrQ", 900, 999)) // enveloped inside read r05 split
val memFunc = makeMembershipFunction(feats, pBam01, 2)
memFunc(pFastq1a(0), pFastq1b(0)) shouldBe false
memFunc(pFastq1a(1), pFastq1b(1)) shouldBe false
memFunc(pFastq1a(2), pFastq1b(2)) shouldBe true
memFunc(pFastq1a(3), pFastq1b(3)) shouldBe true
memFunc(pFastq1a(4), pFastq1b(4)) shouldBe true
@Test(dataProvider = "singleAlnProvider1")
def testSingleBamDefault(name: String, feat: Feature, inAln: File,
fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterable(feat), inAln)
for ((key, (rec1, rec2)) <- fastqMap) {
withClue(makeClue(name, inAln, key)) {
memFunc(rec1, rec2) shouldBe resultMap(key)
}
}
}
}
@DataProvider(name = "pairAlnProvider1", parallel = true)
def pairAlnProvider1() = {
val pFastq1 = makePairRecords(
("r01", ("r01/1", "r01/2")),
("r02", ("r02/1", "r02/2")),
("r03", ("r03/1", "r03/2")),
("r04", ("r04/1", "r04/2")),
("r05", ("r05/1", "r05/2")))
val pFastq1Default = pFastq1.keys.map(x => (x, false)).toMap
val pBam01 = resourceFile("/paired01.bam")
Array(
Array("adjacent left",
makeFeature("chrQ", 30, 49), pBam01, pFastq1, pFastq1Default),
Array("adjacent right",
makeFeature("chrQ", 200, 210), pBam01, pFastq1, pFastq1Default),
Array("no overlap",
makeFeature("chrQ", 220, 230), pBam01, pFastq1, pFastq1Default),
Array("partial overlap",
makeFeature("chrQ", 430, 460), pBam01, pFastq1, pFastq1Default.updated("r04", true)),
Array("enveloped",
makeFeature("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)),
Array("in intron",
makeFeature("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true))
)
}
@Test(dataProvider = "pairAlnProvider1")
def testPairBamDefault(name: String, feat: Feature, inAln: File,
fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterable(feat), inAln, commonSuffixLength = 2)
for ((key, (rec1, rec2)) <- fastqMap) {
withClue(makeClue(name, inAln, key)) {
memFunc(rec1, rec2) shouldBe resultMap(key)
}
}
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment