From 6a492c14b52302cb4e3ffadcb61606501fd5bb60 Mon Sep 17 00:00:00 2001 From: bow <bow@bow.web.id> Date: Sat, 25 Oct 2014 23:30:15 +0200 Subject: [PATCH] Use DataProvider for testing multiple combinations --- .../biopet/tools/ExtractAlignedFastq.scala | 19 +-- .../tools/ExtractAlignedFastqUnitTest.scala | 148 ++++++++++-------- 2 files changed, 92 insertions(+), 75 deletions(-) diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala index 3bce35461..df5e20100 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -5,13 +5,14 @@ package nl.lumc.sasc.biopet.tools import java.io.File + import scala.collection.mutable.{ Set => MSet } import scala.collection.JavaConverters._ import htsjdk.samtools.SAMFileReader import htsjdk.samtools.SAMFileReader.QueryInterval import htsjdk.samtools.SAMRecord -import htsjdk.samtools.fastq.{BasicFastqWriter, FastqWriter, FastqReader, FastqRecord} +import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord } import htsjdk.tribble.Feature import htsjdk.tribble.BasicFeature @@ -19,6 +20,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand object ExtractAlignedFastq extends ToolCommand { + type FastqPair = (FastqRecord, FastqRecord) /** * Function to create iterator over features given input interval string * @@ -72,7 +74,7 @@ object ExtractAlignedFastq extends ToolCommand { inAln: File, commonSuffixLength: Int = 0, readGroupIds: Set[String] = Set.empty[String] - ): ((FastqRecord, FastqRecord) => Boolean) = { + ): (FastqPair => Boolean) = { /** function to make interval queries for BAM files */ def makeQueryInterval(aln: SAMFileReader, feat: Feature): QueryInterval = @@ -119,17 +121,16 @@ object ExtractAlignedFastq extends ToolCommand { } ) - (rec1: FastqRecord, rec2: FastqRecord) => rec2 match { - case null => selected.contains(rec1.getReadHeader) + (pair: FastqPair) => pair._2 match { + case null => selected.contains(pair._1.getReadHeader) case otherwise => - require (commonSuffixLength < rec1.getReadHeader.length) - require (commonSuffixLength < rec2.getReadHeader.length) - println(rec1.getReadHeader.dropRight(commonSuffixLength)) - selected.contains(rec1.getReadHeader.dropRight(commonSuffixLength)) + require(commonSuffixLength < pair._1.getReadHeader.length) + require(commonSuffixLength < pair._2.getReadHeader.length) + selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength)) } } - def selectFastqReads(memFunc: (FastqRecord, FastqRecord) => Boolean, + def selectFastqReads(memFunc: FastqPair => Boolean, inputFastq1: File, outputFastq1: File, inputFastq2: File = null, diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala index fbc639be5..638ca788e 100644 --- a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala +++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala @@ -8,7 +8,7 @@ import java.io.File import java.nio.file.Paths import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite -import org.testng.annotations.Test +import org.testng.annotations.{ DataProvider, Test } import htsjdk.samtools.fastq.FastqRecord import htsjdk.tribble._ @@ -20,38 +20,20 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with Matchers { private def resourceFile(p: String): File = new File(Paths.get(getClass.getResource(p).toURI).toString) - private def makeFeatures(features: (String, Int, Int)*): Seq[Feature] = - features.map(x => new BasicFeature(x._1, x._2, x._3)) - - private def makeFastqRecords(raws: (String, String, String, String)*): Seq[FastqRecord] = - raws.map(x => new FastqRecord(x._1, x._2, x._3, x._4)) - - val sBam01 = resourceFile("/single01.bam") - val pBam01 = resourceFile("/paired01.bam") - - val sFastq1 = makeFastqRecords( - ("r01", "A", "", "H"), - ("r02", "T", "", "H"), - ("r03", "G", "", "H"), - ("r04", "C", "", "H"), - ("r05", "AT", "", "HH") - ) - - val pFastq1a = makeFastqRecords( - ("r01/1", "A", "", "H"), - ("r02/1", "T", "", "H"), - ("r03/1", "G", "", "H"), - ("r04/1", "C", "", "H"), - ("r05/1", "AT", "", "HH") - ) - - val pFastq1b = makeFastqRecords( - ("r01/2", "A", "", "H"), - ("r02/2", "T", "", "H"), - ("r03/2", "G", "", "H"), - ("r04/2", "C", "", "H"), - ("r05/2", "AT", "", "HH") - ) + private def makeFeature(chr: String, start: Int, end: Int): Feature = + new BasicFeature(chr, start ,end) + + private def makeRecord(header: String): FastqRecord = + new FastqRecord(header, "ATGC", "", "HIHI") + + private def makeSingleRecords(headers: String*): Map[String, FastqPair] = + headers.map(x => (x, (makeRecord(x), null))).toMap + + private def makePairRecords(headers: (String, (String, String))*): Map[String, FastqPair] = + headers.map(x => (x._1, (makeRecord(x._2._1), makeRecord(x._2._2)))).toMap + + private def makeClue(tName: String, f: File, rName: String): String = + tName + " on " + f.getName + ", read " + rName + ": " @Test def testIntervalStartEnd() = { val obs = makeFeatureFromString(List("chr5:1000-1100")).next() @@ -90,40 +72,74 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with Matchers { makeFeatureFromString(List("chr5:1000-")).next() } - @Test def testMembershipSingleBamDefault() = { - val feats = makeFeatures( - ("chrQ", 30, 49), // no overlap, adjacent left of read r02 - ("chrQ", 200, 210), // no overlap, adjacent right of read r01 - ("chrQ", 220, 230), // no overlap - ("chrQ", 430, 460), // overlap, partial on interval and read r04 - ("chrQ", 693, 698)) // overlap, interval enveloped read r03 - val memFunc = makeMembershipFunction(feats, sBam01) - // r01 is not in the set - memFunc(sFastq1(0), null) shouldBe false - // r02 is not in the set - memFunc(sFastq1(1), null) shouldBe false - // r03 is in the set - memFunc(sFastq1(2), null) shouldBe true - // r04 is in the set - memFunc(sFastq1(3), null) shouldBe true - // r05 is not in the set - memFunc(sFastq1(4), null) shouldBe false + @DataProvider(name = "singleAlnProvider1", parallel = true) + def singleAlnProvider1() = { + val sFastq1 = makeSingleRecords("r01", "r02", "r03", "r04", "r05") + val sFastq1Default = sFastq1.keys.map(x => (x, false)).toMap + val sBam01 = resourceFile("/single01.bam") + + Array( + Array("adjacent left", + makeFeature("chrQ", 30, 49), sBam01, sFastq1, sFastq1Default), + Array("adjacent right", + makeFeature("chrQ", 200, 210), sBam01, sFastq1, sFastq1Default), + Array("no overlap", + makeFeature("chrQ", 220, 230), sBam01, sFastq1, sFastq1Default), + Array("partial overlap", + makeFeature("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)), + Array("enveloped", + makeFeature("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true)) + ) } - @Test def testMembershipPairBamDefault() = { - val feats = makeFeatures( - ("chrQ", 30, 49), // no overlap, adjacent left of read r02 - ("chrQ", 200, 210), // no overlap, adjacent right of read r01 - ("chrQ", 220, 230), // no overlap, middle of read r01 - ("chrQ", 430, 460), // overlap, partial on interval and read r04 - ("chrQ", 693, 698), // overlap, interval enveloped read r03 - ("chrQ", 900, 999)) // enveloped inside read r05 split - val memFunc = makeMembershipFunction(feats, pBam01, 2) - memFunc(pFastq1a(0), pFastq1b(0)) shouldBe false - memFunc(pFastq1a(1), pFastq1b(1)) shouldBe false - memFunc(pFastq1a(2), pFastq1b(2)) shouldBe true - memFunc(pFastq1a(3), pFastq1b(3)) shouldBe true - memFunc(pFastq1a(4), pFastq1b(4)) shouldBe true + @Test(dataProvider = "singleAlnProvider1") + def testSingleBamDefault(name: String, feat: Feature, inAln: File, + fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = { + require(resultMap.keySet == fastqMap.keySet) + val memFunc = makeMembershipFunction(Iterable(feat), inAln) + for ((key, (rec1, rec2)) <- fastqMap) { + withClue(makeClue(name, inAln, key)) { + memFunc(rec1, rec2) shouldBe resultMap(key) + } + } } -} + @DataProvider(name = "pairAlnProvider1", parallel = true) + def pairAlnProvider1() = { + val pFastq1 = makePairRecords( + ("r01", ("r01/1", "r01/2")), + ("r02", ("r02/1", "r02/2")), + ("r03", ("r03/1", "r03/2")), + ("r04", ("r04/1", "r04/2")), + ("r05", ("r05/1", "r05/2"))) + val pFastq1Default = pFastq1.keys.map(x => (x, false)).toMap + val pBam01 = resourceFile("/paired01.bam") + + Array( + Array("adjacent left", + makeFeature("chrQ", 30, 49), pBam01, pFastq1, pFastq1Default), + Array("adjacent right", + makeFeature("chrQ", 200, 210), pBam01, pFastq1, pFastq1Default), + Array("no overlap", + makeFeature("chrQ", 220, 230), pBam01, pFastq1, pFastq1Default), + Array("partial overlap", + makeFeature("chrQ", 430, 460), pBam01, pFastq1, pFastq1Default.updated("r04", true)), + Array("enveloped", + makeFeature("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)), + Array("in intron", + makeFeature("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true)) + ) + } + + @Test(dataProvider = "pairAlnProvider1") + def testPairBamDefault(name: String, feat: Feature, inAln: File, + fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = { + require(resultMap.keySet == fastqMap.keySet) + val memFunc = makeMembershipFunction(Iterable(feat), inAln, commonSuffixLength = 2) + for ((key, (rec1, rec2)) <- fastqMap) { + withClue(makeClue(name, inAln, key)) { + memFunc(rec1, rec2) shouldBe resultMap(key) + } + } + } +} -- GitLab