diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala index 7f7fe2239ba176f7b1ed9104c5f9e7a507f44760..e56202dfa2dfd65f954bc32b9b17f448dcc93c13 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -21,6 +21,9 @@ object ExtractAlignedFastq extends ToolCommand { type FastqInput = (FastqRecord, Option[FastqRecord]) + /** function to get FastqRecord ID */ + def fastqId(rec: FastqRecord) = rec.getReadHeader.split(" ")(0) + /** * Function to create iterator over Interval given input interval string * @@ -90,10 +93,10 @@ object ExtractAlignedFastq extends ToolCommand { } val queries: Array[QueryInterval] = iv.toList - // sort Interval - .sortBy(x => (x.getSequence, x.getStart, x.getEnd)) // transform to QueryInterval .map(x => new QueryInterval(getSequenceIndex(x.getSequence), x.getStart, x.getEnd)) + // sort Interval + .sortBy(x => (x.referenceIndex, x.start, x.end)) // cast to array .toArray @@ -113,11 +116,12 @@ object ExtractAlignedFastq extends ToolCommand { ) (pair: FastqInput) => pair._2 match { - case None => selected.contains(pair._1.getReadHeader) + case None => selected.contains(fastqId(pair._1)) case Some(x) => - require(commonSuffixLength < pair._1.getReadHeader.length) - require(commonSuffixLength < x.getReadHeader.length) - selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength)) + val rec1Id = fastqId(pair._1) + require(commonSuffixLength < rec1Id.length) + require(commonSuffixLength < fastqId(x).length) + selected.contains(rec1Id.dropRight(commonSuffixLength)) } } @@ -224,6 +228,7 @@ object ExtractAlignedFastq extends ToolCommand { minMapQ = commandArgs.minMapQ, commonSuffixLength = commandArgs.commonSuffixLength) + logger.info("Writing to output file(s) ...") (commandArgs.inputFastq2, commandArgs.outputFastq2) match { case (None, None) => extractReads(memFunc, diff --git a/biopet-framework/src/test/resources/paired01a.fq b/biopet-framework/src/test/resources/paired01a.fq index 530a1bb9bada4865a047a8c8fe73dd5700d0b84d..d1fcedf3780772a936e94a929595f13d4dcc9e65 100644 --- a/biopet-framework/src/test/resources/paired01a.fq +++ b/biopet-framework/src/test/resources/paired01a.fq @@ -1,4 +1,4 @@ -@r01/1 +@r01/1 hello A + H diff --git a/biopet-framework/src/test/resources/paired01b.fq b/biopet-framework/src/test/resources/paired01b.fq index 72cf9246dcce5c29a2500ced269dc3c17fd18847..b93021532b61081eb371c6f784dd2f6f53eb29f3 100644 --- a/biopet-framework/src/test/resources/paired01b.fq +++ b/biopet-framework/src/test/resources/paired01b.fq @@ -1,4 +1,4 @@ -@r01/2 +@r01/2 hello T + I diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala index 4cd7205c0ac6e22c4f93c384600c9f570bc88c06..1939d5351da41e518634d079ff4dc59e21f591c5 100644 --- a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala +++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala @@ -27,8 +27,11 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat private def resourcePath(p: String): String = Paths.get(getClass.getResource(p).toURI).toString - private def makeInterval(chr: String, start: Int, end: Int): Interval = - new Interval(chr, start, end) + private def makeInterval(chr: String, start: Int, end: Int): Iterator[Interval] = + Iterator(new Interval(chr, start, end)) + + private def makeInterval(ivs: Iterable[(String, Int, Int)]): Iterator[Interval] = + ivs.map(x => new Interval(x._1, x._2, x._3)).toIterator private def makeRecord(header: String): FastqRecord = new FastqRecord(header, "ATGC", "", "HIHI") @@ -106,15 +109,18 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat Array("partial overlap", makeInterval("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)), Array("enveloped", - makeInterval("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true)) + makeInterval("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true)), + Array("partial overlap and enveloped", + makeInterval(List(("chrQ", 693, 698), ("chrQ", 430, 460))), sBam01, + sFastq1, sFastq1Default.updated("r03", true).updated("r04", true)) ) } @Test(dataProvider = "singleAlnProvider1") - def testSingleBamDefault(name: String, feat: Interval, inAln: File, + def testSingleBamDefault(name: String, feats: Iterator[Interval], inAln: File, fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = { require(resultMap.keySet == fastqMap.keySet) - val memFunc = makeMembershipFunction(Iterator(feat), inAln) + val memFunc = makeMembershipFunction(feats, inAln) for ((key, (rec1, rec2)) <- fastqMap) { withClue(makeClue(name, inAln, key)) { memFunc(rec1, rec2) shouldBe resultMap(key) @@ -139,10 +145,10 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat } @Test(dataProvider = "singleAlnProvider2") - def testSingleBamMinMapQ(name: String, feat: Interval, inAln: File, minMapQ: Int, + def testSingleBamMinMapQ(name: String, feats: Iterator[Interval], inAln: File, minMapQ: Int, fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = { require(resultMap.keySet == fastqMap.keySet) - val memFunc = makeMembershipFunction(Iterator(feat), inAln, minMapQ) + val memFunc = makeMembershipFunction(feats, inAln, minMapQ) for ((key, (rec1, rec2)) <- fastqMap) { withClue(makeClue(name, inAln, key)) { memFunc(rec1, rec2) shouldBe resultMap(key) @@ -172,15 +178,18 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat Array("enveloped", makeInterval("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)), Array("in intron", - makeInterval("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true)) + makeInterval("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true)), + Array("partial overlap and enveloped", + makeInterval(List(("chrQ", 693, 698), ("chrQ", 430, 460))), pBam01, + pFastq1, pFastq1Default.updated("r03", true).updated("r04", true)) ) } @Test(dataProvider = "pairAlnProvider1") - def testPairBamDefault(name: String, feat: Interval, inAln: File, + def testPairBamDefault(name: String, feats: Iterator[Interval], inAln: File, fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = { require(resultMap.keySet == fastqMap.keySet) - val memFunc = makeMembershipFunction(Iterator(feat), inAln, commonSuffixLength = 2) + val memFunc = makeMembershipFunction(feats, inAln, commonSuffixLength = 2) for ((key, (rec1, rec2)) <- fastqMap) { withClue(makeClue(name, inAln, key)) { memFunc(rec1, rec2) shouldBe resultMap(key) @@ -189,7 +198,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat } @Test def testWriteSingleFastqDefault() = { - val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(recs._1.getReadHeader) + val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(fastqId(recs._1)) val in1 = new FastqReader(resourceFile("/single01.fq")) val mo1 = mock[BasicFastqWriter] val obs = inOrd(mo1) @@ -201,15 +210,15 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat @Test def testWritePairFastqDefault() = { val mockSet = Set("r01/1", "r01/2", "r03/1", "r03/2") - val memFunc = (recs: FastqInput) => mockSet.contains(recs._1.getReadHeader) || mockSet.contains(recs._2.get.getReadHeader) + val memFunc = (recs: FastqInput) => mockSet.contains(fastqId(recs._1)) || mockSet.contains(fastqId(recs._2.get)) val in1 = new FastqReader(resourceFile("/paired01a.fq")) val in2 = new FastqReader(resourceFile("/paired01b.fq")) val mo1 = mock[BasicFastqWriter] val mo2 = mock[BasicFastqWriter] val obs = inOrd(mo1, mo2) extractReads(memFunc, in1, mo1, in2, mo2) - obs.verify(mo1).write(new FastqRecord("r01/1", "A", "", "H")) - obs.verify(mo2).write(new FastqRecord("r01/2", "T", "", "I")) + obs.verify(mo1).write(new FastqRecord("r01/1 hello", "A", "", "H")) + obs.verify(mo2).write(new FastqRecord("r01/2 hello", "T", "", "I")) obs.verify(mo1).write(new FastqRecord("r03/1", "G", "", "H")) obs.verify(mo2).write(new FastqRecord("r03/2", "C", "", "I")) verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord])