Skip to content
Snippets Groups Projects
Commit ef085f81 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'feature-extract_fix' into 'develop'

Feature extract fix

Missed a test case which again manifests into a bug where filtering doesn't work for FASTQ files with description lines.

Anyway, this updates the test case and fixes the problem.

EDIT: also fixes bug that arises when supplying multiple intervals.

See merge request !34
parents 9b5aa989 37bb8e35
Branches
Tags
No related merge requests found
...@@ -21,6 +21,9 @@ object ExtractAlignedFastq extends ToolCommand { ...@@ -21,6 +21,9 @@ object ExtractAlignedFastq extends ToolCommand {
type FastqInput = (FastqRecord, Option[FastqRecord]) type FastqInput = (FastqRecord, Option[FastqRecord])
/** function to get FastqRecord ID */
def fastqId(rec: FastqRecord) = rec.getReadHeader.split(" ")(0)
/** /**
* Function to create iterator over Interval given input interval string * Function to create iterator over Interval given input interval string
* *
...@@ -90,10 +93,10 @@ object ExtractAlignedFastq extends ToolCommand { ...@@ -90,10 +93,10 @@ object ExtractAlignedFastq extends ToolCommand {
} }
val queries: Array[QueryInterval] = iv.toList val queries: Array[QueryInterval] = iv.toList
// sort Interval
.sortBy(x => (x.getSequence, x.getStart, x.getEnd))
// transform to QueryInterval // transform to QueryInterval
.map(x => new QueryInterval(getSequenceIndex(x.getSequence), x.getStart, x.getEnd)) .map(x => new QueryInterval(getSequenceIndex(x.getSequence), x.getStart, x.getEnd))
// sort Interval
.sortBy(x => (x.referenceIndex, x.start, x.end))
// cast to array // cast to array
.toArray .toArray
...@@ -113,11 +116,12 @@ object ExtractAlignedFastq extends ToolCommand { ...@@ -113,11 +116,12 @@ object ExtractAlignedFastq extends ToolCommand {
) )
(pair: FastqInput) => pair._2 match { (pair: FastqInput) => pair._2 match {
case None => selected.contains(pair._1.getReadHeader) case None => selected.contains(fastqId(pair._1))
case Some(x) => case Some(x) =>
require(commonSuffixLength < pair._1.getReadHeader.length) val rec1Id = fastqId(pair._1)
require(commonSuffixLength < x.getReadHeader.length) require(commonSuffixLength < rec1Id.length)
selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength)) require(commonSuffixLength < fastqId(x).length)
selected.contains(rec1Id.dropRight(commonSuffixLength))
} }
} }
...@@ -224,6 +228,7 @@ object ExtractAlignedFastq extends ToolCommand { ...@@ -224,6 +228,7 @@ object ExtractAlignedFastq extends ToolCommand {
minMapQ = commandArgs.minMapQ, minMapQ = commandArgs.minMapQ,
commonSuffixLength = commandArgs.commonSuffixLength) commonSuffixLength = commandArgs.commonSuffixLength)
logger.info("Writing to output file(s) ...")
(commandArgs.inputFastq2, commandArgs.outputFastq2) match { (commandArgs.inputFastq2, commandArgs.outputFastq2) match {
case (None, None) => extractReads(memFunc, case (None, None) => extractReads(memFunc,
......
@r01/1 @r01/1 hello
A A
+ +
H H
......
@r01/2 @r01/2 hello
T T
+ +
I I
......
...@@ -27,8 +27,11 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat ...@@ -27,8 +27,11 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
private def resourcePath(p: String): String = private def resourcePath(p: String): String =
Paths.get(getClass.getResource(p).toURI).toString Paths.get(getClass.getResource(p).toURI).toString
private def makeInterval(chr: String, start: Int, end: Int): Interval = private def makeInterval(chr: String, start: Int, end: Int): Iterator[Interval] =
new Interval(chr, start, end) Iterator(new Interval(chr, start, end))
private def makeInterval(ivs: Iterable[(String, Int, Int)]): Iterator[Interval] =
ivs.map(x => new Interval(x._1, x._2, x._3)).toIterator
private def makeRecord(header: String): FastqRecord = private def makeRecord(header: String): FastqRecord =
new FastqRecord(header, "ATGC", "", "HIHI") new FastqRecord(header, "ATGC", "", "HIHI")
...@@ -106,15 +109,18 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat ...@@ -106,15 +109,18 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
Array("partial overlap", Array("partial overlap",
makeInterval("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)), makeInterval("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)),
Array("enveloped", Array("enveloped",
makeInterval("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true)) makeInterval("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true)),
Array("partial overlap and enveloped",
makeInterval(List(("chrQ", 693, 698), ("chrQ", 430, 460))), sBam01,
sFastq1, sFastq1Default.updated("r03", true).updated("r04", true))
) )
} }
@Test(dataProvider = "singleAlnProvider1") @Test(dataProvider = "singleAlnProvider1")
def testSingleBamDefault(name: String, feat: Interval, inAln: File, def testSingleBamDefault(name: String, feats: Iterator[Interval], inAln: File,
fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = { fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet) require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterator(feat), inAln) val memFunc = makeMembershipFunction(feats, inAln)
for ((key, (rec1, rec2)) <- fastqMap) { for ((key, (rec1, rec2)) <- fastqMap) {
withClue(makeClue(name, inAln, key)) { withClue(makeClue(name, inAln, key)) {
memFunc(rec1, rec2) shouldBe resultMap(key) memFunc(rec1, rec2) shouldBe resultMap(key)
...@@ -139,10 +145,10 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat ...@@ -139,10 +145,10 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
} }
@Test(dataProvider = "singleAlnProvider2") @Test(dataProvider = "singleAlnProvider2")
def testSingleBamMinMapQ(name: String, feat: Interval, inAln: File, minMapQ: Int, def testSingleBamMinMapQ(name: String, feats: Iterator[Interval], inAln: File, minMapQ: Int,
fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = { fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet) require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterator(feat), inAln, minMapQ) val memFunc = makeMembershipFunction(feats, inAln, minMapQ)
for ((key, (rec1, rec2)) <- fastqMap) { for ((key, (rec1, rec2)) <- fastqMap) {
withClue(makeClue(name, inAln, key)) { withClue(makeClue(name, inAln, key)) {
memFunc(rec1, rec2) shouldBe resultMap(key) memFunc(rec1, rec2) shouldBe resultMap(key)
...@@ -172,15 +178,18 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat ...@@ -172,15 +178,18 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
Array("enveloped", Array("enveloped",
makeInterval("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)), makeInterval("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)),
Array("in intron", Array("in intron",
makeInterval("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true)) makeInterval("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true)),
Array("partial overlap and enveloped",
makeInterval(List(("chrQ", 693, 698), ("chrQ", 430, 460))), pBam01,
pFastq1, pFastq1Default.updated("r03", true).updated("r04", true))
) )
} }
@Test(dataProvider = "pairAlnProvider1") @Test(dataProvider = "pairAlnProvider1")
def testPairBamDefault(name: String, feat: Interval, inAln: File, def testPairBamDefault(name: String, feats: Iterator[Interval], inAln: File,
fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = { fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet) require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterator(feat), inAln, commonSuffixLength = 2) val memFunc = makeMembershipFunction(feats, inAln, commonSuffixLength = 2)
for ((key, (rec1, rec2)) <- fastqMap) { for ((key, (rec1, rec2)) <- fastqMap) {
withClue(makeClue(name, inAln, key)) { withClue(makeClue(name, inAln, key)) {
memFunc(rec1, rec2) shouldBe resultMap(key) memFunc(rec1, rec2) shouldBe resultMap(key)
...@@ -189,7 +198,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat ...@@ -189,7 +198,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
} }
@Test def testWriteSingleFastqDefault() = { @Test def testWriteSingleFastqDefault() = {
val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(recs._1.getReadHeader) val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(fastqId(recs._1))
val in1 = new FastqReader(resourceFile("/single01.fq")) val in1 = new FastqReader(resourceFile("/single01.fq"))
val mo1 = mock[BasicFastqWriter] val mo1 = mock[BasicFastqWriter]
val obs = inOrd(mo1) val obs = inOrd(mo1)
...@@ -201,15 +210,15 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat ...@@ -201,15 +210,15 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
@Test def testWritePairFastqDefault() = { @Test def testWritePairFastqDefault() = {
val mockSet = Set("r01/1", "r01/2", "r03/1", "r03/2") val mockSet = Set("r01/1", "r01/2", "r03/1", "r03/2")
val memFunc = (recs: FastqInput) => mockSet.contains(recs._1.getReadHeader) || mockSet.contains(recs._2.get.getReadHeader) val memFunc = (recs: FastqInput) => mockSet.contains(fastqId(recs._1)) || mockSet.contains(fastqId(recs._2.get))
val in1 = new FastqReader(resourceFile("/paired01a.fq")) val in1 = new FastqReader(resourceFile("/paired01a.fq"))
val in2 = new FastqReader(resourceFile("/paired01b.fq")) val in2 = new FastqReader(resourceFile("/paired01b.fq"))
val mo1 = mock[BasicFastqWriter] val mo1 = mock[BasicFastqWriter]
val mo2 = mock[BasicFastqWriter] val mo2 = mock[BasicFastqWriter]
val obs = inOrd(mo1, mo2) val obs = inOrd(mo1, mo2)
extractReads(memFunc, in1, mo1, in2, mo2) extractReads(memFunc, in1, mo1, in2, mo2)
obs.verify(mo1).write(new FastqRecord("r01/1", "A", "", "H")) obs.verify(mo1).write(new FastqRecord("r01/1 hello", "A", "", "H"))
obs.verify(mo2).write(new FastqRecord("r01/2", "T", "", "I")) obs.verify(mo2).write(new FastqRecord("r01/2 hello", "T", "", "I"))
obs.verify(mo1).write(new FastqRecord("r03/1", "G", "", "H")) obs.verify(mo1).write(new FastqRecord("r03/1", "G", "", "H"))
obs.verify(mo2).write(new FastqRecord("r03/2", "C", "", "I")) obs.verify(mo2).write(new FastqRecord("r03/2", "C", "", "I"))
verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord]) verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord])
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment