Skip to content
Snippets Groups Projects
Commit 01e3a105 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'feature-extract_aligned_fastq' into 'develop'

Feature extract aligned fastq

This fixes the bug found earlier (unreported) in command line invocation. Also, tests are now done on the command line flag (which comes with the required refactoring).

See merge request !28
parents 22991efc 97a3c3dc
No related branches found
No related tags found
No related merge requests found
......@@ -19,7 +19,7 @@ import nl.lumc.sasc.biopet.core.ToolCommand
object ExtractAlignedFastq extends ToolCommand {
type FastqPair = (FastqRecord, FastqRecord)
type FastqInput = (FastqRecord, Option[FastqRecord])
/**
* Function to create iterator over Interval given input interval string
......@@ -74,7 +74,7 @@ object ExtractAlignedFastq extends ToolCommand {
def makeMembershipFunction(iv: Iterator[Interval],
inAln: File,
minMapQ: Int = 0,
commonSuffixLength: Int = 0): (FastqPair => Boolean) = {
commonSuffixLength: Int = 0): (FastqInput => Boolean) = {
val inAlnReader = SamReaderFactory
.make()
......@@ -112,54 +112,39 @@ object ExtractAlignedFastq extends ToolCommand {
}
)
(pair: FastqPair) => pair._2 match {
case null => selected.contains(pair._1.getReadHeader)
case otherwise =>
(pair: FastqInput) => pair._2 match {
case None => selected.contains(pair._1.getReadHeader)
case Some(x) =>
require(commonSuffixLength < pair._1.getReadHeader.length)
require(commonSuffixLength < pair._2.getReadHeader.length)
require(commonSuffixLength < x.getReadHeader.length)
selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength))
}
}
def selectFastqReads(memFunc: FastqPair => Boolean,
inputFastq1: FastqReader,
outputFastq1: BasicFastqWriter,
inputFastq2: FastqReader = null,
outputFastq2: BasicFastqWriter = null): Unit = {
val i1 = inputFastq1.iterator.asScala
val i2 = inputFastq2 match {
case null => Iterator.continually(null)
case otherwise => otherwise.iterator.asScala
}
val o1 = outputFastq1
val o2 = (inputFastq2, outputFastq2) match {
case (null, null) => null
case (_, null) => throw new IllegalArgumentException("Missing output FASTQ 2")
case (null, _) => throw new IllegalArgumentException("Output FASTQ 2 supplied but there is no input FASTQ 2")
case (x, y) => outputFastq2
}
logger.info("Writing output file(s) ...")
// zip, filter based on function, and write to output file(s)
i1.zip(i2)
def extractReads(memFunc: FastqInput => Boolean,
inputFastq1: FastqReader, outputFastq1: BasicFastqWriter): Unit =
inputFastq1.iterator.asScala
.zip(Iterator.continually(None))
.filter(rec => memFunc(rec._1, rec._2))
.foreach {
case (rec1, null) =>
o1.write(rec1)
case (rec1, rec2) =>
o1.write(rec1)
o2.write(rec2)
}
}
case class Args(inputBam: File = null,
.foreach(rec => outputFastq1.write(rec._1))
def extractReads(memFunc: FastqInput => Boolean,
inputFastq1: FastqReader, outputFastq1: BasicFastqWriter,
inputFastq2: FastqReader, outputFastq2: BasicFastqWriter): Unit =
inputFastq1.iterator.asScala
.zip(inputFastq2.iterator.asScala)
.filter(rec => memFunc(rec._1, Some(rec._2)))
.foreach(rec => {
outputFastq1.write(rec._1)
outputFastq2.write(rec._2)
})
case class Args(inputBam: File = new File(""),
intervals: List[String] = List.empty[String],
inputFastq1: File = null,
inputFastq2: File = null,
outputFastq1: File = null,
outputFastq2: File = null,
inputFastq1: File = new File(""),
inputFastq2: Option[File] = None,
outputFastq1: File = new File(""),
outputFastq2: Option[File] = None,
minMapQ: Int = 0,
commonSuffixLength: Int = 0) extends AbstractArgs
......@@ -188,7 +173,7 @@ object ExtractAlignedFastq extends ToolCommand {
} text "Input FASTQ file 1"
opt[File]('j', "in2") optional () valueName "<fastq>" action { (x, c) =>
c.copy(inputFastq1 = x)
c.copy(inputFastq2 = Option(x))
} validate {
x => if (x.exists) success else failure("Input FASTQ file 2 not found")
} text "Input FASTQ file 2 (default: none)"
......@@ -198,7 +183,7 @@ object ExtractAlignedFastq extends ToolCommand {
} text "Output FASTQ file 1"
opt[File]('p', "out2") optional () valueName "<fastq>" action { (x, c) =>
c.copy(outputFastq1 = x)
c.copy(outputFastq2 = Option(x))
} text "Output FASTQ file 2 (default: none)"
opt[Int]('Q', "min_mapq") optional () action { (x, c) =>
......@@ -215,35 +200,43 @@ object ExtractAlignedFastq extends ToolCommand {
""".stripMargin)
checkConfig { c =>
if (!c.inputBam.exists)
failure("Input BAM file not found")
else if (!c.inputFastq1.exists)
failure("Input FASTQ file 1 not found")
else if (c.inputFastq2 != null && c.outputFastq2 == null)
if (c.inputFastq2 != None && c.outputFastq2 == None)
failure("Missing output FASTQ file 2")
else if (c.inputFastq2 == null && c.outputFastq2 != null)
else if (c.inputFastq2 == None && c.outputFastq2 != None)
failure("Missing input FASTQ file 2")
else
success
}
}
def main(args: Array[String]): Unit = {
val commandArgs: Args = new OptParser()
def parseArgs(args: Array[String]): Args =
new OptParser()
.parse(args, Args())
.getOrElse(sys.exit(1))
def main(args: Array[String]): Unit = {
val commandArgs: Args = parseArgs(args)
val memFunc = makeMembershipFunction(
iv = makeIntervalFromString(commandArgs.intervals),
inAln = commandArgs.inputBam,
minMapQ = commandArgs.minMapQ,
commonSuffixLength = commandArgs.commonSuffixLength)
selectFastqReads(memFunc,
inputFastq1 = new FastqReader(commandArgs.inputFastq1),
inputFastq2 = new FastqReader(commandArgs.inputFastq2),
outputFastq1 = new BasicFastqWriter(commandArgs.outputFastq1),
outputFastq2 = new BasicFastqWriter(commandArgs.outputFastq2))
(commandArgs.inputFastq2, commandArgs.outputFastq2) match {
case (None, None) => extractReads(memFunc,
new FastqReader(commandArgs.inputFastq1),
new BasicFastqWriter(commandArgs.inputFastq1))
case (Some(i2), Some(o2)) => extractReads(memFunc,
new FastqReader(commandArgs.inputFastq1),
new BasicFastqWriter(commandArgs.outputFastq1),
new FastqReader(i2),
new BasicFastqWriter(o2))
case _ => // handled by the command line config check above
}
}
}
......@@ -8,7 +8,7 @@ import java.io.File
import java.nio.file.Paths
import org.mockito.Matchers._
import org.mockito.Mockito._
import org.mockito.Mockito.{ inOrder => inOrd, times, verify }
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar
import org.scalatest.testng.TestNGSuite
......@@ -22,7 +22,10 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
import ExtractAlignedFastq._
private def resourceFile(p: String): File =
new File(Paths.get(getClass.getResource(p).toURI).toString)
new File(resourcePath(p))
private def resourcePath(p: String): String =
Paths.get(getClass.getResource(p).toURI).toString
private def makeInterval(chr: String, start: Int, end: Int): Interval =
new Interval(chr, start, end)
......@@ -30,11 +33,11 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
private def makeRecord(header: String): FastqRecord =
new FastqRecord(header, "ATGC", "", "HIHI")
private def makeSingleRecords(headers: String*): Map[String, FastqPair] =
headers.map(x => (x, (makeRecord(x), null))).toMap
private def makeSingleRecords(headers: String*): Map[String, FastqInput] =
headers.map(x => (x, (makeRecord(x), None))).toMap
private def makePairRecords(headers: (String, (String, String))*): Map[String, FastqPair] =
headers.map(x => (x._1, (makeRecord(x._2._1), makeRecord(x._2._2)))).toMap
private def makePairRecords(headers: (String, (String, String))*): Map[String, FastqInput] =
headers.map(x => (x._1, (makeRecord(x._2._1), Some(makeRecord(x._2._2))))).toMap
private def makeClue(tName: String, f: File, rName: String): String =
tName + " on " + f.getName + ", read " + rName + ": "
......@@ -109,7 +112,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
@Test(dataProvider = "singleAlnProvider1")
def testSingleBamDefault(name: String, feat: Interval, inAln: File,
fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = {
fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterator(feat), inAln)
for ((key, (rec1, rec2)) <- fastqMap) {
......@@ -137,7 +140,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
@Test(dataProvider = "singleAlnProvider2")
def testSingleBamMinMapQ(name: String, feat: Interval, inAln: File, minMapQ: Int,
fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = {
fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterator(feat), inAln, minMapQ)
for ((key, (rec1, rec2)) <- fastqMap) {
......@@ -175,7 +178,7 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
@Test(dataProvider = "pairAlnProvider1")
def testPairBamDefault(name: String, feat: Interval, inAln: File,
fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = {
fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
require(resultMap.keySet == fastqMap.keySet)
val memFunc = makeMembershipFunction(Iterator(feat), inAln, commonSuffixLength = 2)
for ((key, (rec1, rec2)) <- fastqMap) {
......@@ -185,53 +188,68 @@ class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Mat
}
}
@Test def testWriteSingleBamDefault() = {
val memFunc = (recs: FastqPair) => Set("r01", "r03").contains(recs._1.getReadHeader)
@Test def testWriteSingleFastqDefault() = {
val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(recs._1.getReadHeader)
val in1 = new FastqReader(resourceFile("/single01.fq"))
val mo1 = mock[BasicFastqWriter]
selectFastqReads(memFunc, in1, mo1)
val obs = inOrd(mo1)
extractReads(memFunc, in1, mo1)
verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord])
verify(mo1).write(new FastqRecord("r01", "A", "", "H"))
verify(mo1).write(new FastqRecord("r03", "G", "", "H"))
obs.verify(mo1).write(new FastqRecord("r01", "A", "", "H"))
obs.verify(mo1).write(new FastqRecord("r03", "G", "", "H"))
}
@Test def testWritePairBamDefault() = {
val memFunc = (recs: FastqPair) => Set("r01/1", "r01/2", "r03/1", "r03/2").contains(recs._1.getReadHeader)
@Test def testWritePairFastqDefault() = {
val mockSet = Set("r01/1", "r01/2", "r03/1", "r03/2")
val memFunc = (recs: FastqInput) => mockSet.contains(recs._1.getReadHeader) || mockSet.contains(recs._2.get.getReadHeader)
val in1 = new FastqReader(resourceFile("/paired01a.fq"))
val in2 = new FastqReader(resourceFile("/paired01b.fq"))
val mo1 = mock[BasicFastqWriter]
val mo2 = mock[BasicFastqWriter]
selectFastqReads(memFunc, in1, mo1, in2, mo2)
val obs = inOrd(mo1, mo2)
extractReads(memFunc, in1, mo1, in2, mo2)
obs.verify(mo1).write(new FastqRecord("r01/1", "A", "", "H"))
obs.verify(mo2).write(new FastqRecord("r01/2", "T", "", "I"))
obs.verify(mo1).write(new FastqRecord("r03/1", "G", "", "H"))
obs.verify(mo2).write(new FastqRecord("r03/2", "C", "", "I"))
verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord])
verify(mo1).write(new FastqRecord("r01/1", "A", "", "H"))
verify(mo1).write(new FastqRecord("r03/1", "G", "", "H"))
verify(mo2, times(2)).write(anyObject.asInstanceOf[FastqRecord])
verify(mo2).write(new FastqRecord("r01/2", "T", "", "I"))
verify(mo2).write(new FastqRecord("r03/2", "C", "", "I"))
}
@Test def testWriteNoOutputFastq2() = {
val memFunc: (FastqPair => Boolean) = (recs) => true
val in1 = mock[FastqReader]
val in2 = mock[FastqReader]
val out1 = mock[BasicFastqWriter]
val thrown = intercept[IllegalArgumentException] {
selectFastqReads(memFunc, in1, out1, in2)
}
thrown.getMessage should ===("Missing output FASTQ 2")
verify(out1, never).write(anyObject.asInstanceOf[FastqRecord])
}
@Test def testWriteNoInputFastq2() = {
val memFunc: (FastqPair => Boolean) = (recs) => true
val in1 = mock[FastqReader]
val out1 = mock[BasicFastqWriter]
val out2 = mock[BasicFastqWriter]
val thrown = intercept[IllegalArgumentException] {
selectFastqReads(memFunc, in1, out1, outputFastq2 = out2)
}
thrown.getMessage should ===("Output FASTQ 2 supplied but there is no input FASTQ 2")
verify(out1, never).write(anyObject.asInstanceOf[FastqRecord])
verify(out2, never).write(anyObject.asInstanceOf[FastqRecord])
@Test def testArgsMinimum() = {
val args = Array(
"-I", resourcePath("/single01.bam"),
"--interval", "chrQ:1-400",
"-i", resourcePath("/single01.fq"),
"-o", "/tmp/tm1.fq"
)
val parsed = parseArgs(args)
parsed.inputBam shouldBe resourceFile("/single01.bam")
parsed.intervals shouldBe List("chrQ:1-400")
parsed.inputFastq1 shouldBe resourceFile("/single01.fq")
parsed.outputFastq1 shouldBe new File("/tmp/tm1.fq")
}
@Test def testArgsMaximum() = {
val args = Array(
"-I", resourcePath("/paired01.bam"),
"--interval", "chrQ:1-400",
"--interval", "chrP:1000-4000",
"-i", resourcePath("/paired01a.fq"),
"-j", resourcePath("/paired01b.fq"),
"-o", "/tmp/tm1.fq",
"-p", "/tmp/tm2.fq",
"-s", "2",
"-Q", "30"
)
val parsed = parseArgs(args)
parsed.inputBam shouldBe resourceFile("/paired01.bam")
parsed.intervals shouldBe List("chrQ:1-400", "chrP:1000-4000")
parsed.inputFastq1 shouldBe resourceFile("/paired01a.fq")
parsed.inputFastq2.get shouldBe resourceFile("/paired01b.fq")
parsed.outputFastq1 shouldBe new File("/tmp/tm1.fq")
parsed.outputFastq2.get shouldBe new File("/tmp/tm2.fq")
parsed.commonSuffixLength shouldBe 2
parsed.minMapQ shouldBe 30
}
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment