diff --git a/.idea/libraries/Maven__org_mockito_mockito_all_1_9_5.xml b/.idea/libraries/Maven__org_mockito_mockito_all_1_9_5.xml new file mode 100644 index 0000000000000000000000000000000000000000..7797878d7a6e42ddf7b4bca81d9b434485b1c1f9 --- /dev/null +++ b/.idea/libraries/Maven__org_mockito_mockito_all_1_9_5.xml @@ -0,0 +1,13 @@ +<component name="libraryTable"> + <library name="Maven: org.mockito:mockito-all:1.9.5"> + <CLASSES> + <root url="jar://$MAVEN_REPOSITORY$/org/mockito/mockito-all/1.9.5/mockito-all-1.9.5.jar!/" /> + </CLASSES> + <JAVADOC> + <root url="jar://$MAVEN_REPOSITORY$/org/mockito/mockito-all/1.9.5/mockito-all-1.9.5-javadoc.jar!/" /> + </JAVADOC> + <SOURCES> + <root url="jar://$MAVEN_REPOSITORY$/org/mockito/mockito-all/1.9.5/mockito-all-1.9.5-sources.jar!/" /> + </SOURCES> + </library> +</component> \ No newline at end of file diff --git a/Biopet.iml b/Biopet.iml index 02d0bed78622af5b387d7774db3539c061857363..87ee5a467b1042f6a3cde8c772db14b72b4d553b 100644 --- a/Biopet.iml +++ b/Biopet.iml @@ -1,5 +1,10 @@ <?xml version="1.0" encoding="UTF-8"?> <module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4"> + <component name="FacetManager"> + <facet type="Python" name="Python"> + <configuration sdkName="" /> + </facet> + </component> <component name="NewModuleRootManager" inherit-compiler-output="false"> <output url="file://$MODULE_DIR$/target/classes" /> <output-test url="file://$MODULE_DIR$/target/test-classes" /> @@ -8,6 +13,6 @@ </content> <orderEntry type="inheritedJdk" /> <orderEntry type="sourceFolder" forTests="false" /> + <orderEntry type="library" name="gatk-queue-package-distribution-3.3" level="project" /> </component> -</module> - +</module> \ No newline at end of file diff --git a/biopet-framework/BiopetFramework.iml b/biopet-framework/BiopetFramework.iml index cd52bbb064aa631f89cd2e626866d9f7ee166ef3..576d75a0e85692d1617394c0fee61366299e856c 100644 --- a/biopet-framework/BiopetFramework.iml +++ b/biopet-framework/BiopetFramework.iml @@ -29,6 +29,7 @@ <orderEntry type="sourceFolder" forTests="false" /> <orderEntry type="library" name="com.twitter:algebird-core_2.10:0.8.1" level="project" /> <orderEntry type="library" name="gatk-protected" level="project" /> + <orderEntry type="library" name="Maven: org.mockito:mockito-all:1.9.5" level="project" /> <orderEntry type="library" scope="TEST" name="Maven: org.testng:testng:6.8" level="project" /> <orderEntry type="library" scope="TEST" name="Maven: junit:junit:4.10" level="project" /> <orderEntry type="library" scope="TEST" name="Maven: org.hamcrest:hamcrest-core:1.1" level="project" /> @@ -36,9 +37,9 @@ <orderEntry type="library" scope="TEST" name="Maven: com.beust:jcommander:1.27" level="project" /> <orderEntry type="library" scope="TEST" name="Maven: org.yaml:snakeyaml:1.6" level="project" /> <orderEntry type="library" name="Maven: org.scalatest:scalatest_2.11:2.2.1" level="project" /> - <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.2" level="project" /> <orderEntry type="library" name="Maven: org.scala-lang:scala-reflect:2.11.2" level="project" /> <orderEntry type="library" name="Maven: org.scala-lang.modules:scala-xml_2.11:1.0.2" level="project" /> + <orderEntry type="library" name="Maven: org.scala-lang:scala-library:2.11.2" level="project" /> <orderEntry type="library" name="Maven: org.broadinstitute.gatk:gatk-queue-package-distribution:3.3" level="project" /> <orderEntry type="library" name="Maven: io.argonaut:argonaut_2.11:6.1-M4" level="project" /> <orderEntry type="library" name="Maven: org.scalaz:scalaz-core_2.11:7.1.0" level="project" /> @@ -48,6 +49,6 @@ <orderEntry type="library" name="Maven: org.biojava:biojava3-sequencing:3.1.0" level="project" /> <orderEntry type="library" name="Maven: com.google.guava:guava:18.0" level="project" /> <orderEntry type="library" name="Maven: com.github.scopt:scopt_2.10:3.2.0" level="project" /> + <orderEntry type="library" name="Maven: org.mockito:mockito-all:1.9.5" level="project" /> </component> </module> - diff --git a/biopet-framework/pom.xml b/biopet-framework/pom.xml index 36e912c26e4e760f1ce9843a1340eb08b1f81568..f116688f95fc697f13afafa47dd191f347679c45 100644 --- a/biopet-framework/pom.xml +++ b/biopet-framework/pom.xml @@ -72,6 +72,11 @@ <artifactId>scopt_2.10</artifactId> <version>3.2.0</version> </dependency> + <dependency> + <groupId>org.mockito</groupId> + <artifactId>mockito-all</artifactId> + <version>1.9.5</version> + </dependency> </dependencies> <build> <resources> diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala index 25d4cacc0bcf211f84b331743ee2c2ca03dbb11a..80308d9f6393c38a5fb278c2fba39fb67fc2f6fe 100644 --- a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutable.scala @@ -21,6 +21,7 @@ object BiopetExecutable { nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp), "tool" -> List( nl.lumc.sasc.biopet.tools.WipeReads, + nl.lumc.sasc.biopet.tools.ExtractAlignedFastq, nl.lumc.sasc.biopet.tools.BiopetFlagstat, nl.lumc.sasc.biopet.tools.CheckAllelesVcfInBam, nl.lumc.sasc.biopet.tools.VcfToTsv, diff --git a/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala new file mode 100644 index 0000000000000000000000000000000000000000..83a607de4a035013cbf82072223f533380ff942c --- /dev/null +++ b/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastq.scala @@ -0,0 +1,249 @@ +/** + * Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl> + * @author Wibowo Arindrarto <w.arindrarto@lumc.nl> + */ +package nl.lumc.sasc.biopet.tools + +import java.io.File + +import scala.collection.mutable.{ Set => MSet } +import scala.collection.JavaConverters._ + +import htsjdk.samtools.QueryInterval +import htsjdk.samtools.SamReaderFactory +import htsjdk.samtools.ValidationStringency +import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord } +import htsjdk.samtools.util.Interval + +import nl.lumc.sasc.biopet.core.ToolCommand + +object ExtractAlignedFastq extends ToolCommand { + + type FastqPair = (FastqRecord, FastqRecord) + + /** + * Function to create iterator over Interval given input interval string + * + * Valid interval strings are either of these: + * - chr5:10000-11000 + * - chr5:10,000-11,000 + * - chr5:10.000-11.000 + * - chr5:10000-11,000 + * In all cases above, the region span base #10,000 to base number #11,000 in chromosome 5 + * (first base is numbered 1) + * + * An interval string with a single base is also allowed: + * - chr5:10000 + * - chr5:10,000 + * - chr5:10.000 + * + * @param inStrings iterable yielding input interval string + */ + def makeIntervalFromString(inStrings: Iterable[String]): Iterator[Interval] = { + + // FIXME: can we combine these two patterns into one regex? + // matches intervals with start and end coordinates + val ptn1 = """([\w_-]+):([\d.,]+)-([\d.,]+)""".r + // matches intervals with start coordinate only + val ptn2 = """([\w_-]+):([\d.,]+)""".r + // make ints from coordinate strings + // NOTE: while it is possible for coordinates to exceed Int.MaxValue, we are limited + // by the Interval constructor only accepting ints + def intFromCoord(s: String): Int = s.replaceAll(",", "").replaceAll("\\.", "").toInt + + inStrings.map(x => x match { + case ptn1(chr, start, end) => new Interval(chr, intFromCoord(start), intFromCoord(end)) + case ptn2(chr, start) => + val startCoord = intFromCoord(start) + new Interval(chr, startCoord, startCoord) + case _ => throw new IllegalArgumentException("Invalid interval string: " + x) + }) + .toIterator + } + + /** + * Function to create object that checks whether a given FASTQ record is mapped + * to the given interval or not + * + * @param iv iterable yielding features to check + * @param inAln input SAM/BAM file + * @param minMapQ minimum mapping quality of read to include + * @param commonSuffixLength length of suffix common to all read pairs + * @return + */ + def makeMembershipFunction(iv: Iterator[Interval], + inAln: File, + minMapQ: Int = 0, + commonSuffixLength: Int = 0): (FastqPair => Boolean) = { + + val inAlnReader = SamReaderFactory + .make() + .validationStringency(ValidationStringency.LENIENT) + .open(inAln) + require(inAlnReader.hasIndex) + + def getSequenceIndex(name: String): Int = inAlnReader.getFileHeader.getSequenceIndex(name) match { + case x if x >= 0 => + x + case otherwise => + throw new IllegalArgumentException("Chromosome " + name + " is not found in the alignment file") + } + + val queries: Array[QueryInterval] = iv.toList + // sort Interval + .sortBy(x => (x.getSequence, x.getStart, x.getEnd)) + // transform to QueryInterval + .map(x => new QueryInterval(getSequenceIndex(x.getSequence), x.getStart, x.getEnd)) + // cast to array + .toArray + + lazy val selected: MSet[String] = inAlnReader + // query BAM file for overlapping reads + .queryOverlapping(queries) + // for Scala compatibility + .asScala + // filter based on mapping quality + .filter(x => x.getMappingQuality >= minMapQ) + // iteratively add read name to the selected set + .foldLeft(MSet.empty[String])( + (acc, x) => { + logger.debug("Adding " + x.getReadName + " to set ...") + acc += x.getReadName + } + ) + + (pair: FastqPair) => pair._2 match { + case null => selected.contains(pair._1.getReadHeader) + case otherwise => + require(commonSuffixLength < pair._1.getReadHeader.length) + require(commonSuffixLength < pair._2.getReadHeader.length) + selected.contains(pair._1.getReadHeader.dropRight(commonSuffixLength)) + } + } + + def selectFastqReads(memFunc: FastqPair => Boolean, + inputFastq1: FastqReader, + outputFastq1: BasicFastqWriter, + inputFastq2: FastqReader = null, + outputFastq2: BasicFastqWriter = null): Unit = { + + val i1 = inputFastq1.iterator.asScala + val i2 = inputFastq2 match { + case null => Iterator.continually(null) + case otherwise => otherwise.iterator.asScala + } + val o1 = outputFastq1 + val o2 = (inputFastq2, outputFastq2) match { + case (null, null) => null + case (_, null) => throw new IllegalArgumentException("Missing output FASTQ 2") + case (null, _) => throw new IllegalArgumentException("Output FASTQ 2 supplied but there is no input FASTQ 2") + case (x, y) => outputFastq2 + } + + logger.info("Writing output file(s) ...") + // zip, filter based on function, and write to output file(s) + i1.zip(i2) + .filter(rec => memFunc(rec._1, rec._2)) + .foreach { + case (rec1, null) => + o1.write(rec1) + case (rec1, rec2) => + o1.write(rec1) + o2.write(rec2) + } + + } + + case class Args(inputBam: File = null, + intervals: List[String] = List.empty[String], + inputFastq1: File = null, + inputFastq2: File = null, + outputFastq1: File = null, + outputFastq2: File = null, + minMapQ: Int = 0, + commonSuffixLength: Int = 0) extends AbstractArgs + + class OptParser extends AbstractOptParser { + + head( + s""" + |$commandName - Select aligned FASTQ records + """.stripMargin) + + opt[File]('I', "input_file") required () valueName "<bam>" action { (x, c) => + c.copy(inputBam = x) + } validate { + x => if (x.exists) success else failure("Input BAM file not found") + } text "Input BAM file" + + opt[String]('r', "interval") required () unbounded () valueName "<interval>" action { (x, c) => + // yes, we are appending and yes it's O(n) ~ preserving order is more important than speed here + c.copy(intervals = c.intervals :+ x) + } text "Interval strings" + + opt[File]('i', "in1") required () valueName "<fastq>" action { (x, c) => + c.copy(inputFastq1 = x) + } validate { + x => if (x.exists) success else failure("Input FASTQ file 1 not found") + } text "Input FASTQ file 1" + + opt[File]('j', "in2") optional () valueName "<fastq>" action { (x, c) => + c.copy(inputFastq1 = x) + } validate { + x => if (x.exists) success else failure("Input FASTQ file 2 not found") + } text "Input FASTQ file 2 (default: none)" + + opt[File]('o', "out1") required () valueName "<fastq>" action { (x, c) => + c.copy(outputFastq1 = x) + } text "Output FASTQ file 1" + + opt[File]('p', "out2") optional () valueName "<fastq>" action { (x, c) => + c.copy(outputFastq1 = x) + } text "Output FASTQ file 2 (default: none)" + + opt[Int]('Q', "min_mapq") optional () action { (x, c) => + c.copy(minMapQ = x) + } text "Minimum MAPQ of reads in target region to remove (default: 0)" + + opt[Int]('s', "read_suffix_length") optional () action { (x, c) => + c.copy(commonSuffixLength = x) + } text "Length of common suffix from each read pair (default: 0)" + + note( + """ + |This tool creates FASTQ file(s) containing reads mapped to the given alignment intervals. + """.stripMargin) + + checkConfig { c => + if (!c.inputBam.exists) + failure("Input BAM file not found") + else if (!c.inputFastq1.exists) + failure("Input FASTQ file 1 not found") + else if (c.inputFastq2 != null && c.outputFastq2 == null) + failure("Missing output FASTQ file 2") + else if (c.inputFastq2 == null && c.outputFastq2 != null) + failure("Missing input FASTQ file 2") + else + success + } + } + + def main(args: Array[String]): Unit = { + + val commandArgs: Args = new OptParser() + .parse(args, Args()) + .getOrElse(sys.exit(1)) + + val memFunc = makeMembershipFunction( + iv = makeIntervalFromString(commandArgs.intervals), + inAln = commandArgs.inputBam, + minMapQ = commandArgs.minMapQ, + commonSuffixLength = commandArgs.commonSuffixLength) + + selectFastqReads(memFunc, + inputFastq1 = new FastqReader(commandArgs.inputFastq1), + inputFastq2 = new FastqReader(commandArgs.inputFastq2), + outputFastq1 = new BasicFastqWriter(commandArgs.outputFastq1), + outputFastq2 = new BasicFastqWriter(commandArgs.outputFastq2)) + } +} diff --git a/biopet-framework/src/test/resources/paired01a.fq b/biopet-framework/src/test/resources/paired01a.fq new file mode 100644 index 0000000000000000000000000000000000000000..530a1bb9bada4865a047a8c8fe73dd5700d0b84d --- /dev/null +++ b/biopet-framework/src/test/resources/paired01a.fq @@ -0,0 +1,20 @@ +@r01/1 +A ++ +H +@r02/1 +T ++ +I +@r03/1 +G ++ +H +@r04/1 +C ++ +I +@r05/1 +A ++ +H diff --git a/biopet-framework/src/test/resources/paired01b.fq b/biopet-framework/src/test/resources/paired01b.fq new file mode 100644 index 0000000000000000000000000000000000000000..72cf9246dcce5c29a2500ced269dc3c17fd18847 --- /dev/null +++ b/biopet-framework/src/test/resources/paired01b.fq @@ -0,0 +1,20 @@ +@r01/2 +T ++ +I +@r02/2 +A ++ +H +@r03/2 +C ++ +I +@r04/2 +G ++ +H +@r05/2 +T ++ +I diff --git a/biopet-framework/src/test/resources/single01.fq b/biopet-framework/src/test/resources/single01.fq new file mode 100644 index 0000000000000000000000000000000000000000..acf48ca7e1a85ccaee4566da3c1c9094cce84c91 --- /dev/null +++ b/biopet-framework/src/test/resources/single01.fq @@ -0,0 +1,20 @@ +@r01 +A ++ +H +@r02 +T ++ +I +@r03 +G ++ +H +@r04 +C ++ +I +@r05 +A ++ +H diff --git a/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala new file mode 100644 index 0000000000000000000000000000000000000000..0dac57875f5ae3966b009d9605a2e7c7d55b5d96 --- /dev/null +++ b/biopet-framework/src/test/scala/nl/lumc/sasc/biopet/tools/ExtractAlignedFastqUnitTest.scala @@ -0,0 +1,237 @@ +/** + * Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl> + * @author Wibowo Arindrarto <w.arindrarto@lumc.nl> + */ +package nl.lumc.sasc.biopet.tools + +import java.io.File +import java.nio.file.Paths + +import org.mockito.Matchers._ +import org.mockito.Mockito._ +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar +import org.scalatest.testng.TestNGSuite +import org.testng.annotations.{ DataProvider, Test } + +import htsjdk.samtools.util.Interval +import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord } + +class ExtractAlignedFastqUnitTest extends TestNGSuite with MockitoSugar with Matchers { + + import ExtractAlignedFastq._ + + private def resourceFile(p: String): File = + new File(Paths.get(getClass.getResource(p).toURI).toString) + + private def makeInterval(chr: String, start: Int, end: Int): Interval = + new Interval(chr, start, end) + + private def makeRecord(header: String): FastqRecord = + new FastqRecord(header, "ATGC", "", "HIHI") + + private def makeSingleRecords(headers: String*): Map[String, FastqPair] = + headers.map(x => (x, (makeRecord(x), null))).toMap + + private def makePairRecords(headers: (String, (String, String))*): Map[String, FastqPair] = + headers.map(x => (x._1, (makeRecord(x._2._1), makeRecord(x._2._2)))).toMap + + private def makeClue(tName: String, f: File, rName: String): String = + tName + " on " + f.getName + ", read " + rName + ": " + + @Test def testIntervalStartEnd() = { + val obs = makeIntervalFromString(List("chr5:1000-1100")).next() + val exp = new Interval("chr5", 1000, 1100) + obs.getSequence should === (exp.getSequence) + obs.getStart should === (exp.getStart) + obs.getEnd should === (exp.getEnd) + } + + @Test def testIntervalStartEndComma() = { + val obs = makeIntervalFromString(List("chr5:1,000-1,100")).next() + val exp = new Interval("chr5", 1000, 1100) + obs.getSequence should === (exp.getSequence) + obs.getStart should === (exp.getStart) + obs.getEnd should === (exp.getEnd) + } + + @Test def testIntervalStartEndDot() = { + val obs = makeIntervalFromString(List("chr5:1.000-1.100")).next() + val exp = new Interval("chr5", 1000, 1100) + obs.getSequence should === (exp.getSequence) + obs.getStart should === (exp.getStart) + obs.getEnd should === (exp.getEnd) + } + + @Test def testIntervalStart() = { + val obs = makeIntervalFromString(List("chr5:1000")).next() + val exp = new Interval("chr5", 1000, 1000) + obs.getSequence should === (exp.getSequence) + obs.getStart should === (exp.getStart) + obs.getEnd should === (exp.getEnd) + } + + @Test def testIntervalError() = { + val thrown = intercept[IllegalArgumentException] { + makeIntervalFromString(List("chr5:1000-")).next() + } + thrown.getMessage should === ("Invalid interval string: chr5:1000-") + } + + @Test def testMemFuncIntervalError() = { + val iv = Iterator(new Interval("chrP", 1, 1000)) + val inAln = resourceFile("/single01.bam") + val thrown = intercept[IllegalArgumentException] { + makeMembershipFunction(iv, inAln) + } + thrown.getMessage should === ("Chromosome chrP is not found in the alignment file") + } + + @DataProvider(name = "singleAlnProvider1", parallel = true) + def singleAlnProvider1() = { + val sFastq1 = makeSingleRecords("r01", "r02", "r03", "r04", "r05") + val sFastq1Default = sFastq1.keys.map(x => (x, false)).toMap + val sBam01 = resourceFile("/single01.bam") + + Array( + Array("adjacent left", + makeInterval("chrQ", 30, 49), sBam01, sFastq1, sFastq1Default), + Array("adjacent right", + makeInterval("chrQ", 200, 210), sBam01, sFastq1, sFastq1Default), + Array("no overlap", + makeInterval("chrQ", 220, 230), sBam01, sFastq1, sFastq1Default), + Array("partial overlap", + makeInterval("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)), + Array("enveloped", + makeInterval("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true)) + ) + } + + @Test(dataProvider = "singleAlnProvider1") + def testSingleBamDefault(name: String, feat: Interval, inAln: File, + fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = { + require(resultMap.keySet == fastqMap.keySet) + val memFunc = makeMembershipFunction(Iterator(feat), inAln) + for ((key, (rec1, rec2)) <- fastqMap) { + withClue(makeClue(name, inAln, key)) { + memFunc(rec1, rec2) shouldBe resultMap(key) + } + } + } + + @DataProvider(name = "singleAlnProvider2", parallel = true) + def singleAlnProvider2() = { + val sFastq2 = makeSingleRecords("r01", "r02", "r04", "r07", "r06", "r08") + val sFastq2Default = sFastq2.keys.map(x => (x, false)).toMap + val sBam02 = resourceFile("/single02.bam") + + Array( + Array("less than minimum MAPQ", + makeInterval("chrQ", 830, 890), sBam02, 60, sFastq2, sFastq2Default), + Array("greater than minimum MAPQ", + makeInterval("chrQ", 830, 890), sBam02, 20, sFastq2, sFastq2Default.updated("r07", true)), + Array("equal to minimum MAPQ", + makeInterval("chrQ", 260, 320), sBam02, 30, sFastq2, sFastq2Default.updated("r01", true)) + ) + } + + @Test(dataProvider = "singleAlnProvider2") + def testSingleBamMinMapQ(name: String, feat: Interval, inAln: File, minMapQ: Int, + fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = { + require(resultMap.keySet == fastqMap.keySet) + val memFunc = makeMembershipFunction(Iterator(feat), inAln, minMapQ) + for ((key, (rec1, rec2)) <- fastqMap) { + withClue(makeClue(name, inAln, key)) { + memFunc(rec1, rec2) shouldBe resultMap(key) + } + } + } + @DataProvider(name = "pairAlnProvider1", parallel = true) + def pairAlnProvider1() = { + val pFastq1 = makePairRecords( + ("r01", ("r01/1", "r01/2")), + ("r02", ("r02/1", "r02/2")), + ("r03", ("r03/1", "r03/2")), + ("r04", ("r04/1", "r04/2")), + ("r05", ("r05/1", "r05/2"))) + val pFastq1Default = pFastq1.keys.map(x => (x, false)).toMap + val pBam01 = resourceFile("/paired01.bam") + + Array( + Array("adjacent left", + makeInterval("chrQ", 30, 49), pBam01, pFastq1, pFastq1Default), + Array("adjacent right", + makeInterval("chrQ", 200, 210), pBam01, pFastq1, pFastq1Default), + Array("no overlap", + makeInterval("chrQ", 220, 230), pBam01, pFastq1, pFastq1Default), + Array("partial overlap", + makeInterval("chrQ", 430, 460), pBam01, pFastq1, pFastq1Default.updated("r04", true)), + Array("enveloped", + makeInterval("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)), + Array("in intron", + makeInterval("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true)) + ) + } + + @Test(dataProvider = "pairAlnProvider1") + def testPairBamDefault(name: String, feat: Interval, inAln: File, + fastqMap: Map[String, FastqPair], resultMap: Map[String, Boolean]) = { + require(resultMap.keySet == fastqMap.keySet) + val memFunc = makeMembershipFunction(Iterator(feat), inAln, commonSuffixLength = 2) + for ((key, (rec1, rec2)) <- fastqMap) { + withClue(makeClue(name, inAln, key)) { + memFunc(rec1, rec2) shouldBe resultMap(key) + } + } + } + + @Test def testWriteSingleBamDefault() = { + val memFunc = (recs: FastqPair) => Set("r01", "r03").contains(recs._1.getReadHeader) + val in1 = new FastqReader(resourceFile("/single01.fq")) + val mo1 = mock[BasicFastqWriter] + selectFastqReads(memFunc, in1, mo1) + verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord]) + verify(mo1).write(new FastqRecord("r01", "A", "", "H")) + verify(mo1).write(new FastqRecord("r03", "G", "", "H")) + } + + @Test def testWritePairBamDefault() = { + val memFunc = (recs: FastqPair) => Set("r01/1", "r01/2", "r03/1", "r03/2").contains(recs._1.getReadHeader) + val in1 = new FastqReader(resourceFile("/paired01a.fq")) + val in2 = new FastqReader(resourceFile("/paired01b.fq")) + val mo1 = mock[BasicFastqWriter] + val mo2 = mock[BasicFastqWriter] + selectFastqReads(memFunc, in1, mo1, in2, mo2) + verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord]) + verify(mo1).write(new FastqRecord("r01/1", "A", "", "H")) + verify(mo1).write(new FastqRecord("r03/1", "G", "", "H")) + verify(mo2, times(2)).write(anyObject.asInstanceOf[FastqRecord]) + verify(mo2).write(new FastqRecord("r01/2", "T", "", "I")) + verify(mo2).write(new FastqRecord("r03/2", "C", "", "I")) + } + + @Test def testWriteNoOutputFastq2() = { + val memFunc: (FastqPair => Boolean) = (recs) => true + val in1 = mock[FastqReader] + val in2 = mock[FastqReader] + val out1 = mock[BasicFastqWriter] + val thrown = intercept[IllegalArgumentException] { + selectFastqReads(memFunc, in1, out1, in2) + } + thrown.getMessage should === ("Missing output FASTQ 2") + verify(out1, never).write(anyObject.asInstanceOf[FastqRecord]) + } + + @Test def testWriteNoInputFastq2() = { + val memFunc: (FastqPair => Boolean) = (recs) => true + val in1 = mock[FastqReader] + val out1 = mock[BasicFastqWriter] + val out2 = mock[BasicFastqWriter] + val thrown = intercept[IllegalArgumentException] { + selectFastqReads(memFunc, in1, out1, outputFastq2 = out2) + } + thrown.getMessage should === ("Output FASTQ 2 supplied but there is no input FASTQ 2") + verify(out1, never).write(anyObject.asInstanceOf[FastqRecord]) + verify(out2, never).write(anyObject.asInstanceOf[FastqRecord]) + } +}