ExtractAlignedFastqTest.scala 10.5 KB
Newer Older
bow's avatar
bow committed
1
2
3
4
5
6
/**
 * Copyright (c) 2014 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
 * @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
 */
package nl.lumc.sasc.biopet.tools

bow's avatar
bow committed
7
8
import java.io.File
import java.nio.file.Paths
bow's avatar
bow committed
9
10

import org.mockito.Matchers._
bow's avatar
bow committed
11
import org.mockito.Mockito.{ inOrder => inOrd, times, verify }
bow's avatar
bow committed
12
import org.scalatest.Matchers
bow's avatar
bow committed
13
import org.scalatest.mock.MockitoSugar
bow's avatar
bow committed
14
import org.scalatest.testng.TestNGSuite
15
import org.testng.annotations.{ DataProvider, Test }
bow's avatar
bow committed
16

bow's avatar
bow committed
17
import htsjdk.samtools.util.Interval
bow's avatar
bow committed
18
import htsjdk.samtools.fastq.{ BasicFastqWriter, FastqReader, FastqRecord }
bow's avatar
bow committed
19

bow's avatar
bow committed
20
class ExtractAlignedFastqTest extends TestNGSuite with MockitoSugar with Matchers {
bow's avatar
bow committed
21
22
23

  import ExtractAlignedFastq._

bow's avatar
bow committed
24
  private def resourceFile(p: String): File =
bow's avatar
bow committed
25
26
27
28
    new File(resourcePath(p))

  private def resourcePath(p: String): String =
    Paths.get(getClass.getResource(p).toURI).toString
bow's avatar
bow committed
29

30
31
32
33
34
  private def makeInterval(chr: String, start: Int, end: Int): Iterator[Interval] =
    Iterator(new Interval(chr, start, end))

  private def makeInterval(ivs: Iterable[(String, Int, Int)]): Iterator[Interval] =
    ivs.map(x => new Interval(x._1, x._2, x._3)).toIterator
35
36
37
38

  private def makeRecord(header: String): FastqRecord =
    new FastqRecord(header, "ATGC", "", "HIHI")

39
40
  private def makeSingleRecords(headers: String*): Map[String, FastqInput] =
    headers.map(x => (x, (makeRecord(x), None))).toMap
41

42
43
  private def makePairRecords(headers: (String, (String, String))*): Map[String, FastqInput] =
    headers.map(x => (x._1, (makeRecord(x._2._1), Some(makeRecord(x._2._2))))).toMap
44
45
46

  private def makeClue(tName: String, f: File, rName: String): String =
    tName + " on " + f.getName + ", read " + rName + ": "
bow's avatar
bow committed
47

bow's avatar
bow committed
48
  @Test def testIntervalStartEnd() = {
bow's avatar
bow committed
49
50
    val obs = makeIntervalFromString(List("chr5:1000-1100")).next()
    val exp = new Interval("chr5", 1000, 1100)
Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
53
    obs.getSequence should ===(exp.getSequence)
    obs.getStart should ===(exp.getStart)
    obs.getEnd should ===(exp.getEnd)
bow's avatar
bow committed
54
55
  }

bow's avatar
bow committed
56
  @Test def testIntervalStartEndComma() = {
bow's avatar
bow committed
57
58
    val obs = makeIntervalFromString(List("chr5:1,000-1,100")).next()
    val exp = new Interval("chr5", 1000, 1100)
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
    obs.getSequence should ===(exp.getSequence)
    obs.getStart should ===(exp.getStart)
    obs.getEnd should ===(exp.getEnd)
bow's avatar
bow committed
62
63
64
  }

  @Test def testIntervalStartEndDot() = {
bow's avatar
bow committed
65
66
    val obs = makeIntervalFromString(List("chr5:1.000-1.100")).next()
    val exp = new Interval("chr5", 1000, 1100)
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
69
    obs.getSequence should ===(exp.getSequence)
    obs.getStart should ===(exp.getStart)
    obs.getEnd should ===(exp.getEnd)
bow's avatar
bow committed
70
71
72
  }

  @Test def testIntervalStart() = {
bow's avatar
bow committed
73
74
    val obs = makeIntervalFromString(List("chr5:1000")).next()
    val exp = new Interval("chr5", 1000, 1000)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
77
    obs.getSequence should ===(exp.getSequence)
    obs.getStart should ===(exp.getStart)
    obs.getEnd should ===(exp.getEnd)
bow's avatar
bow committed
78
79
  }

bow's avatar
bow committed
80
81
82
  @Test def testIntervalError() = {
    val thrown = intercept[IllegalArgumentException] {
      makeIntervalFromString(List("chr5:1000-")).next()
bow's avatar
bow committed
83
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
84
    thrown.getMessage should ===("Invalid interval string: chr5:1000-")
bow's avatar
bow committed
85
86
87
88
89
90
91
92
  }

  @Test def testMemFuncIntervalError() = {
    val iv = Iterator(new Interval("chrP", 1, 1000))
    val inAln = resourceFile("/single01.bam")
    val thrown = intercept[IllegalArgumentException] {
      makeMembershipFunction(iv, inAln)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
93
    thrown.getMessage should ===("Chromosome chrP is not found in the alignment file")
bow's avatar
bow committed
94
  }
bow's avatar
bow committed
95

96
97
98
99
100
101
102
103
  @DataProvider(name = "singleAlnProvider1", parallel = true)
  def singleAlnProvider1() = {
    val sFastq1 = makeSingleRecords("r01", "r02", "r03", "r04", "r05")
    val sFastq1Default = sFastq1.keys.map(x => (x, false)).toMap
    val sBam01 = resourceFile("/single01.bam")

    Array(
      Array("adjacent left",
bow's avatar
bow committed
104
        makeInterval("chrQ", 30, 49), sBam01, sFastq1, sFastq1Default),
105
      Array("adjacent right",
bow's avatar
bow committed
106
        makeInterval("chrQ", 200, 210), sBam01, sFastq1, sFastq1Default),
107
      Array("no overlap",
bow's avatar
bow committed
108
        makeInterval("chrQ", 220, 230), sBam01, sFastq1, sFastq1Default),
109
      Array("partial overlap",
bow's avatar
bow committed
110
        makeInterval("chrQ", 430, 460), sBam01, sFastq1, sFastq1Default.updated("r04", true)),
111
      Array("enveloped",
112
113
114
115
        makeInterval("chrQ", 693, 698), sBam01, sFastq1, sFastq1Default.updated("r03", true)),
      Array("partial overlap and enveloped",
        makeInterval(List(("chrQ", 693, 698), ("chrQ", 430, 460))), sBam01,
        sFastq1, sFastq1Default.updated("r03", true).updated("r04", true))
116
    )
bow's avatar
bow committed
117
118
  }

119
  @Test(dataProvider = "singleAlnProvider1")
120
  def testSingleBamDefault(name: String, feats: Iterator[Interval], inAln: File,
121
                           fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
122
    require(resultMap.keySet == fastqMap.keySet)
123
    val memFunc = makeMembershipFunction(feats, inAln)
124
125
126
127
128
    for ((key, (rec1, rec2)) <- fastqMap) {
      withClue(makeClue(name, inAln, key)) {
        memFunc(rec1, rec2) shouldBe resultMap(key)
      }
    }
bow's avatar
bow committed
129
  }
bow's avatar
bow committed
130

bow's avatar
bow committed
131
132
133
134
135
136
137
138
  @DataProvider(name = "singleAlnProvider2", parallel = true)
  def singleAlnProvider2() = {
    val sFastq2 = makeSingleRecords("r01", "r02", "r04", "r07", "r06", "r08")
    val sFastq2Default = sFastq2.keys.map(x => (x, false)).toMap
    val sBam02 = resourceFile("/single02.bam")

    Array(
      Array("less than minimum MAPQ",
bow's avatar
bow committed
139
        makeInterval("chrQ", 830, 890), sBam02, 60, sFastq2, sFastq2Default),
bow's avatar
bow committed
140
      Array("greater than minimum MAPQ",
bow's avatar
bow committed
141
        makeInterval("chrQ", 830, 890), sBam02, 20, sFastq2, sFastq2Default.updated("r07", true)),
bow's avatar
bow committed
142
      Array("equal to minimum MAPQ",
bow's avatar
bow committed
143
        makeInterval("chrQ", 260, 320), sBam02, 30, sFastq2, sFastq2Default.updated("r01", true))
bow's avatar
bow committed
144
145
146
147
    )
  }

  @Test(dataProvider = "singleAlnProvider2")
148
  def testSingleBamMinMapQ(name: String, feats: Iterator[Interval], inAln: File, minMapQ: Int,
149
                           fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
bow's avatar
bow committed
150
    require(resultMap.keySet == fastqMap.keySet)
151
    val memFunc = makeMembershipFunction(feats, inAln, minMapQ)
bow's avatar
bow committed
152
153
154
155
156
157
    for ((key, (rec1, rec2)) <- fastqMap) {
      withClue(makeClue(name, inAln, key)) {
        memFunc(rec1, rec2) shouldBe resultMap(key)
      }
    }
  }
158
159
160
161
162
163
164
165
166
167
168
169
170
  @DataProvider(name = "pairAlnProvider1", parallel = true)
  def pairAlnProvider1() = {
    val pFastq1 = makePairRecords(
      ("r01", ("r01/1", "r01/2")),
      ("r02", ("r02/1", "r02/2")),
      ("r03", ("r03/1", "r03/2")),
      ("r04", ("r04/1", "r04/2")),
      ("r05", ("r05/1", "r05/2")))
    val pFastq1Default = pFastq1.keys.map(x => (x, false)).toMap
    val pBam01 = resourceFile("/paired01.bam")

    Array(
      Array("adjacent left",
bow's avatar
bow committed
171
        makeInterval("chrQ", 30, 49), pBam01, pFastq1, pFastq1Default),
172
      Array("adjacent right",
bow's avatar
bow committed
173
        makeInterval("chrQ", 200, 210), pBam01, pFastq1, pFastq1Default),
174
      Array("no overlap",
bow's avatar
bow committed
175
        makeInterval("chrQ", 220, 230), pBam01, pFastq1, pFastq1Default),
176
      Array("partial overlap",
bow's avatar
bow committed
177
        makeInterval("chrQ", 430, 460), pBam01, pFastq1, pFastq1Default.updated("r04", true)),
178
      Array("enveloped",
bow's avatar
bow committed
179
        makeInterval("chrQ", 693, 698), pBam01, pFastq1, pFastq1Default.updated("r03", true)),
180
      Array("in intron",
181
182
183
184
        makeInterval("chrQ", 900, 999), pBam01, pFastq1, pFastq1Default.updated("r05", true)),
      Array("partial overlap and enveloped",
        makeInterval(List(("chrQ", 693, 698), ("chrQ", 430, 460))), pBam01,
        pFastq1, pFastq1Default.updated("r03", true).updated("r04", true))
185
186
187
188
    )
  }

  @Test(dataProvider = "pairAlnProvider1")
189
  def testPairBamDefault(name: String, feats: Iterator[Interval], inAln: File,
190
                         fastqMap: Map[String, FastqInput], resultMap: Map[String, Boolean]) = {
191
    require(resultMap.keySet == fastqMap.keySet)
192
    val memFunc = makeMembershipFunction(feats, inAln, commonSuffixLength = 2)
193
194
195
196
197
198
    for ((key, (rec1, rec2)) <- fastqMap) {
      withClue(makeClue(name, inAln, key)) {
        memFunc(rec1, rec2) shouldBe resultMap(key)
      }
    }
  }
bow's avatar
bow committed
199

bow's avatar
bow committed
200
  @Test def testWriteSingleFastqDefault() = {
201
    val memFunc = (recs: FastqInput) => Set("r01", "r03").contains(fastqId(recs._1))
bow's avatar
bow committed
202
203
    val in1 = new FastqReader(resourceFile("/single01.fq"))
    val mo1 = mock[BasicFastqWriter]
bow's avatar
bow committed
204
    val obs = inOrd(mo1)
205
    extractReads(memFunc, in1, mo1)
bow's avatar
bow committed
206
    verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord])
bow's avatar
bow committed
207
208
    obs.verify(mo1).write(new FastqRecord("r01", "A", "", "H"))
    obs.verify(mo1).write(new FastqRecord("r03", "G", "", "H"))
bow's avatar
bow committed
209
210
  }

bow's avatar
bow committed
211
212
  @Test def testWritePairFastqDefault() = {
    val mockSet = Set("r01/1", "r01/2", "r03/1", "r03/2")
213
    val memFunc = (recs: FastqInput) => mockSet.contains(fastqId(recs._1)) || mockSet.contains(fastqId(recs._2.get))
bow's avatar
bow committed
214
    val in1 = new FastqReader(resourceFile("/paired01a.fq"))
215
    val in2 = new FastqReader(resourceFile("/paired01b.fq"))
bow's avatar
bow committed
216
    val mo1 = mock[BasicFastqWriter]
217
    val mo2 = mock[BasicFastqWriter]
bow's avatar
bow committed
218
    val obs = inOrd(mo1, mo2)
219
    extractReads(memFunc, in1, mo1, in2, mo2)
220
221
    obs.verify(mo1).write(new FastqRecord("r01/1 hello", "A", "", "H"))
    obs.verify(mo2).write(new FastqRecord("r01/2 hello", "T", "", "I"))
bow's avatar
bow committed
222
223
    obs.verify(mo1).write(new FastqRecord("r03/1", "G", "", "H"))
    obs.verify(mo2).write(new FastqRecord("r03/2", "C", "", "I"))
bow's avatar
bow committed
224
    verify(mo1, times(2)).write(anyObject.asInstanceOf[FastqRecord])
225
    verify(mo2, times(2)).write(anyObject.asInstanceOf[FastqRecord])
bow's avatar
bow committed
226
  }
bow's avatar
bow committed
227

bow's avatar
bow committed
228
  @Test def testArgsMinimum() = {
bow's avatar
bow committed
229
230
231
232
233
234
235
    val args = Array(
      "-I", resourcePath("/single01.bam"),
      "--interval", "chrQ:1-400",
      "-i", resourcePath("/single01.fq"),
      "-o", "/tmp/tm1.fq"
    )
    val parsed = parseArgs(args)
bow's avatar
bow committed
236
    parsed.inputBam shouldBe resourceFile("/single01.bam")
bow's avatar
bow committed
237
    parsed.intervals shouldBe List("chrQ:1-400")
bow's avatar
bow committed
238
239
    parsed.inputFastq1 shouldBe resourceFile("/single01.fq")
    parsed.outputFastq1 shouldBe new File("/tmp/tm1.fq")
bow's avatar
bow committed
240
  }
bow's avatar
bow committed
241

bow's avatar
bow committed
242
  @Test def testArgsMaximum() = {
bow's avatar
bow committed
243
244
245
    val args = Array(
      "-I", resourcePath("/paired01.bam"),
      "--interval", "chrQ:1-400",
bow's avatar
bow committed
246
      "--interval", "chrP:1000-4000",
bow's avatar
bow committed
247
248
249
250
251
252
253
254
      "-i", resourcePath("/paired01a.fq"),
      "-j", resourcePath("/paired01b.fq"),
      "-o", "/tmp/tm1.fq",
      "-p", "/tmp/tm2.fq",
      "-s", "2",
      "-Q", "30"
    )
    val parsed = parseArgs(args)
bow's avatar
bow committed
255
256
257
258
259
260
    parsed.inputBam shouldBe resourceFile("/paired01.bam")
    parsed.intervals shouldBe List("chrQ:1-400", "chrP:1000-4000")
    parsed.inputFastq1 shouldBe resourceFile("/paired01a.fq")
    parsed.inputFastq2.get shouldBe resourceFile("/paired01b.fq")
    parsed.outputFastq1 shouldBe new File("/tmp/tm1.fq")
    parsed.outputFastq2.get shouldBe new File("/tmp/tm2.fq")
bow's avatar
bow committed
261
262
263
    parsed.commonSuffixLength shouldBe 2
    parsed.minMapQ shouldBe 30
  }
264
}