ShivaVariantcallingTest.scala 14.8 KB
Newer Older
1
/**
2
3
4
5
6
7
8
9
10
11
12
13
14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
bow's avatar
bow committed
15
/**
16
17
18
19
  * Due to the license issue with GATK, this part of Biopet can only be used inside the
  * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
  * on how to use this protected part of biopet or contact us at sasc@lumc.nl
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
20
package nl.lumc.sasc.biopet.pipelines.shiva
21

Peter van 't Hof's avatar
Peter van 't Hof committed
22
import java.io.{File, FileOutputStream, IOException}
Peter van 't Hof's avatar
Peter van 't Hof committed
23
24

import com.google.common.io.Files
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.core.BiopetPipe
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.extensions.Freebayes
27
import nl.lumc.sasc.biopet.extensions.bcftools.{BcftoolsCall, BcftoolsMerge}
pjvan_thof's avatar
pjvan_thof committed
28
import nl.lumc.sasc.biopet.extensions.gatk._
29
import nl.lumc.sasc.biopet.utils.config.Config
30
31
import nl.lumc.sasc.biopet.extensions.tools.{MpileupToVcf, VcfFilter, VcfStats}
import nl.lumc.sasc.biopet.extensions.vt.{VtDecompose, VtNormalize}
pjvan_thof's avatar
pjvan_thof committed
32
33
import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.somatic.TumorNormalPair
import nl.lumc.sasc.biopet.utils.{ConfigUtils, Logging}
Peter van 't Hof's avatar
Peter van 't Hof committed
34
import org.apache.commons.io.FileUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
37
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
38
import org.testng.annotations.{AfterClass, DataProvider, Test}
Peter van 't Hof's avatar
Peter van 't Hof committed
39

Peter van 't Hof's avatar
Peter van 't Hof committed
40
41
import scala.collection.mutable.ListBuffer

Peter van 't Hof's avatar
Peter van 't Hof committed
42
/**
43
44
45
46
  * Class for testing ShivaVariantcalling
  *
  * Created by pjvan_thof on 3/2/15.
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
47
trait ShivaVariantcallingTestTrait extends TestNGSuite with Matchers {
Peter van 't Hof's avatar
Peter van 't Hof committed
48
  def initPipeline(map: Map[String, Any], dir: File): ShivaVariantcalling = {
49
    new ShivaVariantcalling() {
Sander Bollen's avatar
Sander Bollen committed
50
      override def configNamespace = "shivavariantcalling"
51
52
      override def globalConfig =
        new Config(ConfigUtils.mergeMaps(map, ShivaVariantcallingTest.config(dir)))
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55
56
57
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
58
  def raw: Boolean = false
pjvan_thof's avatar
pjvan_thof committed
59
  def mutect2: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
60
61
62
63
64
65
66
67
68
  def bcftools: Boolean = false
  def bcftools_singlesample: Boolean = false
  def haplotypeCallerGvcf: Boolean = false
  def haplotypeCallerAllele: Boolean = false
  def unifiedGenotyperAllele: Boolean = false
  def unifiedGenotyper: Boolean = false
  def haplotypeCaller: Boolean = false
  def freebayes: Boolean = false
  def varscanCnsSinglesample: Boolean = false
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  def referenceVcf: Option[File] = None
Peter van 't Hof's avatar
Peter van 't Hof committed
70
71
  def roiBedFiles: List[File] = Nil
  def ampliconBedFile: Option[File] = None
pjvan_thof's avatar
pjvan_thof committed
72
  def runContest: Option[Boolean] = None
Peter van 't Hof's avatar
Peter van 't Hof committed
73

74
75
76
  def normalize = false
  def decompose = false

pjvan_thof's avatar
pjvan_thof committed
77
78
79
80
  def bamRange: List[Int] = (0 to 2).toList

  def tumorNormalPairs: List[TumorNormalPair] = Nil

Peter van 't Hof's avatar
Peter van 't Hof committed
81
  @DataProvider(name = "shivaVariantcallingOptions")
pjvan_thof's avatar
pjvan_thof committed
82
83
  def shivaVariantcallingOptions: Array[Array[Any]] = {
    (for (bams <- bamRange)
84
85
86
87
      yield
        Array[Any](
          bams,
          raw,
pjvan_thof's avatar
pjvan_thof committed
88
          mutect2,
89
90
91
92
93
94
95
96
          bcftools,
          bcftools_singlesample,
          unifiedGenotyper,
          haplotypeCaller,
          haplotypeCallerGvcf,
          haplotypeCallerAllele,
          unifiedGenotyperAllele,
          freebayes,
pjvan_thof's avatar
pjvan_thof committed
97
98
          varscanCnsSinglesample,
          tumorNormalPairs
99
        )).toArray
Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
102
103
  private var dirs: List[File] = Nil

Peter van 't Hof's avatar
Peter van 't Hof committed
104
  @Test(dataProvider = "shivaVariantcallingOptions")
105
106
  def testShivaVariantcalling(bams: Int,
                              raw: Boolean,
pjvan_thof's avatar
pjvan_thof committed
107
                              mutect2: Boolean,
108
                              bcftools: Boolean,
109
                              bcftoolsSinglesample: Boolean,
110
111
112
113
114
                              unifiedGenotyper: Boolean,
                              haplotypeCaller: Boolean,
                              haplotypeCallerGvcf: Boolean,
                              haplotypeCallerAllele: Boolean,
                              unifiedGenotyperAllele: Boolean,
Peter van 't Hof's avatar
Peter van 't Hof committed
115
                              freebayes: Boolean,
pjvan_thof's avatar
pjvan_thof committed
116
117
                              varscanCnsSinglesample: Boolean,
                              tumorNormalPairs: List[TumorNormalPair]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
118
    val outputDir = ShivaVariantcallingTest.outputDir
pjvan_thof's avatar
pjvan_thof committed
119
    dirs :+= outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
    val callers: ListBuffer[String] = ListBuffer()
    if (raw) callers.append("raw")
pjvan_thof's avatar
pjvan_thof committed
122
    if (mutect2) callers.append("mutect2")
Peter van 't Hof's avatar
Peter van 't Hof committed
123
    if (bcftools) callers.append("bcftools")
124
    if (bcftoolsSinglesample) callers.append("bcftools_singlesample")
125
126
127
128
129
    if (unifiedGenotyper) callers.append("unifiedgenotyper")
    if (haplotypeCallerGvcf) callers.append("haplotypecaller_gvcf")
    if (haplotypeCallerAllele) callers.append("haplotypecaller_allele")
    if (unifiedGenotyperAllele) callers.append("unifiedgenotyper_allele")
    if (haplotypeCaller) callers.append("haplotypecaller")
Peter van 't Hof's avatar
Peter van 't Hof committed
130
    if (freebayes) callers.append("freebayes")
131
    if (varscanCnsSinglesample) callers.append("varscan_cns_singlesample")
pjvan_thof's avatar
pjvan_thof committed
132
133
134
135
136
137
138
139
    val sampleTags: Map[String, Any] = tumorNormalPairs.foldLeft(Map[String, Any]()) {
      case (m, pair) =>
        val tag = Map(
          "samples" -> Map(pair.tumorSample -> Map(
            "tags" -> Map("type" -> "tumor", "normal" -> pair.normalSample))))
        ConfigUtils.mergeMaps(m, tag)
    }
    val map = sampleTags ++ Map(
140
141
      "variantcallers" -> callers.toList,
      "execute_vt_normalize" -> normalize,
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
      "execute_vt_decompose" -> decompose,
      "regions_of_interest" -> roiBedFiles.map(_.getAbsolutePath)
144
    ) ++ referenceVcf.map("reference_vcf" -> _) ++ ampliconBedFile.map(
pjvan_thof's avatar
pjvan_thof committed
145
      "amplicon_bed" -> _.getAbsolutePath) ++ runContest.map("run_contest" -> _)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
    val pipeline = initPipeline(map, outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
147

148
    pipeline.inputBams = (for (n <- 1 to bams)
pjvan_thof's avatar
pjvan_thof committed
149
150
      yield
        s"sample_${n.toString}" -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
151

Peter van 't Hof's avatar
Peter van 't Hof committed
152
    val illegalArgumentException = pipeline.inputBams.isEmpty || callers.isEmpty
pjvan_thof's avatar
pjvan_thof committed
153
    val illegalStateException = mutect2 && bams == 1
Peter van 't Hof's avatar
Peter van 't Hof committed
154
155
156

    if (illegalArgumentException) intercept[IllegalArgumentException] {
      pipeline.script()
pjvan_thof's avatar
pjvan_thof committed
157
158
159
    } else if (illegalStateException) intercept[IllegalStateException] {
      pipeline.script()
    } else {
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
      pipeline.script()

162
163
164
      val pipesJobs = pipeline.functions
        .filter(_.isInstanceOf[BiopetPipe])
        .flatMap(_.asInstanceOf[BiopetPipe].pipesJobs)
Peter van 't Hof's avatar
Peter van 't Hof committed
165

166
167
168
169
170
171
172
173
174
175
      pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe (1 + (if (raw) 1 else 0) + (if (varscanCnsSinglesample)
                                                                                                       1
                                                                                                     else
                                                                                                       0))
      pipesJobs.count(_.isInstanceOf[BcftoolsCall]) shouldBe (if (bcftools) 1 else 0) + (if (bcftoolsSinglesample)
                                                                                           bams
                                                                                         else 0)
      pipeline.functions.count(_.isInstanceOf[BcftoolsMerge]) shouldBe (if (bcftoolsSinglesample && bams > 1)
                                                                          1
                                                                        else 0)
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
      pipesJobs.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0)
      pipesJobs.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
Peter van 't Hof's avatar
Peter van 't Hof committed
178
      pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0)
179
180
      pipeline.functions.count(_.isInstanceOf[HaplotypeCaller]) shouldBe (if (haplotypeCaller) 1
                                                                          else 0) +
181
        (if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0)
182
183
      pipeline.functions.count(_.isInstanceOf[UnifiedGenotyper]) shouldBe (if (unifiedGenotyper) 1
                                                                           else 0) +
184
        (if (unifiedGenotyperAllele) 1 else 0)
185
      pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (1 + callers.size + (roiBedFiles ++ ampliconBedFile).length * (1 + callers.size))
186
187
188
189
190
191
192
      pipeline.functions.count(_.isInstanceOf[VtNormalize]) shouldBe (if (normalize) callers.size
                                                                      else 0)
      pipeline.functions.count(_.isInstanceOf[VtDecompose]) shouldBe (if (decompose) callers.size
                                                                      else 0)
      pipeline.functions.count(_.isInstanceOf[GenotypeConcordance]) shouldBe (if (referenceVcf.isDefined)
                                                                                1 + callers.size
                                                                              else 0)
pjvan_thof's avatar
pjvan_thof committed
193
194
195
196
197
      pipesJobs.count(_.isInstanceOf[MuTect2]) shouldBe (if (mutect2) tumorNormalPairs.size else 0)
      pipeline.functions.count(_.isInstanceOf[ContEst]) shouldBe (if (mutect2 && runContest
                                                                        .getOrElse(false))
                                                                    tumorNormalPairs.size
                                                                  else 0)
Peter van 't Hof's avatar
Peter van 't Hof committed
198

199
200
201
202
203
204
205
      pipeline.summarySettings
        .get("variantcallers")
        .map(_.asInstanceOf[List[String]].toSet) shouldBe Some(callers.toSet)
      pipeline.summarySettings.get("amplicon_bed") shouldBe Some(
        ampliconBedFile.map(_.getAbsolutePath))
      pipeline.summarySettings.get("regions_of_interest") shouldBe Some(
        roiBedFiles.map(_.getAbsolutePath))
Peter van 't Hof's avatar
Peter van 't Hof committed
206
    }
pjvan_thof's avatar
pjvan_thof committed
207
    Logging.errors.clear()
Peter van 't Hof's avatar
Peter van 't Hof committed
208
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210

  // remove temporary run directory all tests in the class have been run
Peter van 't Hof's avatar
Peter van 't Hof committed
211
212
213
214
215
216
217
218
219
  @AfterClass def removeTempOutputDir() = {
    dirs.filter(_.exists()).foreach { dir =>
      try {
        FileUtils.deleteDirectory(dir)
      } catch {
        case e: IOException if e.getMessage.startsWith("Unable to delete directory") =>
          Logging.logger.error(e.getMessage)
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
220
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
221
222
}

Peter van 't Hof's avatar
Peter van 't Hof committed
223
224
class ShivaVariantcallingNoVariantcallersTest extends ShivaVariantcallingTestTrait
class ShivaVariantcallingAllTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
225
226
227
228
229
230
231
232
233
234
  override def raw: Boolean = true
  override def bcftools: Boolean = true
  override def bcftools_singlesample: Boolean = true
  override def haplotypeCallerGvcf: Boolean = true
  override def haplotypeCallerAllele: Boolean = true
  override def unifiedGenotyperAllele: Boolean = true
  override def unifiedGenotyper: Boolean = true
  override def haplotypeCaller: Boolean = true
  override def freebayes: Boolean = true
  override def varscanCnsSinglesample: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
235
236
}
class ShivaVariantcallingRawTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
237
  override def raw: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
238
}
pjvan_thof's avatar
pjvan_thof committed
239
240
241
242
243
244
245
246
247
248
249
250
251
class ShivaVariantcallingMuTect2Test extends ShivaVariantcallingTestTrait {
  override def mutect2: Boolean = true
  override def haplotypeCaller: Boolean = true
  override def tumorNormalPairs: List[TumorNormalPair] =
    TumorNormalPair("sample_1", "sample_2") :: Nil
}
class ShivaVariantcallingMuTect2ContestTest extends ShivaVariantcallingTestTrait {
  override def mutect2: Boolean = true
  override def haplotypeCaller: Boolean = true
  override def runContest = Some(true)
  override def tumorNormalPairs: List[TumorNormalPair] =
    TumorNormalPair("sample_1", "sample_2") :: Nil
}
Peter van 't Hof's avatar
Peter van 't Hof committed
252
class ShivaVariantcallingBcftoolsTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
253
  override def bcftools: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
254
255
}
class ShivaVariantcallingBcftoolsSinglesampleTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
256
  override def bcftools_singlesample: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
257
258
}
class ShivaVariantcallingHaplotypeCallerGvcfTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
259
  override def haplotypeCallerGvcf: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
260
261
}
class ShivaVariantcallingHaplotypeCallerAlleleTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
262
  override def haplotypeCallerAllele: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
263
264
}
class ShivaVariantcallingUnifiedGenotyperAlleleTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
265
  override def unifiedGenotyperAllele: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
}
class ShivaVariantcallingUnifiedGenotyperTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
268
  override def unifiedGenotyper: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
269
270
}
class ShivaVariantcallingHaplotypeCallerTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
271
  override def haplotypeCaller: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
272
273
}
class ShivaVariantcallingFreebayesTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
274
  override def freebayes: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
275
276
}
class ShivaVariantcallingVarscanCnsSinglesampleTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
277
  override def varscanCnsSinglesample: Boolean = true
Peter van 't Hof's avatar
Peter van 't Hof committed
278
}
279
280
281
282
283
284
285
286
287
288
289
290
291
class ShivaVariantcallingNormalizeTest extends ShivaVariantcallingTestTrait {
  override def unifiedGenotyper: Boolean = true
  override def normalize = true
}
class ShivaVariantcallingDecomposeTest extends ShivaVariantcallingTestTrait {
  override def unifiedGenotyper: Boolean = true
  override def decompose = true
}
class ShivaVariantcallingNormalizeDecomposeTest extends ShivaVariantcallingTestTrait {
  override def unifiedGenotyper: Boolean = true
  override def normalize = true
  override def decompose = true
}
Peter van 't Hof's avatar
Peter van 't Hof committed
292
class ShivaVariantcallingReferenceVcfTest extends ShivaVariantcallingTestTrait {
Peter van 't Hof's avatar
Peter van 't Hof committed
293
294
295
  override def unifiedGenotyper: Boolean = true
  override def referenceVcf = Some(new File("ref.vcf"))
}
Peter van 't Hof's avatar
Peter van 't Hof committed
296
297
298
299
class ShivaVariantcallingAmpliconTest extends ShivaVariantcallingTestTrait {
  override def unifiedGenotyper: Boolean = true
  override def ampliconBedFile = Some(new File("amplicon.bed"))
}
Peter van 't Hof's avatar
Peter van 't Hof committed
300

Peter van 't Hof's avatar
Peter van 't Hof committed
301
object ShivaVariantcallingTest {
pjvan_thof's avatar
pjvan_thof committed
302
303
  def outputDir: File = Files.createTempDir()
  val inputDir: File = Files.createTempDir()
Peter van 't Hof's avatar
Peter van 't Hof committed
304

305
  def inputTouch(name: String): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
306
    val file = new File(inputDir, name).getAbsoluteFile
Peter van 't Hof's avatar
Peter van 't Hof committed
307
308
309
    Files.touch(file)
    file
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
310

Peter van 't Hof's avatar
Peter van 't Hof committed
311
312
  private def copyFile(name: String): Unit = {
    val is = getClass.getResourceAsStream("/" + name)
Peter van 't Hof's avatar
Peter van 't Hof committed
313
    val os = new FileOutputStream(new File(inputDir, name))
Peter van 't Hof's avatar
Peter van 't Hof committed
314
315
316
317
318
319
320
321
    org.apache.commons.io.IOUtils.copy(is, os)
    os.close()
  }

  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

Peter van 't Hof's avatar
Peter van 't Hof committed
322
  def config(outputDir: File) = Map(
323
    "skip_write_dependencies" -> true,
Peter van 't Hof's avatar
Peter van 't Hof committed
324
325
    "name_prefix" -> "test",
    "output_dir" -> outputDir,
Peter van 't Hof's avatar
Peter van 't Hof committed
326
327
328
    "cache" -> true,
    "dir" -> "test",
    "vep_script" -> "test",
Peter van 't Hof's avatar
Peter van 't Hof committed
329
    "reference_fasta" -> (inputDir + File.separator + "ref.fa"),
Peter van 't Hof's avatar
Peter van 't Hof committed
330
331
    "gatk_jar" -> "test",
    "samtools" -> Map("exe" -> "test"),
Peter van 't Hof's avatar
Peter van 't Hof committed
332
    "bcftools" -> Map("exe" -> "test"),
333
334
    "md5sum" -> Map("exe" -> "test"),
    "bgzip" -> Map("exe" -> "test"),
335
    "tabix" -> Map("exe" -> "test"),
Peter van 't Hof's avatar
Peter van 't Hof committed
336
    "input_alleles" -> "test.vcf.gz",
337
    "varscan_jar" -> "test",
pjvan_thof's avatar
pjvan_thof committed
338
339
    "vt" -> Map("exe" -> "test"),
    "popfile" -> "test"
Peter van 't Hof's avatar
Peter van 't Hof committed
340
  )
341
}