BastyTest.scala 8.42 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.basty

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, FileOutputStream }

import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.{ Raxml, RunGubbins }
import nl.lumc.sasc.biopet.extensions.gatk.{ BaseRecalibrator, IndelRealigner, PrintReads, RealignerTargetCreator }
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import nl.lumc.sasc.biopet.extensions.tools.{ BastyGenerateFasta, VcfStats }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils
import org.apache.commons.io.IOUtils
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ AfterClass, DataProvider, Test }
31 32

/**
 * Test suite for the [[Basty]] pipeline.
 *
 * For every combination of the two test samples a pipeline is built from an in-memory config,
 * its script is generated and the number of created jobs and the summary content are checked.
 * The option `def`s below define which pipeline switches are written into that config.
 *
 * Created by pjvanthof on 27/09/16.
 */
class BastyTest extends TestNGSuite with Matchers {

  /**
   * Creates a [[Basty]] pipeline that uses `map` as its global config.
   *
   * @param map config values handed to the pipeline
   * @return a pipeline with fresh `QSettings` and run name "test"; `script()` is not called here
   */
  def initPipeline(map: Map[String, Any]): Basty = {
    new Basty() {
      override def configNamespace = "shiva"
      override def globalConfig = new Config(map)
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

  /**
   * TestNG data provider: one row per combination of [[sample1]] and [[sample2]].
   * Each row is `("", sample1, sample2)`; the leading empty string fills the unused `f`
   * parameter of [[testBasty]].
   */
  @DataProvider(name = "bastyOptions")
  def bastyOptions: Array[Array[Any]] =
    for {
      s1 <- sample1
      s2 <- sample2
    } yield Array[Any]("", s1, s2)

  // Values tried for "include sample 1 / sample 2 in the config".
  def sample1: Array[Boolean] = Array(false, true)
  def sample2: Array[Boolean] = Array(false, true)

  // Pipeline switches written into the config by testBasty and used to compute expected counts.
  def realign: Boolean = true
  def baseRecalibration: Boolean = true
  def multisampleCalling: Boolean = true
  def sampleCalling: Boolean = false
  def libraryCalling: Boolean = false
  def dbsnp: Boolean = false
  def svCalling: Boolean = false
  def cnvCalling: Boolean = false
  def annotation: Boolean = false
  // None means "not configured"; the assertions then expect 100 boot runs.
  def bootRuns: Option[Int] = None

  // Output directories created by the tests; removed again in removeTempOutputDir.
  private var dirs: List[File] = Nil

  /**
   * Builds the pipeline for one sample combination and verifies the generated jobs.
   *
   * Without any sample the script generation is expected to throw an
   * `IllegalArgumentException`; otherwise job counts and summary maps are checked.
   *
   * @param f unused; the data provider always supplies an empty string
   * @param sample1 whether `BastyTest.sample1` is merged into the config
   * @param sample2 whether `BastyTest.sample2` is merged into the config
   */
  @Test(dataProvider = "bastyOptions")
  def testBasty(f: String, sample1: Boolean, sample2: Boolean): Unit = {
    val outputDir = BastyTest.outputDir
    dirs ::= outputDir // deletion order is irrelevant, so prepend

    def layerIf(enabled: Boolean, layer: => Map[String, Any]): Option[Map[String, Any]] =
      if (enabled) Some(layer) else None

    val optionalLayers: List[Map[String, Any]] = List(
      layerIf(sample1, BastyTest.sample1),
      layerIf(sample2, BastyTest.sample2),
      layerIf(dbsnp, Map("dbsnp_vcf" -> "test.vcf.gz"))).flatten

    val switches: Map[String, Any] = Map(
      "multisample_variantcalling" -> multisampleCalling,
      "single_sample_variantcalling" -> sampleCalling,
      "library_variantcalling" -> libraryCalling,
      "use_indel_realigner" -> realign,
      "use_base_recalibration" -> baseRecalibration,
      "sv_calling" -> svCalling,
      "cnv_calling" -> cnvCalling,
      "annotation" -> annotation,
      "boot_runs" -> bootRuns)

    // Each layer is merged on top of what was built so far, the switches last.
    val map = (optionalLayers :+ switches).foldLeft(BastyTest.config(outputDir): Map[String, Any]) {
      (merged, layer) => ConfigUtils.mergeMaps(layer, merged)
    }

    if (!sample1 && !sample2) { // When no samples
      intercept[IllegalArgumentException] {
        initPipeline(map).script()
      }
      Logging.errors.clear()
    } else {
      val pipeline = initPipeline(map)
      pipeline.script()

      // sample1 has one library, sample2 has two (see the BastyTest object).
      val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
      val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
      val expectedBootRuns = bootRuns.getOrElse(100)
      val expectedRealignJobs =
        if (realign) numberLibs + (if (sample2) 1 else 0) else 0
      val recalibrating = dbsnp && baseRecalibration

      pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)

      // Gatk preprocess
      pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe expectedRealignJobs
      pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe expectedRealignJobs
      pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (recalibrating) numberLibs * 2 else 0)
      pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (recalibrating) numberLibs else 0)

      pipeline.summarySettings.get("boot_runs") shouldBe Some(expectedBootRuns)

      pipeline.summaryFiles shouldBe Map()

      val expectedFastaKeys = List(
        "variants_fasta",
        "consensus_fasta",
        "consensus_variants_fasta",
        "snps_only_variants_fasta",
        "snps_only_consensus_fasta",
        "snps_only_consensus_variants_fasta")

      pipeline.samples foreach {
        case (sampleId, sample) =>
          sample.summarySettings shouldBe Map()
          expectedFastaKeys.foreach { key =>
            withClue(s"sample '$sampleId', summary file '$key': ") {
              sample.summaryFiles.get(key) should not be None
            }
          }
          sample.summaryStats shouldBe Map()
          sample.libraries.foreach {
            case (libId, lib) =>
              lib.summarySettings shouldBe Map()
              lib.summaryFiles shouldBe Map()
              lib.summaryStats shouldBe Map()
          }
      }

      pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (
        (if (multisampleCalling) 2 else 0) +
        (if (sampleCalling) numberSamples * 2 else 0) +
        (if (libraryCalling) numberLibs * 2 else 0))

      pipeline.functions.count(_.isInstanceOf[BastyGenerateFasta]) shouldBe (2 + (2 * numberSamples))
      pipeline.functions.count(_.isInstanceOf[Raxml]) shouldBe (2 * (2 + expectedBootRuns))
      pipeline.functions.count(_.isInstanceOf[RunGubbins]) shouldBe 2
    }
  }

  // Remove the temporary output directories after all tests in the class have been run.
  @AfterClass def removeTempOutputDir(): Unit = {
    dirs.foreach(FileUtils.deleteDirectory)
  }
}

/**
 * Shared fixtures for the Basty pipeline test: temporary directories, reference files copied
 * from the test resources, the base config and the two sample configs.
 */
object BastyTest {

  /** Creates and returns a NEW temporary directory on every call. */
  def outputDir: File = Files.createTempDir()

  /**
   * Temporary directory created once when this object is initialised; receives the reference
   * files and the touched input files.
   * NOTE(review): nothing in this file deletes it again - confirm whether that is intended.
   */
  val inputDir: File = Files.createTempDir()

  /**
   * Touches a file called `name` inside [[inputDir]].
   *
   * @param name file name relative to [[inputDir]]
   * @return absolute path of the touched file
   */
  def inputTouch(name: String): String = {
    val file = new File(inputDir, name)
    Files.touch(file)
    file.getAbsolutePath
  }

  /**
   * Copies the classpath resource `/name` to a file with the same name in [[inputDir]].
   * Both streams are closed, also when the copy fails.
   *
   * @param name resource name without leading slash
   * @throws IllegalArgumentException when the resource cannot be found
   */
  private def copyFile(name: String): Unit = {
    val in = Option(getClass.getResourceAsStream("/" + name)).getOrElse(
      throw new IllegalArgumentException(s"Test resource '/$name' not found on the classpath"))
    try {
      val out = new FileOutputStream(new File(inputDir, name))
      try IOUtils.copy(in, out)
      finally out.close()
    } finally in.close()
  }

  // Must run after inputDir is initialised; config() points "reference_fasta" at ref.fa.
  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

  /**
   * Base config for a test run.
   *
   * @param outputDir directory stored under the "output_dir" key
   * @return config map with placeholder values ("test") for the executables and the reference
   *         fasta located in [[inputDir]]
   */
  def config(outputDir: File): Map[String, Any] = Map(
    "skip_write_dependencies" -> true,
    "name_prefix" -> "test",
    "cache" -> true,
    "dir" -> "test",
    "vep_script" -> "test",
    "output_dir" -> outputDir,
    "reference_fasta" -> new File(inputDir, "ref.fa").getPath,
    "gatk_jar" -> "test",
    "samtools" -> Map("exe" -> "test"),
    "bcftools" -> Map("exe" -> "test"),
    "fastqc" -> Map("exe" -> "test"),
    "input_alleles" -> "test",
    "variantcallers" -> "raw",
    "seqtk" -> Map("exe" -> "test"),
    "sickle" -> Map("exe" -> "test"),
    "cutadapt" -> Map("exe" -> "test"),
    "bwa" -> Map("exe" -> "test"),
    "macs2" -> Map("exe" -> "test"),
    "igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
    "wigtobigwig" -> Map("exe" -> "test"),
    "md5sum" -> Map("exe" -> "test"),
    "bgzip" -> Map("exe" -> "test"),
    "tabix" -> Map("exe" -> "test"),
    "breakdancerconfig" -> Map("exe" -> "test"),
    "breakdancercaller" -> Map("exe" -> "test"),
    "pindelconfig" -> Map("exe" -> "test"),
    "pindelcaller" -> Map("exe" -> "test"),
    "pindelvcf" -> Map("exe" -> "test"),
    "clever" -> Map("exe" -> "test"),
    "delly" -> Map("exe" -> "test"),
    "rungubbins" -> Map("exe" -> "test"),
    "raxml" -> Map("exe" -> "test"),
    "pysvtools" -> Map(
      "exe" -> "test",
      "exclusion_regions" -> "test",
      "translocations_only" -> false),
    "freec" -> Map(
      "exe" -> "test",
      "chrFiles" -> "test",
      "chrLenFile" -> "test"
    )
  )

  /** Touches `<prefix>_R1.fq` and `<prefix>_R2.fq` and returns them as a library config. */
  private def pairedLibrary(prefix: String): Map[String, String] = Map(
    "R1" -> inputTouch(prefix + "_R1.fq"),
    "R2" -> inputTouch(prefix + "_R2.fq")
  )

  /** Config for sample id "sample1" with a single paired library. */
  val sample1 = Map(
    "samples" -> Map("sample1" -> Map("libraries" -> Map(
      "lib1" -> pairedLibrary("1_1")
    ))))

  /** Config with two paired libraries; note that the sample id used in the config is "sample3". */
  val sample2 = Map(
    "samples" -> Map("sample3" -> Map("libraries" -> Map(
      "lib1" -> pairedLibrary("2_1"),
      "lib2" -> pairedLibrary("2_2")
    ))))
}