/**
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
package nl.lumc.sasc.biopet.pipelines.basty

17
import java.io.{File, FileOutputStream}
Peter van 't Hof's avatar
Peter van 't Hof committed
18 19

import com.google.common.io.Files
20 21 22 23 24 25 26
import nl.lumc.sasc.biopet.extensions.{Raxml, RunGubbins}
import nl.lumc.sasc.biopet.extensions.gatk.{
  BaseRecalibrator,
  IndelRealigner,
  PrintReads,
  RealignerTargetCreator
}
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
28 29
import nl.lumc.sasc.biopet.extensions.tools.{BastyGenerateFasta, VcfStats}
import nl.lumc.sasc.biopet.utils.{ConfigUtils, Logging}
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import nl.lumc.sasc.biopet.utils.config.Config
31
import org.apache.commons.io.FileUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
32 33 34
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
35
import org.testng.annotations.{AfterClass, DataProvider, Test}
Peter van 't Hof's avatar
Peter van 't Hof committed
36 37

/**
  * Tests for the [[Basty]] pipeline.
  *
  * The pipeline is built from an in-memory config for every on/off combination of the two
  * fixture samples, after which the generated jobs and the summary values are checked.
  * The flag methods (`realign`, `dbsnp`, ...) are written into the config by `testBasty`
  * and also determine the expected job counts.
  *
  * Created by pjvanthof on 27/09/16.
  */
class BastyTest extends TestNGSuite with Matchers {

  /**
    * Creates a [[Basty]] pipeline whose global config is built from `map`.
    *
    * The config namespace is overridden to "shiva" and a new `QSettings` with run name
    * "test" is assigned to the pipeline.
    *
    * @param map config values handed to the pipeline
    * @return the pipeline; `script()` has not been called on it yet
    */
  def initPipeline(map: Map[String, Any]): Basty = {
    val settings = new QSettings
    settings.runName = "test"
    new Basty() {
      override def configNamespace = "shiva"
      override def globalConfig = new Config(map)
      qSettings = settings
    }
  }

  /**
    * Supplies one argument row per combination of [[sample1]] and [[sample2]].
    * The first element of each row is an empty string that `testBasty` does not use.
    */
  @DataProvider(name = "bastyOptions")
  def bastyOptions: Array[Array[Any]] = {
    for {
      withSample1 <- sample1
      withSample2 <- sample2
    } yield Array("", withSample1, withSample2)
  }

  // Values iterated by the data provider for each sample switch.
  def sample1: Array[Boolean] = Array(false, true)
  def sample2: Array[Boolean] = Array(false, true)

  // Feature flags; each one is written into the config and used for the expected counts.
  def realign: Boolean = true
  def baseRecalibration: Boolean = true
  def multisampleCalling: Boolean = true
  def sampleCalling: Boolean = false
  def libraryCalling: Boolean = false
  def dbsnp: Boolean = false
  def svCalling: Boolean = false
  def cnvCalling: Boolean = false
  def annotation: Boolean = false

  // When None, the assertions below expect 100 boot runs.
  def bootRuns: Option[Int] = None

  // Output directories created by the test runs, deleted again in removeTempOutputDir.
  private var dirs: List[File] = Nil

  /**
    * Builds the pipeline for one sample combination and verifies its jobs and summary.
    *
    * @param f       unused value from the data provider
    * @param sample1 whether the first fixture sample is added to the config
    * @param sample2 whether the second fixture sample is added to the config
    */
  @Test(dataProvider = "bastyOptions")
  def testBasty(f: String, sample1: Boolean, sample2: Boolean): Unit = {
    val outputDir = BastyTest.outputDir
    dirs = dirs :+ outputDir

    // Layer the optional parts on top of the base config, one step at a time.
    val baseConfig: Map[String, Any] = BastyTest.config(outputDir)
    val withSample1: Map[String, Any] =
      if (sample1) ConfigUtils.mergeMaps(BastyTest.sample1, baseConfig) else baseConfig
    val withSample2: Map[String, Any] =
      if (sample2) ConfigUtils.mergeMaps(BastyTest.sample2, withSample1) else withSample1
    val withDbsnp: Map[String, Any] =
      if (dbsnp) ConfigUtils.mergeMaps(Map("dbsnp_vcf" -> "test.vcf.gz"), withSample2)
      else withSample2

    val flags: Map[String, Any] = Map(
      "multisample_variantcalling" -> multisampleCalling,
      "single_sample_variantcalling" -> sampleCalling,
      "library_variantcalling" -> libraryCalling,
      "use_indel_realigner" -> realign,
      "use_base_recalibration" -> baseRecalibration,
      "sv_calling" -> svCalling,
      "cnv_calling" -> cnvCalling,
      "annotation" -> annotation,
      "boot_runs" -> bootRuns
    )
    val map = ConfigUtils.mergeMaps(flags, withDbsnp)

    if (sample1 || sample2) {
      val pipeline = initPipeline(map)
      pipeline.script()

      // Number of pipeline functions that are an instance of T.
      def jobCount[T](implicit tag: scala.reflect.ClassTag[T]): Int =
        pipeline.functions.count(job => tag.runtimeClass.isInstance(job))

      // The first fixture sample has one library, the second one has two.
      val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
      val numberSamples = Seq(sample1, sample2).count(identity)

      jobCount[MarkDuplicates] shouldBe (numberLibs + numberSamples)

      // Gatk preprocess
      val expectedRealignJobs = if (realign) numberSamples else 0
      jobCount[IndelRealigner] shouldBe expectedRealignJobs
      jobCount[RealignerTargetCreator] shouldBe (if (realign) numberSamples else 0)
      jobCount[BaseRecalibrator] shouldBe (if (dbsnp && baseRecalibration) numberLibs * 2 else 0)
      jobCount[PrintReads] shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0)

      pipeline.summarySettings.get("boot_runs") shouldBe Some(bootRuns.getOrElse(100))

      pipeline.summaryFiles shouldBe Map()

      // Summary files that every sample is expected to provide.
      val fastaKeys = Seq(
        "variants_fasta",
        "consensus_fasta",
        "consensus_variants_fasta",
        "snps_only_variants_fasta",
        "snps_only_consensus_fasta",
        "snps_only_consensus_variants_fasta"
      )

      pipeline.samples.foreach {
        case (_, sample) =>
          sample.summarySettings shouldBe Map()
          fastaKeys.foreach { key =>
            sample.summaryFiles.get(key) should not be None
          }
          sample.summaryStats shouldBe Map()
          sample.libraries.foreach {
            case (_, lib) =>
              lib.summarySettings shouldBe Map()
              lib.summaryFiles shouldBe Map()
              lib.summaryStats shouldBe Map()
          }
      }

      val expectedVcfStats =
        (if (multisampleCalling) 2 else 0) +
          (if (sampleCalling) numberSamples * 2 else 0) +
          (if (libraryCalling) numberLibs * 2 else 0)
      jobCount[VcfStats] shouldBe expectedVcfStats

      jobCount[BastyGenerateFasta] shouldBe (2 + (2 * numberSamples))
      jobCount[Raxml] shouldBe (2 * (2 + bootRuns.getOrElse(100)))
      jobCount[RunGubbins] shouldBe 2
    } else {
      // When no samples are configured, building the script must fail.
      intercept[IllegalArgumentException] {
        initPipeline(map).script()
      }
      Logging.errors.clear()
    }
  }

  // remove the temporary run directories after all tests in the class have been run
  @AfterClass def removeTempOutputDir(): Unit = {
    for (dir <- dirs) FileUtils.deleteDirectory(dir)
  }
}

/**
  * Shared fixtures for the `BastyTest` class: a temporary input directory holding the
  * reference files and touched fastq files, plus the config maps that the tests merge.
  */
object BastyTest {

  /** Creates and returns a new temporary directory on every call. */
  def outputDir: File = Files.createTempDir()

  /** Temporary directory, created once, in which all input files of the tests are placed. */
  val inputDir: File = Files.createTempDir()

  /**
    * Touches the file `name` inside [[inputDir]].
    *
    * @param name file name relative to [[inputDir]]
    * @return absolute path of the touched file
    */
  def inputTouch(name: String): String = {
    val file = new File(inputDir, name)
    Files.touch(file)
    file.getAbsolutePath
  }

  /**
    * Copies the classpath resource `/name` into [[inputDir]] under the same name.
    *
    * Both streams are closed when the copy is done, also when copying fails.
    *
    * @param name resource name, also used as the target file name
    * @throws IllegalArgumentException when the resource is not on the classpath
    */
  private def copyFile(name: String): Unit = {
    // getResourceAsStream returns null for a missing resource; fail with a clear message
    // instead of a NullPointerException from inside the copy.
    val in = Option(getClass.getResourceAsStream("/" + name)).getOrElse(
      throw new IllegalArgumentException(s"Test resource '/$name' not found on the classpath"))
    try {
      val out = new FileOutputStream(new File(inputDir, name))
      try org.apache.commons.io.IOUtils.copy(in, out)
      finally out.close()
    } finally in.close()
  }

  // Reference files must be present in inputDir before a pipeline is built.
  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

  /**
    * Base config shared by all tests. Executables and jars point to the dummy value "test".
    *
    * @param outputDir directory stored under "output_dir"
    * @return the config map; the reference fasta points into [[inputDir]]
    */
  def config(outputDir: File): Map[String, Any] = Map(
    "skip_write_dependencies" -> true,
    "name_prefix" -> "test",
    "cache" -> true,
    "dir" -> "test",
    "vep_script" -> "test",
    "output_dir" -> outputDir,
    "reference_fasta" -> s"$inputDir${File.separator}ref.fa",
    "gatk_jar" -> "test",
    "samtools" -> Map("exe" -> "test"),
    "bcftools" -> Map("exe" -> "test"),
    "fastqc" -> Map("exe" -> "test"),
    "input_alleles" -> "test",
    "variantcallers" -> "raw",
    "seqtk" -> Map("exe" -> "test"),
    "sickle" -> Map("exe" -> "test"),
    "cutadapt" -> Map("exe" -> "test"),
    "bwa" -> Map("exe" -> "test"),
    "macs2" -> Map("exe" -> "test"),
    "igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
    "wigtobigwig" -> Map("exe" -> "test"),
    "md5sum" -> Map("exe" -> "test"),
    "bgzip" -> Map("exe" -> "test"),
    "tabix" -> Map("exe" -> "test"),
    "breakdancerconfig" -> Map("exe" -> "test"),
    "breakdancercaller" -> Map("exe" -> "test"),
    "pindelconfig" -> Map("exe" -> "test"),
    "pindelcaller" -> Map("exe" -> "test"),
    "pindelvcf" -> Map("exe" -> "test"),
    "clever" -> Map("exe" -> "test"),
    "delly" -> Map("exe" -> "test"),
    "rungubbins" -> Map("exe" -> "test"),
    "raxml" -> Map("exe" -> "test"),
    "pysvtools" -> Map("exe" -> "test",
                       "exclusion_regions" -> "test",
                       "translocations_only" -> false),
    "freec" -> Map(
      "exe" -> "test",
      "chrFiles" -> "test",
      "chrLenFile" -> "test"
    )
  )

  /** Touches `<prefix>_R1.fq` and `<prefix>_R2.fq` and returns them as one library entry. */
  private def pairedLibrary(prefix: String): Map[String, String] = Map(
    "R1" -> inputTouch(s"${prefix}_R1.fq"),
    "R2" -> inputTouch(s"${prefix}_R2.fq")
  )

  /** Sample config with a single library. */
  val sample1 = Map(
    "samples" -> Map(
      "sample1" -> Map(
        "libraries" -> Map(
          "lib1" -> pairedLibrary("1_1")
        ))))

  /** Sample config with two libraries; note that the sample itself is keyed "sample3". */
  val sample2 = Map(
    "samples" -> Map(
      "sample3" -> Map(
        "libraries" -> Map(
          "lib1" -> pairedLibrary("2_1"),
          "lib2" -> pairedLibrary("2_2")
        ))))
}