BastyTest.scala 8.42 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15 16
package nl.lumc.sasc.biopet.pipelines.basty

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, FileOutputStream }
Peter van 't Hof's avatar
Peter van 't Hof committed
18 19

import com.google.common.io.Files
Peter van 't Hof's avatar
Peter van 't Hof committed
20 21
import nl.lumc.sasc.biopet.extensions.{ Raxml, RunGubbins }
import nl.lumc.sasc.biopet.extensions.gatk.{ BaseRecalibrator, IndelRealigner, PrintReads, RealignerTargetCreator }
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
Peter van 't Hof's avatar
Peter van 't Hof committed
23 24
import nl.lumc.sasc.biopet.extensions.tools.{ BastyGenerateFasta, VcfStats }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.utils.config.Config
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import org.apache.commons.io.FileUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
27 28 29
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
Peter van 't Hof's avatar
Peter van 't Hof committed
30
import org.testng.annotations.{ AfterClass, DataProvider, Test }
Peter van 't Hof's avatar
Peter van 't Hof committed
31 32

/**
 * Dry-run test of the [[Basty]] pipeline: for every sample combination the pipeline
 * script is executed against a generated config and the number of jobs of each type
 * that was added, plus the summary content, is checked.
 *
 * The `def` members below act as switches for the generated config.
 *
 * Created by pjvanthof on 27/09/16.
 */
class BastyTest extends TestNGSuite with Matchers {

  /** Creates a [[Basty]] instance that reads `map` as its global config and uses run name "test". */
  def initPipeline(map: Map[String, Any]): Basty = new Basty() {
    override def globalConfig = new Config(map)
    override def configNamespace = "shiva"
    qSettings = new QSettings
    qSettings.runName = "test"
  }

  /** All combinations of the `sample1` / `sample2` switches, each prefixed with an empty string. */
  @DataProvider(name = "bastyOptions")
  def bastyOptions: Array[Array[Any]] =
    sample1.flatMap(withSample1 => sample2.map(withSample2 => Array("", withSample1, withSample2)))

  // Values fed to the data provider
  def sample1: Array[Boolean] = Array(false, true)
  def sample2: Array[Boolean] = Array(false, true)

  // Switches written into the pipeline config by testBasty
  def realign: Boolean = true
  def baseRecalibration: Boolean = true
  def multisampleCalling: Boolean = true
  def sampleCalling: Boolean = false
  def libraryCalling: Boolean = false
  def dbsnp: Boolean = false
  def svCalling: Boolean = false
  def cnvCalling: Boolean = false
  def annotation: Boolean = false
  def bootRuns: Option[Int] = None

  // Output directories handed out to the tests, removed again in removeTempOutputDir
  private var dirs: List[File] = Nil

  /**
   * Runs the pipeline script for one sample combination and verifies the generated jobs.
   *
   * @param f unused placeholder supplied by the data provider
   * @param sample1 whether the `BastyTest.sample1` config is merged in
   * @param sample2 whether the `BastyTest.sample2` config is merged in
   */
  @Test(dataProvider = "bastyOptions")
  def testBasty(f: String, sample1: Boolean, sample2: Boolean): Unit = {
    val outputDir = BastyTest.outputDir
    dirs :+= outputDir

    // Pipeline switches, merged in last
    val toggles: Map[String, Any] = Map(
      "multisample_variantcalling" -> multisampleCalling,
      "single_sample_variantcalling" -> sampleCalling,
      "library_variantcalling" -> libraryCalling,
      "use_indel_realigner" -> realign,
      "use_base_recalibration" -> baseRecalibration,
      "sv_calling" -> svCalling,
      "cnv_calling" -> cnvCalling,
      "annotation" -> annotation,
      "boot_runs" -> bootRuns)

    // Optional config layers in the order in which they are merged on top of the base config
    val optionalLayers: List[(Boolean, Map[String, Any])] = List(
      (sample1, BastyTest.sample1),
      (sample2, BastyTest.sample2),
      (dbsnp, Map("dbsnp_vcf" -> "test.vcf.gz")))
    val layers: List[Map[String, Any]] = optionalLayers.collect { case (true, layer) => layer } :+ toggles

    val baseConfig: Map[String, Any] = BastyTest.config(outputDir)
    val map = layers.foldLeft(baseConfig)((merged, layer) => ConfigUtils.mergeMaps(layer, merged))

    val noSamples = !(sample1 || sample2)
    if (noSamples) {
      // Without any sample the script is expected to throw; the logged errors are dropped afterwards
      intercept[IllegalArgumentException] {
        initPipeline(map).script()
      }
      Logging.errors.clear()
    } else {
      val pipeline = initPipeline(map)
      pipeline.script()

      def asInt(flag: Boolean): Int = if (flag) 1 else 0
      // Number of jobs of type T that the script added to the pipeline
      def countOf[T](implicit tag: scala.reflect.ClassTag[T]): Int =
        pipeline.functions.count(job => tag.runtimeClass.isInstance(job))

      val numberLibs = asInt(sample1) + 2 * asInt(sample2)
      val numberSamples = asInt(sample1) + asInt(sample2)

      countOf[MarkDuplicates] shouldBe (numberLibs + numberSamples)

      // Gatk preprocess
      val realignJobs = numberLibs * asInt(realign) + asInt(sample2 && realign)
      val recalibrate = dbsnp && baseRecalibration
      countOf[IndelRealigner] shouldBe realignJobs
      countOf[RealignerTargetCreator] shouldBe realignJobs
      countOf[BaseRecalibrator] shouldBe (if (recalibrate) numberLibs * 2 else 0)
      countOf[PrintReads] shouldBe (if (recalibrate) numberLibs else 0)

      pipeline.summarySettings.get("boot_runs") shouldBe Some(bootRuns.getOrElse(100))

      pipeline.summaryFiles shouldBe Map()

      // Summary files every sample has to provide
      val fastaKeys = List(
        "variants_fasta",
        "consensus_fasta",
        "consensus_variants_fasta",
        "snps_only_variants_fasta",
        "snps_only_consensus_fasta",
        "snps_only_consensus_variants_fasta")

      pipeline.samples.foreach {
        case (_, sample) =>
          sample.summarySettings shouldBe Map()
          fastaKeys.foreach { key =>
            sample.summaryFiles.get(key) should not be None
          }
          sample.summaryStats shouldBe Map()
          sample.libraries.foreach {
            case (_, lib) =>
              lib.summarySettings shouldBe Map()
              lib.summaryFiles shouldBe Map()
              lib.summaryStats shouldBe Map()
          }
      }

      val expectedVcfStats =
        2 * asInt(multisampleCalling) +
          2 * numberSamples * asInt(sampleCalling) +
          2 * numberLibs * asInt(libraryCalling)
      countOf[VcfStats] shouldBe expectedVcfStats

      countOf[BastyGenerateFasta] shouldBe (2 + (2 * numberSamples))
      countOf[Raxml] shouldBe (2 * (2 + bootRuns.getOrElse(100)))
      countOf[RunGubbins] shouldBe 2
    }
  }

  // Remove the temporary run directories after all tests in the class have been run
  @AfterClass def removeTempOutputDir(): Unit = {
    dirs.foreach(dir => FileUtils.deleteDirectory(dir))
  }
}

/**
 * Fixtures shared by the Basty tests: a temporary input directory holding the
 * reference files, the base pipeline config and two sample configs.
 */
object BastyTest {
  /** Creates a new temporary directory on every call. */
  def outputDir: File = Files.createTempDir()

  /** Temporary directory that receives the reference files and the touched input files. */
  val inputDir: File = Files.createTempDir()

  // inputDir is shared for the lifetime of this object, so it is only removed when the JVM exits
  sys.addShutdownHook {
    FileUtils.deleteQuietly(inputDir)
    ()
  }

  /**
   * Touches a file called `name` inside [[inputDir]].
   *
   * @param name file name relative to the input directory
   * @return absolute path of the touched file
   */
  def inputTouch(name: String): String = {
    val file = new File(inputDir, name)
    Files.touch(file)
    file.getAbsolutePath
  }

  /**
   * Copies the classpath resource `/name` into [[inputDir]] under the same name.
   * Both streams are closed, also when the copy fails.
   *
   * @param name resource name relative to the classpath root
   * @throws IllegalStateException when the resource is not on the classpath
   */
  private def copyFile(name: String): Unit = {
    // getResourceAsStream returns null for a missing resource; fail with a clear message instead
    val is = Option(getClass.getResourceAsStream("/" + name)).getOrElse(
      throw new IllegalStateException("Test resource not found on classpath: /" + name))
    try {
      val os = new FileOutputStream(new File(inputDir, name))
      try {
        org.apache.commons.io.IOUtils.copy(is, os)
      } finally {
        os.close()
      }
    } finally {
      is.close()
    }
  }

  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

  /**
   * Base config for a pipeline run; every executable and jar is set to the dummy value "test".
   *
   * @param outputDir directory stored under the "output_dir" key
   * @return config map pointing "reference_fasta" at the copied `ref.fa`
   */
  def config(outputDir: File): Map[String, Any] = Map(
    "skip_write_dependencies" -> true,
    "name_prefix" -> "test",
    "cache" -> true,
    "dir" -> "test",
    "vep_script" -> "test",
    "output_dir" -> outputDir,
    "reference_fasta" -> new File(inputDir, "ref.fa").getPath,
    "gatk_jar" -> "test",
    "samtools" -> Map("exe" -> "test"),
    "bcftools" -> Map("exe" -> "test"),
    "fastqc" -> Map("exe" -> "test"),
    "input_alleles" -> "test",
    "variantcallers" -> "raw",
    "seqtk" -> Map("exe" -> "test"),
    "sickle" -> Map("exe" -> "test"),
    "cutadapt" -> Map("exe" -> "test"),
    "bwa" -> Map("exe" -> "test"),
    "macs2" -> Map("exe" -> "test"),
    "igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
    "wigtobigwig" -> Map("exe" -> "test"),
    "md5sum" -> Map("exe" -> "test"),
    "bgzip" -> Map("exe" -> "test"),
    "tabix" -> Map("exe" -> "test"),
    "breakdancerconfig" -> Map("exe" -> "test"),
    "breakdancercaller" -> Map("exe" -> "test"),
    "pindelconfig" -> Map("exe" -> "test"),
    "pindelcaller" -> Map("exe" -> "test"),
    "pindelvcf" -> Map("exe" -> "test"),
    "clever" -> Map("exe" -> "test"),
    "delly" -> Map("exe" -> "test"),
    "rungubbins" -> Map("exe" -> "test"),
    "raxml" -> Map("exe" -> "test"),
    "pysvtools" -> Map(
      "exe" -> "test",
      "exclusion_regions" -> "test",
      "translocations_only" -> false),
    "freec" -> Map(
      "exe" -> "test",
      "chrFiles" -> "test",
      "chrLenFile" -> "test"
    )
  )

  /** Config for a sample with id "sample1" that has a single paired-end library. */
  val sample1 = Map(
    "samples" -> Map("sample1" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("1_1_R1.fq"),
        "R2" -> inputTouch("1_1_R2.fq")
      )
    )
    )))

  /** Config for a sample with two paired-end libraries; note that its sample id is "sample3". */
  val sample2 = Map(
    "samples" -> Map("sample3" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("2_1_R1.fq"),
        "R2" -> inputTouch("2_1_R2.fq")
      ),
      "lib2" -> Map(
        "R1" -> inputTouch("2_2_R1.fq"),
        "R2" -> inputTouch("2_2_R2.fq")
      )
    )
    )))
}