/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.basty

import java.io.{ File, FileOutputStream }
import java.io.FileNotFoundException

import com.google.common.io.Files
import nl.lumc.sasc.biopet.extensions.{ Raxml, RunGubbins }
import nl.lumc.sasc.biopet.extensions.gatk.{ BaseRecalibrator, IndelRealigner, PrintReads, RealignerTargetCreator }
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import nl.lumc.sasc.biopet.extensions.tools.{ BastyGenerateFasta, VcfStats }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test }

/**
 * Runs the Basty pipeline script for every combination of the two test samples and
 * checks the number of generated jobs per tool and the summary content.
 *
 * The feature toggles (`realign`, `dbsnp`, `bootRuns`, ...) are plain `def`s, so a
 * subclass can override them to run the same checks against another configuration.
 *
 * Created by pjvanthof on 27/09/16.
 */
class BastyTest extends TestNGSuite with Matchers {

  /**
   * Builds a Basty pipeline whose global config is `map` merged with [[BastyTest.config]].
   *
   * @param map config values passed as the first argument to `ConfigUtils.mergeMaps`
   * @return a pipeline with fresh `QSettings` and run name "test"; `script()` is not called yet
   */
  def initPipeline(map: Map[String, Any]): Basty = {
    new Basty() {
      // The pipeline reads its settings from the "shiva" namespace in this test.
      override def configNamespace = "shiva"
      override def globalConfig = new Config(ConfigUtils.mergeMaps(map, BastyTest.config))
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

  /** One row per (sample1, sample2) combination; the leading empty string fills the unused `f` argument. */
  @DataProvider(name = "bastyOptions")
  def bastyOptions: Array[Array[Any]] = {
    for (
      s1 <- sample1; s2 <- sample2
    ) yield Array("", s1, s2)
  }

  // Values fed to the data provider for including sample 1 / sample 2.
  def sample1: Array[Boolean] = Array(false, true)
  def sample2: Array[Boolean] = Array(false, true)

  // Feature toggles written into the pipeline config by testBasty.
  def realign: Boolean = true
  def baseRecalibration: Boolean = true
  def multisampleCalling: Boolean = true
  def sampleCalling: Boolean = false
  def libraryCalling: Boolean = false
  def dbsnp: Boolean = false
  def svCalling: Boolean = false
  def cnvCalling: Boolean = false
  def annotation: Boolean = false
  // None leaves "boot_runs" unset; the assertions below then expect 100 runs.
  def bootRuns: Option[Int] = None

  /**
   * Generates the pipeline for one sample combination and verifies job counts and summaries.
   *
   * @param f unused placeholder supplied by the data provider
   * @param sample1 whether [[BastyTest.sample1]] (one library) is part of the config
   * @param sample2 whether [[BastyTest.sample2]] (two libraries) is part of the config
   */
  @Test(dataProvider = "bastyOptions")
  def testBasty(f: String, sample1: Boolean, sample2: Boolean): Unit = {
    // Optional config layers, applied in this order on top of the base config.
    val optionalLayers: Seq[(Boolean, Map[String, Any])] = Seq(
      (sample1, BastyTest.sample1),
      (sample2, BastyTest.sample2),
      (dbsnp, Map("dbsnp_vcf" -> "test.vcf.gz")))

    val inputConfig = optionalLayers.foldLeft[Map[String, Any]](BastyTest.config) {
      case (acc, (true, layer)) => ConfigUtils.mergeMaps(layer, acc)
      case (acc, _)             => acc
    }

    val map = ConfigUtils.mergeMaps(Map(
      "multisample_variantcalling" -> multisampleCalling,
      "single_sample_variantcalling" -> sampleCalling,
      "library_variantcalling" -> libraryCalling,
      "use_indel_realigner" -> realign,
      "use_base_recalibration" -> baseRecalibration,
      "sv_calling" -> svCalling,
      "cnv_calling" -> cnvCalling,
      "annotation" -> annotation,
      "boot_runs" -> bootRuns), inputConfig)

    if (!sample1 && !sample2) { // When no samples
      intercept[IllegalArgumentException] {
        initPipeline(map).script()
      }
      // Drop the errors logged by the expected failure so they do not leak into later tests.
      Logging.errors.clear()
    } else {
      val pipeline = initPipeline(map)
      pipeline.script()

      val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
      val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
      // sample2 has two libraries and is expected to add one more job on top of the per-library ones.
      val multiLibSamples = if (sample2) 1 else 0
      val expectedBootRuns = bootRuns.getOrElse(100)

      pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + multiLibSamples)

      // Gatk preprocess
      val expectedRealignJobs = if (realign) numberLibs + multiLibSamples else 0
      pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe expectedRealignJobs
      pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe expectedRealignJobs
      pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0)
      pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0)

      pipeline.summarySettings.get("boot_runs") shouldBe Some(expectedBootRuns)

      pipeline.summaryFile shouldBe new File(BastyTest.outputDir, "Basty.summary.json")
      pipeline.summaryFiles shouldBe Map()

      val expectedSampleFiles = Seq(
        "variants_fasta",
        "consensus_fasta",
        "consensus_variants_fasta",
        "snps_only_variants_fasta",
        "snps_only_consensus_fasta",
        "snps_only_consensus_variants_fasta")

      pipeline.samples foreach {
        case (_, sample) =>
          sample.summarySettings shouldBe Map()
          expectedSampleFiles.foreach { key =>
            // The clue names the missing key when an assertion fails.
            withClue(s"sample summary file '$key': ") {
              sample.summaryFiles.get(key) should not be None
            }
          }
          sample.summaryStats shouldBe Map()
          sample.libraries.foreach {
            case (_, lib) =>
              lib.summarySettings shouldBe Map()
              lib.summaryFiles shouldBe Map()
              lib.summaryStats shouldBe Map()
          }
      }

      pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (
        (if (multisampleCalling) 2 else 0) +
        (if (sampleCalling) numberSamples * 2 else 0) +
        (if (libraryCalling) numberLibs * 2 else 0))

      pipeline.functions.count(_.isInstanceOf[BastyGenerateFasta]) shouldBe (2 + (2 * numberSamples))
      pipeline.functions.count(_.isInstanceOf[Raxml]) shouldBe (2 * (2 + expectedBootRuns))
      pipeline.functions.count(_.isInstanceOf[RunGubbins]) shouldBe 2
    }
  }
}

/**
 * Shared fixtures for the Basty pipeline tests: a temporary output directory containing
 * the reference files and an "input" folder, the base config, and two sample configs.
 */
object BastyTest {
  val outputDir: File = Files.createTempDir()
  // NOTE(review): java.io.File can only delete an empty directory, and this one is filled
  // below, so this call alone probably leaves the directory behind - confirm and consider
  // an explicit recursive cleanup.
  outputDir.deleteOnExit()
  new File(outputDir, "input").mkdirs()

  /**
   * Creates an empty file (or updates its timestamp) in the "input" folder of [[outputDir]].
   *
   * @param name file name relative to the "input" folder
   * @return absolute path of the touched file
   */
  def inputTouch(name: String): String = {
    val file = new File(outputDir, "input" + File.separator + name)
    Files.touch(file)
    file.getAbsolutePath
  }

  /**
   * Copies the classpath resource `/name` to a file called `name` in [[outputDir]].
   * Both streams are closed even when the copy fails.
   *
   * @param name resource name relative to the classpath root
   * @throws FileNotFoundException when the resource is not on the classpath
   */
  private def copyFile(name: String): Unit = {
    // getResourceAsStream signals a missing resource with null; fail with the resource name.
    val is = Option(getClass.getResourceAsStream("/" + name))
      .getOrElse(throw new FileNotFoundException(s"Test resource '/$name' not found on the classpath"))
    try {
      val os = new FileOutputStream(new File(outputDir, name))
      try {
        org.apache.commons.io.IOUtils.copy(is, os)
      } finally {
        os.close()
      }
    } finally {
      is.close()
    }
  }

  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

  // Base config: every executable and jar is set to the placeholder "test".
  val config: Map[String, Any] = Map(
    "skip_write_dependencies" -> true,
    "name_prefix" -> "test",
    "cache" -> true,
    "dir" -> "test",
    "vep_script" -> "test",
    "output_dir" -> outputDir,
    "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
    "gatk_jar" -> "test",
    "samtools" -> Map("exe" -> "test"),
    "bcftools" -> Map("exe" -> "test"),
    "fastqc" -> Map("exe" -> "test"),
    "input_alleles" -> "test",
    "variantcallers" -> "raw",
    "seqtk" -> Map("exe" -> "test"),
    "sickle" -> Map("exe" -> "test"),
    "cutadapt" -> Map("exe" -> "test"),
    "bwa" -> Map("exe" -> "test"),
    "macs2" -> Map("exe" -> "test"),
    "igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
    "wigtobigwig" -> Map("exe" -> "test"),
    "md5sum" -> Map("exe" -> "test"),
    "bgzip" -> Map("exe" -> "test"),
    "tabix" -> Map("exe" -> "test"),
    "breakdancerconfig" -> Map("exe" -> "test"),
    "breakdancercaller" -> Map("exe" -> "test"),
    "pindelconfig" -> Map("exe" -> "test"),
    "pindelcaller" -> Map("exe" -> "test"),
    "pindelvcf" -> Map("exe" -> "test"),
    "clever" -> Map("exe" -> "test"),
    "delly" -> Map("exe" -> "test"),
    "rungubbins" -> Map("exe" -> "test"),
    "raxml" -> Map("exe" -> "test"),
    "pysvtools" -> Map(
      "exe" -> "test",
      "exclusion_regions" -> "test",
      "translocations_only" -> false),
    "freec" -> Map(
      "exe" -> "test",
      "chrFiles" -> "test",
      "chrLenFile" -> "test"
    )
  )

  // Config for a sample named "sample1" with a single paired-end library.
  val sample1 = Map(
    "samples" -> Map("sample1" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("1_1_R1.fq"),
        "R2" -> inputTouch("1_1_R2.fq")
      )
    )
    )))

  // Config for a sample with two paired-end libraries; note that its sample id is "sample3".
  val sample2 = Map(
    "samples" -> Map("sample3" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("2_1_R1.fq"),
        "R2" -> inputTouch("2_1_R2.fq")
      ),
      "lib2" -> Map(
        "R1" -> inputTouch("2_2_R1.fq"),
        "R2" -> inputTouch("2_2_R2.fq")
      )
    )
    )))
}