MultisampleMappingTest.scala 8.83 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
15 16
package nl.lumc.sasc.biopet.pipelines.mapping

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, FileOutputStream }
Peter van 't Hof's avatar
Peter van 't Hof committed
18 19

import com.google.common.io.Files
20
import nl.lumc.sasc.biopet.extensions.kraken.Kraken
Peter van 't Hof's avatar
Peter van 't Hof committed
21 22
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
Peter van 't Hof's avatar
Peter van 't Hof committed
23 24 25 26
import nl.lumc.sasc.biopet.utils.config.Config
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import org.testng.annotations.{ DataProvider, Test }
Peter van 't Hof's avatar
Peter van 't Hof committed
28 29

/**
30 31
 * Created by pjvanthof on 15/05/16.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
32
trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
Peter van 't Hof's avatar
Peter van 't Hof committed
33 34 35 36 37 38 39 40 41 42 43 44
  def initPipeline(map: Map[String, Any]): MultisampleMapping = {
    new MultisampleMapping() {
      override def configNamespace = "multisamplemapping"
      override def globalConfig = new Config(ConfigUtils.mergeMaps(map, MultisampleMappingTestTrait.config))
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

  def mergeStrategies = MultisampleMapping.MergeStrategy.values
  def bamToFastq = false
  def correctReadgroups = false
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  def unmappedToGears = false
Peter van 't Hof's avatar
Peter van 't Hof committed
46 47
  def sample1 = Array(true, false)
  def sample2 = Array(true, false)
48 49
  def sample3 = false
  def sample4 = false
Peter van 't Hof's avatar
Peter van 't Hof committed
50 51 52 53

  @DataProvider(name = "mappingOptions")
  def mappingOptions = {
    for (
54
      merge <- mergeStrategies.toArray; s1 <- sample1; s2 <- sample2
Peter van 't Hof's avatar
Peter van 't Hof committed
55
    ) yield Array(merge, s1, s2)
Peter van 't Hof's avatar
Peter van 't Hof committed
56 57 58 59 60 61 62 63
  }

  @Test(dataProvider = "mappingOptions")
  def testMultisampleMapping(merge: MultisampleMapping.MergeStrategy.Value, sample1: Boolean, sample2: Boolean): Unit = {
    val map: Map[String, Any] = {
      var m: Map[String, Any] = MultisampleMappingTestTrait.config
      if (sample1) m = ConfigUtils.mergeMaps(MultisampleMappingTestTrait.sample1, m)
      if (sample2) m = ConfigUtils.mergeMaps(MultisampleMappingTestTrait.sample2, m)
64 65
      if (sample3) m = ConfigUtils.mergeMaps(MultisampleMappingTestTrait.sample3, m)
      if (sample4) m = ConfigUtils.mergeMaps(MultisampleMappingTestTrait.sample4, m)
Peter van 't Hof's avatar
Peter van 't Hof committed
66 67 68 69 70 71
      m ++ Map(
        "merge_strategy" -> merge.toString,
        "bam_to_fastq" -> bamToFastq,
        "correct_readgroups" -> correctReadgroups,
        "unmapped_to_gears" -> unmappedToGears
      )
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73
    }

74
    if (!sample1 && !sample2 && !sample3 && !sample4) { // When no samples
Peter van 't Hof's avatar
Peter van 't Hof committed
75
      intercept[IllegalStateException] {
Peter van 't Hof's avatar
Peter van 't Hof committed
76 77
        initPipeline(map).script()
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
78
      Logging.errors.clear()
79 80 81 82
    } else if (sample4 && !bamToFastq && !correctReadgroups) {
      intercept[IllegalStateException] {
        initPipeline(map).script()
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
83 84 85 86
    } else {
      val pipeline = initPipeline(map)
      pipeline.script()

87
      val numberFastqLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0) + (if (sample3 && bamToFastq) 1 else 0) + (if (sample4 && bamToFastq) 1 else 0)
Peter van 't Hof's avatar
Peter van 't Hof committed
88 89 90
      val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)

      import MultisampleMapping.MergeStrategy
91
      pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberFastqLibs +
Peter van 't Hof's avatar
Peter van 't Hof committed
92 93 94
        (if (sample2 && (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates)) 1 else 0))
      pipeline.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (
        (if (sample2 && (merge == MergeStrategy.MergeSam || merge == MergeStrategy.PreProcessMergeSam)) 1 else 0))
95 96 97 98 99 100 101 102
      pipeline.samples.foreach {
        case (sampleName, sample) =>
          if (merge == MergeStrategy.None) sample.bamFile shouldBe None
          sample.summaryStats shouldBe Map()
          sample.libraries.foreach {
            case (libraryId, library) =>
              library.summaryStats shouldBe Map()
          }
Peter van 't Hof's avatar
Peter van 't Hof committed
103
      }
104 105 106

      pipeline.functions.count(_.isInstanceOf[Kraken]) shouldBe (if (unmappedToGears) (numberFastqLibs + numberSamples) else 0)

Peter van 't Hof's avatar
Peter van 't Hof committed
107
      pipeline.summarySettings.get("merge_strategy") shouldBe Some(merge.toString)
Peter van 't Hof's avatar
Peter van 't Hof committed
108 109 110 111
    }
  }
}

Peter van 't Hof's avatar
Peter van 't Hof committed
112 113 114 115
class MultisampleMappingTest extends MultisampleMappingTestTrait {
  override def sample1 = Array(true)
}

116 117 118 119 120 121
class MultisampleMappingNoSamplesTest extends MultisampleMappingTestTrait {
  override def sample1 = Array(false)
  override def sample2 = Array(false)
  override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates)
}

122 123 124 125 126 127 128
class MultisampleMappingGearsTest extends MultisampleMappingTestTrait {
  override def sample1 = Array(true)
  override def sample2 = Array(false)
  override def unmappedToGears = true
  override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates)
}

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
class MultisampleMappingBamTest extends MultisampleMappingTestTrait {
  override def sample1 = Array(false)
  override def sample2 = Array(false)
  override def sample3 = true
  override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates)
}

class MultisampleMappingWrongBamTest extends MultisampleMappingTestTrait {
  override def sample1 = Array(false)
  override def sample2 = Array(false)
  override def sample4 = true
  override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates)
}

class MultisampleMappingCorrectBamTest extends MultisampleMappingTestTrait {
  override def sample1 = Array(false)
  override def sample2 = Array(false)
  override def correctReadgroups = true
  override def sample4 = true
  override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates)
}

class MultisampleMappingBamToFastqTest extends MultisampleMappingTestTrait {
  override def sample1 = Array(false)
  override def sample2 = Array(false)
  override def bamToFastq = true
  override def sample3 = true
  override def sample4 = true
  override def mergeStrategies = MultisampleMapping.MergeStrategy.values.filter(_ == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates)
}

Peter van 't Hof's avatar
Peter van 't Hof committed
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
object MultisampleMappingTestTrait {
  val outputDir = Files.createTempDir()
  outputDir.deleteOnExit()
  new File(outputDir, "input").mkdirs()
  def inputTouch(name: String): File = {
    val file = new File(outputDir, "input" + File.separator + name).getAbsoluteFile
    Files.touch(file)
    file
  }

  private def copyFile(name: String): Unit = {
    val is = getClass.getResourceAsStream("/" + name)
    val os = new FileOutputStream(new File(outputDir, name))
    org.apache.commons.io.IOUtils.copy(is, os)
    os.close()
  }

  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")
180
  copyFile("empty.sam")
Peter van 't Hof's avatar
Peter van 't Hof committed
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196

  val config = Map(
    "name_prefix" -> "test",
    "cache" -> true,
    "dir" -> "test",
    "vep_script" -> "test",
    "output_dir" -> outputDir,
    "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
    "fastqc" -> Map("exe" -> "test"),
    "input_alleles" -> "test",
    "fastqc" -> Map("exe" -> "test"),
    "seqtk" -> Map("exe" -> "test"),
    "sickle" -> Map("exe" -> "test"),
    "cutadapt" -> Map("exe" -> "test"),
    "bwa" -> Map("exe" -> "test"),
    "samtools" -> Map("exe" -> "test"),
197
    "igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
Peter van 't Hof's avatar
Peter van 't Hof committed
198
    "wigtobigwig" -> Map("exe" -> "test"),
199 200
    "kraken" -> Map("exe" -> "test", "db" -> "test"),
    "krakenreport" -> Map("exe" -> "test", "db" -> "test"),
Peter van 't Hof's avatar
Peter van 't Hof committed
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
    "md5sum" -> Map("exe" -> "test")
  )

  val sample1 = Map(
    "samples" -> Map("sample1" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("1_1_R1.fq"),
        "R2" -> inputTouch("1_1_R2.fq")
      )
    )
    )))

  val sample2 = Map(
    "samples" -> Map("sample3" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("2_1_R1.fq"),
        "R2" -> inputTouch("2_1_R2.fq")
      ),
      "lib2" -> Map(
        "R1" -> inputTouch("2_2_R1.fq"),
        "R2" -> inputTouch("2_2_R2.fq")
      )
    )
    )))
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

  val sample3 = Map(
    "samples" -> Map("sample3" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "bam" -> (outputDir + File.separator + "empty.sam")
      )
    )
    )))

  val sample4 = Map(
    "samples" -> Map("sample4" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "bam" -> (outputDir + File.separator + "empty.sam")
      )
    )
    )))
Peter van 't Hof's avatar
Peter van 't Hof committed
241
}