GentrapTest.scala 9.2 KB
Newer Older
1
/**
bow's avatar
bow committed
2 3 4 5 6 7 8 9 10
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
bow's avatar
bow committed
12 13
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
14 15 16
 */
package nl.lumc.sasc.biopet.pipelines.gentrap

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import java.io.{ File, FileOutputStream }
bow's avatar
bow committed
18 19

import com.google.common.io.Files
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetPipe }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions._
22 23
import nl.lumc.sasc.biopet.extensions.gmap.Gsnap
import nl.lumc.sasc.biopet.extensions.hisat.Hisat2
Peter van 't Hof's avatar
Peter van 't Hof committed
24 25
import nl.lumc.sasc.biopet.extensions.tools.{ BaseCounter, WipeReads }
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
26
import nl.lumc.sasc.biopet.utils.config.Config
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.camelize
28
import org.apache.commons.io.FileUtils
bow's avatar
bow committed
29
import org.broadinstitute.gatk.queue.QSettings
30 31
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
Peter van 't Hof's avatar
Peter van 't Hof committed
32
import org.testng.annotations.{ AfterClass, DataProvider, Test }
33

34
abstract class GentrapTestAbstract(val expressionMeasures: List[String]) extends TestNGSuite with Matchers {
35

bow's avatar
bow committed
36 37
  def initPipeline(map: Map[String, Any]): Gentrap = {
    new Gentrap() {
38
      override def configNamespace = "gentrap"
bow's avatar
bow committed
39
      override def globalConfig = new Config(map)
40 41
      // disable dict file check since it is based on the reference file name (which we can't modify here since
      // we use the mock /usr/bin/test file
bow's avatar
bow committed
42 43 44
      qSettings = new QSettings
      qSettings.runName = "test"
    }
45 46
  }

47
  def strandProtocols = Array("non_specific", "dutp")
48

49 50 51 52
  def aligner: Option[String] = None
  def removeRiboReads: Option[Boolean] = Some(false)
  def sample1: Boolean = true
  def sample2: Boolean = true
53
  def callVariants: Option[Boolean] = None
54

55 56
  @DataProvider(name = "expMeasuresstrandProtocol")
  def expMeasuresStrandProtocolProvider = {
57 58
    for {
      strandProtocol <- strandProtocols
59
    } yield Array(strandProtocol)
bow's avatar
bow committed
60 61
  }

62 63
  private var dirs: List[File] = Nil

64
  @Test(dataProvider = "expMeasuresstrandProtocol")
65
  def testGentrap(strandProtocol: String) = {
66 67
    val outputDir = GentrapTest.outputDir
    dirs :+= outputDir
68
    val settings = Map(
69
      "output_dir" -> outputDir,
70
      "gsnap" -> Map("db" -> "test", "dir" -> "test"),
71 72 73 74
      "expression_measures" -> expressionMeasures,
      "strand_protocol" -> strandProtocol
    ) ++
      aligner.map("aligner" -> _) ++
75 76
      removeRiboReads.map("remove_ribosomal_reads" -> _) ++
      callVariants.map("call_variants" -> _)
77
    val configs: List[Option[Map[String, Any]]] = List(Some(settings), (if (sample1) Some(GentrapTest.sample1) else None), (if (sample2) Some(GentrapTest.sample2) else None))
Peter van 't Hof's avatar
Peter van 't Hof committed
78
    val config = configs.flatten.foldLeft(GentrapTest.executables)((a, b) => ConfigUtils.mergeMaps(a, b))
79
    val gentrap: Gentrap = initPipeline(config)
bow's avatar
bow committed
80

81 82
    val numSamples = (sample1, sample2) match {
      case (true, true) => 2
Peter van 't Hof's avatar
Peter van 't Hof committed
83 84 85
      case (_, true)    => 1
      case (true, _)    => 1
      case _            => 0
bow's avatar
bow committed
86 87
    }

88 89 90 91 92 93 94 95 96 97
    if (numSamples == 0) {
      intercept[IllegalArgumentException] {
        gentrap.script()
      }
      Logging.errors.clear()
    } else {
      gentrap.script()

      val functions = gentrap.functions.flatMap {
        case f: BiopetFifoPipe => f.pipesJobs
Peter van 't Hof's avatar
Peter van 't Hof committed
98 99
        case f: BiopetPipe     => f.pipesJobs
        case f                 => List(f)
100 101
      }.groupBy(_.getClass)

102 103 104
      gentrap.shivaVariantcalling.isDefined shouldBe callVariants.getOrElse(false)

      gentrap.summarySettings.getOrElse("expression_measures", List()).asInstanceOf[List[String]].sorted shouldBe
Peter van 't Hof's avatar
Peter van 't Hof committed
105
        expressionMeasures.map(camelize(_)).sorted
106 107
      gentrap.summarySettings.get("call_variants") shouldBe Some(callVariants.getOrElse(false))
      gentrap.summarySettings.get("remove_ribosomal_reads") shouldBe Some(removeRiboReads.getOrElse(false))
Peter van 't Hof's avatar
Peter van 't Hof committed
108
      gentrap.summarySettings.get("strand_protocol") shouldBe Some(camelize(strandProtocol))
109

110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
      if (expressionMeasures.contains("fragments_per_gene"))
        assert(gentrap.functions.exists(_.isInstanceOf[HtseqCount]))

      if (expressionMeasures.contains("fragments_per_exon"))
        assert(gentrap.functions.exists(_.isInstanceOf[HtseqCount]))

      if (expressionMeasures.contains("base_counts"))
        gentrap.functions.count(_.isInstanceOf[BaseCounter]) shouldBe numSamples

      if (expressionMeasures.contains("cufflinks_strict")) {
        assert(gentrap.functions.exists(_.isInstanceOf[Cufflinks]))
        assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
      }

      if (expressionMeasures.contains("cufflinks_guided")) {
        assert(gentrap.functions.exists(_.isInstanceOf[Cufflinks]))
        assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
      }

      if (expressionMeasures.contains("cufflinks_blind")) {
        assert(gentrap.functions.exists(_.isInstanceOf[Cufflinks]))
        assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
      }

      gentrap.removeRibosomalReads shouldBe removeRiboReads.getOrElse(false)
      gentrap.functions.exists(_.isInstanceOf[WipeReads]) shouldBe removeRiboReads.getOrElse(false)

      val classMap = Map(
        "gsnap" -> classOf[Gsnap],
        "tophat" -> classOf[Tophat],
        "star" -> classOf[Star],
        "star-2pass" -> classOf[Star],
        "hisat2" -> classOf[Hisat2]
      )
144

145
      val alignerClass = classMap.get(aligner.getOrElse("gsnap"))
146

147 148 149
      alignerClass.foreach(c => assert(functions.keys.exists(_ == c)))
      classMap.values.filterNot(Some(_) == alignerClass).foreach(x => assert(!functions.keys.exists(_ == x)))
    }
bow's avatar
bow committed
150 151
  }

152 153 154 155
  // remove temporary run directory all tests in the class have been run
  @AfterClass def removeTempOutputDir() = {
    dirs.foreach(FileUtils.deleteDirectory)
  }
156
}
bow's avatar
bow committed
157

158 159 160 161 162 163 164 165 166 167 168 169 170 171
class GentrapFragmentsPerGeneTest extends GentrapTestAbstract(List("fragments_per_gene"))
//class GentrapFragmentsPerExonTest extends GentrapTestAbstract("fragments_per_exon")
class GentrapBaseCountsTest extends GentrapTestAbstract(List("base_counts"))
class GentrapCufflinksStrictTest extends GentrapTestAbstract(List("cufflinks_strict"))
class GentrapCufflinksGuidedTest extends GentrapTestAbstract(List("cufflinks_guided"))
class GentrapCufflinksBlindTest extends GentrapTestAbstract(List("cufflinks_blind"))
class GentrapAllTest extends GentrapTestAbstract(List("fragments_per_gene", "base_counts", "cufflinks_strict", "cufflinks_guided", "cufflinks_blind"))
class GentrapNoSamplesTest extends GentrapTestAbstract(List("fragments_per_gene")) {
  override def sample1 = false
  override def sample2 = false
}
class GentrapRemoveRibosomeTest extends GentrapTestAbstract(List("fragments_per_gene")) {
  override def removeRiboReads = Some(true)
}
172 173 174
class GentrapCallVariantsTest extends GentrapTestAbstract(List("fragments_per_gene")) {
  override def callVariants = Some(true)
}
175

bow's avatar
bow committed
176
object GentrapTest {
177 178 179
  def outputDir = Files.createTempDir()
  val inputDir = Files.createTempDir()

Peter van 't Hof's avatar
Peter van 't Hof committed
180
  def inputTouch(name: String): String = {
181
    val file = new File(inputDir, name)
Peter van 't Hof's avatar
Peter van 't Hof committed
182 183 184
    Files.touch(file)
    file.getAbsolutePath
  }
bow's avatar
bow committed
185

186 187
  private def copyFile(name: String): Unit = {
    val is = getClass.getResourceAsStream("/" + name)
188
    val os = new FileOutputStream(new File(inputDir, name))
189 190 191 192 193 194 195 196
    org.apache.commons.io.IOUtils.copy(is, os)
    os.close()
  }

  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

197
  val executables: Map[String, Any] = Map(
198
    "skip_write_dependencies" -> true,
199 200 201 202 203 204
    "reference_fasta" -> (inputDir + File.separator + "ref.fa"),
    "refFlat" -> (inputDir + File.separator + "ref.fa"),
    "annotation_gtf" -> (inputDir + File.separator + "ref.fa"),
    "annotation_bed" -> (inputDir + File.separator + "ref.fa"),
    "annotation_refflat" -> (inputDir + File.separator + "ref.fa"),
    "ribosome_refflat" -> (inputDir + File.separator + "ref.fa"),
205
    "varscan_jar" -> "test",
206
    "rscript" -> Map("exe" -> "test"),
207
    "igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
208
    "gatk_jar" -> "test"
bow's avatar
bow committed
209 210 211 212
  ) ++ Seq(
      // fastqc executables
      "fastqc", "seqtk", "sickle", "cutadapt",
      // mapping executables
bow's avatar
bow committed
213
      "star", "bowtie", "samtools", "gsnap", "tophat",
bow's avatar
bow committed
214
      // gentrap executables
215
      "cufflinks", "htseqcount", "grep", "pdflatex", "rscript", "tabix", "bgzip", "bedtoolscoverage", "md5sum",
216
      // bam2wig executables
217
      "wigtobigwig"
bow's avatar
bow committed
218
    ).map { case exe => exe -> Map("exe" -> "test") }.toMap
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240

  val sample1: Map[String, Any] = Map(
    "samples" -> Map("sample1" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("1_1_R1.fq"),
        "R2" -> inputTouch("1_1_R2.fq")
      )
    )
    )))

  val sample2: Map[String, Any] = Map(
    "samples" -> Map("sample3" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> inputTouch("2_1_R1.fq"),
        "R2" -> inputTouch("2_1_R2.fq")
      ),
      "lib2" -> Map(
        "R1" -> inputTouch("2_2_R1.fq"),
        "R2" -> inputTouch("2_2_R2.fq")
      )
    )
    )))
bow's avatar
bow committed
241
}