GentrapTest.scala 7.46 KB
Newer Older
1
/**
bow's avatar
bow committed
2 3 4 5 6 7 8 9 10 11 12 13 14
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
15 16 17
 */
package nl.lumc.sasc.biopet.pipelines.gentrap

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, FileOutputStream }
bow's avatar
bow committed
19 20

import com.google.common.io.Files
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions._
22
import nl.lumc.sasc.biopet.extensions.tools.BaseCounter
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.ConfigUtils
24
import nl.lumc.sasc.biopet.utils.config.Config
bow's avatar
bow committed
25
import org.broadinstitute.gatk.queue.QSettings
26 27
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import org.testng.annotations.{ DataProvider, Test }
29

30
abstract class GentrapTestAbstract(val expressionMeasure: String) extends TestNGSuite with Matchers {
31

bow's avatar
bow committed
32 33 34 35
  def initPipeline(map: Map[String, Any]): Gentrap = {
    new Gentrap() {
      override def configName = "gentrap"
      override def globalConfig = new Config(map)
36 37
      // disable dict file check since it is based on the reference file name (which we can't modify here since
      // we use the mock /usr/bin/test file
bow's avatar
bow committed
38 39 40
      qSettings = new QSettings
      qSettings.runName = "test"
    }
41 42
  }

43 44
  /** Convenience method for making library config */
  private def makeLibConfig(idx: Int, paired: Boolean = true) = {
Peter van 't Hof's avatar
Peter van 't Hof committed
45 46
    val files = Map("R1" -> GentrapTest.inputTouch("test_R1.fq"))
    if (paired) (s"lib_$idx", files ++ Map("R2" -> GentrapTest.inputTouch("test_R2.fq")))
47 48 49
    else (s"lib_$idx", files)
  }

50 51 52 53 54 55 56
  /** Convenience type for sample config */
  private type SamplesConfig = Map[String, Map[String, Map[String, Map[String, Map[String, String]]]]]

  /** Convenience method for making a single sample config */
  private def makeSampleConfig(sampleIdx: Int, numLibs: Int, paired: Boolean) =
    (s"sample_$sampleIdx",
      Map("libraries" ->
bow's avatar
bow committed
57
        (1 to numLibs)
bow's avatar
bow committed
58 59
        .map(n => makeLibConfig(n, paired))
        .toMap
60 61 62
      )
    )

63 64 65
  /** Convenience method for making all samples config */
  private def makeSamplesConfig(numSamples: Int, numLibsEachSample: Int, pairMode: String): SamplesConfig =
    Map("samples" ->
bow's avatar
bow committed
66
      (1 to numSamples)
bow's avatar
bow committed
67 68 69
      // if paired == "mixed", alternate paired/not paired between each number
      .map(n => makeSampleConfig(n, numLibsEachSample, if (pairMode == "mixed") n % 2 == 0 else pairMode == "paired"))
      .toMap
70
    )
71

Peter van 't Hof's avatar
Peter van 't Hof committed
72
  val validExpressionMeasures = Set(
Peter van 't Hof's avatar
Peter van 't Hof committed
73
    "fragments_per_gene", "fragments_per_exon", "base_counts",
bow's avatar
bow committed
74 75
    "cufflinks_strict", "cufflinks_guided", "cufflinks_blind")

76 77 78 79 80
  @DataProvider(name = "expMeasuresstrandProtocol")
  def expMeasuresStrandProtocolProvider = {

    //val sampleConfigs = Array(pairedOneSampleOneLib, pairedOneSampleTwoLib, pairedOneSampleThreeLib)
    val sampleConfigs = for {
bow's avatar
bow committed
81 82 83 84 85
      (sampleNum, libNum) <- Seq(
        // check multiple libs for single run only ~ to trim down less-informative tests
        // need to check 2 and 3 samples since multi-sample plotting differs when sample is 1 or 2 and 3
        (1, 1), (1, 2), (2, 1), (3, 1)
      )
86 87
      libType <- Seq("paired", "single", "mixed")
    } yield makeSamplesConfig(sampleNum, libNum, libType)
bow's avatar
bow committed
88

89 90
    val strandProtocols = Array("non_specific", "dutp")
    // get all possible combinations of expression measures
bow's avatar
bow committed
91
    val expressionMeasures = validExpressionMeasures
92 93
      //.subsets
      //.map(_.toList)
bow's avatar
bow committed
94 95
      .toArray

96
    for {
97
      sampleConfig <- sampleConfigs.toArray
98
      //expressionMeasure <- expressionMeasures
99
      strandProtocol <- strandProtocols
100
    } yield Array(sampleConfig, List(expressionMeasure), strandProtocol)
bow's avatar
bow committed
101 102
  }

103 104
  @Test(dataProvider = "expMeasuresstrandProtocol")
  def testGentrap(sampleConfig: SamplesConfig, expMeasures: List[String], strandProtocol: String) = {
105 106 107 108 109 110 111 112

    val settings = Map(
      "output_dir" -> GentrapTest.outputDir,
      "gsnap" -> Map("db" -> "test", "dir" -> "test"),
      "aligner" -> "gsnap",
      "expression_measures" -> expMeasures,
      "strand_protocol" -> strandProtocol
    )
113
    val config = ConfigUtils.mergeMaps(settings ++ sampleConfig, Map(GentrapTest.executables.toSeq: _*))
114
    val gentrap: Gentrap = initPipeline(config)
bow's avatar
bow committed
115 116

    gentrap.script()
117
    val functions = gentrap.functions.groupBy(_.getClass)
bow's avatar
bow committed
118
    val numSamples = sampleConfig("samples").size
bow's avatar
bow committed
119

Peter van 't Hof's avatar
Peter van 't Hof committed
120 121
    if (expMeasures.contains("fragments_per_gene"))
      assert(gentrap.functions.exists(_.isInstanceOf[HtseqCount]))
bow's avatar
bow committed
122

Peter van 't Hof's avatar
Peter van 't Hof committed
123 124
    if (expMeasures.contains("fragments_per_exon"))
      assert(gentrap.functions.exists(_.isInstanceOf[HtseqCount]))
bow's avatar
bow committed
125

Peter van 't Hof's avatar
Peter van 't Hof committed
126 127
    if (expMeasures.contains("base_counts"))
      gentrap.functions.count(_.isInstanceOf[BaseCounter]) shouldBe numSamples
bow's avatar
bow committed
128 129

    if (expMeasures.contains("cufflinks_strict")) {
Peter van 't Hof's avatar
Peter van 't Hof committed
130 131
      assert(gentrap.functions.exists(_.isInstanceOf[Cufflinks]))
      assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
bow's avatar
bow committed
132 133 134
    }

    if (expMeasures.contains("cufflinks_guided")) {
Peter van 't Hof's avatar
Peter van 't Hof committed
135 136
      assert(gentrap.functions.exists(_.isInstanceOf[Cufflinks]))
      assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
bow's avatar
bow committed
137 138 139
    }

    if (expMeasures.contains("cufflinks_blind")) {
Peter van 't Hof's avatar
Peter van 't Hof committed
140 141
      assert(gentrap.functions.exists(_.isInstanceOf[Cufflinks]))
      assert(gentrap.functions.exists(_.isInstanceOf[Ln]))
bow's avatar
bow committed
142
    }
bow's avatar
bow committed
143 144
  }

145
}
bow's avatar
bow committed
146

147
class GentrapFragmentsPerGeneTest extends GentrapTestAbstract("fragments_per_gene")
Peter van 't Hof's avatar
Peter van 't Hof committed
148 149
//class GentrapFragmentsPerExonTest extends GentrapTestAbstract("fragments_per_exon")
class GentrapBaseCountsTest extends GentrapTestAbstract("base_counts")
150 151 152 153
class GentrapCufflinksStrictTest extends GentrapTestAbstract("cufflinks_strict")
class GentrapCufflinksGuidedTest extends GentrapTestAbstract("cufflinks_guided")
class GentrapCufflinksBlindTest extends GentrapTestAbstract("cufflinks_blind")

bow's avatar
bow committed
154 155
object GentrapTest {
  val outputDir = Files.createTempDir()
156
  outputDir.deleteOnExit()
Peter van 't Hof's avatar
Peter van 't Hof committed
157 158 159 160 161 162
  new File(outputDir, "input").mkdirs()
  def inputTouch(name: String): String = {
    val file = new File(outputDir, "input" + File.separator + name)
    Files.touch(file)
    file.getAbsolutePath
  }
bow's avatar
bow committed
163

164 165 166 167 168 169 170 171 172 173 174
  private def copyFile(name: String): Unit = {
    val is = getClass.getResourceAsStream("/" + name)
    val os = new FileOutputStream(new File(outputDir, name))
    org.apache.commons.io.IOUtils.copy(is, os)
    os.close()
  }

  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

bow's avatar
bow committed
175
  val executables = Map(
176
    "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
Peter van 't Hof's avatar
Peter van 't Hof committed
177 178 179 180
    "refFlat" -> (outputDir + File.separator + "ref.fa"),
    "annotation_gtf" -> (outputDir + File.separator + "ref.fa"),
    "annotation_bed" -> (outputDir + File.separator + "ref.fa"),
    "annotation_refflat" -> (outputDir + File.separator + "ref.fa"),
181 182
    "varscan_jar" -> "test",
    "rscript" -> Map("exe" -> "test")
bow's avatar
bow committed
183 184 185 186
  ) ++ Seq(
      // fastqc executables
      "fastqc", "seqtk", "sickle", "cutadapt",
      // mapping executables
bow's avatar
bow committed
187
      "star", "bowtie", "samtools", "gsnap", "tophat",
bow's avatar
bow committed
188
      // gentrap executables
189
      "cufflinks", "htseqcount", "grep", "pdflatex", "rscript", "tabix", "bgzip", "bedtoolscoverage", "md5sum",
190 191
      // bam2wig executables
      "igvtools", "wigtobigwig"
bow's avatar
bow committed
192 193
    ).map { case exe => exe -> Map("exe" -> "test") }.toMap
}