GentrapTest.scala 7.02 KB
Newer Older
1
2
3
4
5
6
/**
 * Copyright (c) 2015 Leiden University Medical Center - Sequencing Analysis Support Core <sasc@lumc.nl>
 * @author Wibowo Arindrarto <w.arindrarto@lumc.nl>
 */
package nl.lumc.sasc.biopet.pipelines.gentrap

bow's avatar
bow committed
7
8
9
import java.io.File

import com.google.common.io.Files
bow's avatar
bow committed
10
import nl.lumc.sasc.biopet.pipelines.gentrap.scripts.AggrBaseCount
bow's avatar
bow committed
11
12
import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings
13
14
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
bow's avatar
bow committed
15
import org.testng.annotations.{ AfterClass, DataProvider, Test }
16
17

import nl.lumc.sasc.biopet.core.config.Config
bow's avatar
bow committed
18
19
import nl.lumc.sasc.biopet.extensions._
import nl.lumc.sasc.biopet.utils.ConfigUtils
20
21
22

class GentrapTest extends TestNGSuite with Matchers {

23
24
25
26
  import Gentrap._
  import Gentrap.ExpMeasures._
  import Gentrap.StrandProtocol._

bow's avatar
bow committed
27
28
29
  /**
   * Creates a Gentrap pipeline instance for testing.
   *
   * The returned instance is an anonymous Gentrap subclass whose config name is "gentrap",
   * whose global config is built from the given map, and whose queue settings are a fresh
   * QSettings object with the run name "test".
   *
   * @param map config values handed to the pipeline's global config
   * @return the configured Gentrap instance
   */
  def initPipeline(map: Map[String, Any]): Gentrap =
    new Gentrap() {
      override def globalConfig = new Config(map)

      override def configName = "gentrap"

      // prepare the settings object first, then hand it to the pipeline
      qSettings = {
        val testSettings = new QSettings
        testSettings.runName = "test"
        testSettings
      }
    }

38
39
40
41
42
43
44
  /**
   * Convenience method for making a single library config entry.
   *
   * @param idx library index, used to build the library name "lib_<idx>"
   * @param paired whether to include an "R2" entry next to the "R1" entry
   * @return tuple of the library name and its read label -> file name map
   */
  private def makeLibConfig(idx: Int, paired: Boolean = true) = {
    val secondRead = if (paired) Map("R2" -> "test_R2.fq") else Map.empty[String, String]
    s"lib_$idx" -> (Map("R1" -> "test_R1.fq") ++ secondRead)
  }

45
46
47
48
49
50
51
  /**
   * Convenience type for the samples config.
   *
   * Nesting, outermost first: the "samples" key -> sample name -> the "libraries" key ->
   * library name -> read label ("R1" / "R2") -> FASTQ file name.
   */
  private type SamplesConfig = Map[String, Map[String, Map[String, Map[String, Map[String, String]]]]]

  /**
   * Convenience method for making a single sample config entry.
   *
   * @param sampleIdx sample index, used to build the sample name "sample_<sampleIdx>"
   * @param numLibs number of libraries to create, named lib_1 up to lib_<numLibs>
   * @param paired whether every library of this sample gets an "R2" entry
   * @return tuple of the sample name and a map holding its libraries under the "libraries" key
   */
  private def makeSampleConfig(sampleIdx: Int, numLibs: Int, paired: Boolean) = {
    val libraries = (1 to numLibs).map(libIdx => makeLibConfig(libIdx, paired)).toMap
    s"sample_$sampleIdx" -> Map("libraries" -> libraries)
  }

58
59
60
  /**
   * Convenience method for making the config of all samples.
   *
   * @param numSamples number of samples to create, named sample_1 up to sample_<numSamples>
   * @param numLibsEachSample number of libraries in every sample
   * @param pairMode "paired" makes every sample paired; "mixed" makes even-numbered samples
   *                 paired and odd-numbered samples single; any other value makes every
   *                 sample single
   * @return the samples config, stored under the "samples" key
   */
  private def makeSamplesConfig(numSamples: Int, numLibsEachSample: Int, pairMode: String): SamplesConfig = {
    // when pairMode is "mixed", alternate between paired and not paired from one sample to the next
    def isPaired(sampleIdx: Int): Boolean = pairMode match {
      case "mixed" => sampleIdx % 2 == 0
      case other   => other == "paired"
    }

    val samples = (1 to numSamples)
      .map(sampleIdx => makeSampleConfig(sampleIdx, numLibsEachSample, isPaired(sampleIdx)))
      .toMap

    Map("samples" -> samples)
  }
66

bow's avatar
bow committed
67
68
69
70
  /** Expression measure names that the data provider feeds into the pipeline test. */
  private lazy val validExpressionMeasures = Set(
    // fragment counts
    "fragments_per_gene",
    "fragments_per_exon",
    // base counts
    "bases_per_gene",
    "bases_per_exon",
    // cufflinks modes
    "cufflinks_strict",
    "cufflinks_guided",
    "cufflinks_blind"
  )

71
72
73
74
75
  /**
   * Data provider for the pipeline test.
   *
   * Yields one row per combination of samples config, expression measure and strand protocol.
   * Each row holds the samples config, a one-element list with the expression measure, and
   * the strand protocol name.
   */
  @DataProvider(name = "expMeasuresstrandProtocol")
  def expMeasuresStrandProtocolProvider = {

    // (number of samples, number of libraries per sample)
    // check multiple libs for single run only ~ to trim down less-informative tests
    // need to check 2 and 3 samples since multi-sample plotting differs when sample is 1 or 2 and 3
    val sampleAndLibCounts = Seq((1, 1), (1, 2), (2, 1), (3, 1))
    val libTypes = Seq("paired", "single", "mixed")

    val sampleConfigs = sampleAndLibCounts.flatMap {
      case (sampleNum, libNum) =>
        libTypes.map(libType => makeSamplesConfig(sampleNum, libNum, libType))
    }

    val strandProtocols = Array("non_specific", "dutp")
    // every expression measure is tested on its own; subsets of measures are not generated
    val expressionMeasures = validExpressionMeasures.toArray

    sampleConfigs.toArray.flatMap { sampleConfig =>
      expressionMeasures.flatMap { expressionMeasure =>
        strandProtocols.map { strandProtocol =>
          Array(sampleConfig, List(expressionMeasure), strandProtocol)
        }
      }
    }
  }

98
99
  /**
   * Builds a Gentrap pipeline for one combination of samples config, expression measures and
   * strand protocol, runs its `script()` and checks the jobs that were added.
   *
   * @param sampleConfig samples config, as produced by the data provider
   * @param expMeasures expression measures to enable in the pipeline
   * @param strandProtocol value of the "strand_protocol" setting
   */
  @Test(dataProvider = "expMeasuresstrandProtocol")
  def testGentrap(sampleConfig: SamplesConfig, expMeasures: List[String], strandProtocol: String) = {

    val settings = Map(
      "output_dir" -> GentrapTest.outputDir,
      "gsnap" -> Map("db" -> "test", "dir" -> "test"),
      "aligner" -> "gsnap",
      "expression_measures" -> expMeasures,
      "strand_protocol" -> strandProtocol
    )
    val config = ConfigUtils.mergeMaps(settings ++ sampleConfig, Map(GentrapTest.executables.toSeq: _*))
    val gentrap: Gentrap = initPipeline(config)

    gentrap.script()
    val functions = gentrap.functions.groupBy(_.getClass)
    val numSamples = sampleConfig("samples").size

    // getOrElse: a missing aligner job must fail the matcher instead of throwing NoSuchElementException
    functions.getOrElse(classOf[Gsnap], Nil).size should be >= 1

    // The job checks below count only the jobs whose output matches the measure. Taking the
    // size of the collected booleans would count every job of the class, whatever its output.

    // fragment counts: one HtseqCount job per sample for each enabled measure
    for (measure <- Seq("fragments_per_gene", "fragments_per_exon") if expMeasures.contains(measure)) {
      gentrap.functions
        .collect { case x: HtseqCount => x.output.toString.endsWith(s".$measure") }
        .count(identity) shouldBe numSamples
    }

    // base counts: one AggrBaseCount job per sample for each enabled measure
    for (measure <- Seq("bases_per_gene", "bases_per_exon") if expMeasures.contains(measure)) {
      gentrap.functions
        .collect { case x: AggrBaseCount => x.output.toString.endsWith(s".$measure") }
        .count(identity) shouldBe numSamples
    }

    // cufflinks modes: the cufflinks run itself plus the links to its genes and isoforms FPKM files
    for (measure <- Seq("cufflinks_strict", "cufflinks_guided", "cufflinks_blind") if expMeasures.contains(measure)) {
      gentrap.functions
        .collect {
          case x: Cufflinks => x.outputGenesFpkm.getParentFile.toString.endsWith(measure)
          case x: Ln => x.output.toString.endsWith(s".genes_fpkm_$measure") ||
            x.output.toString.endsWith(s".isoforms_fpkm_$measure")
        }
        .count(identity) shouldBe numSamples * 3 // three types of jobs per sample
    }
  }

  /** Removes the temporary run directory after all tests in the class have been run. */
  @AfterClass
  def removeTempOutputDir(): Unit = FileUtils.deleteDirectory(GentrapTest.outputDir)
}
bow's avatar
bow committed
173
174
175
176
177
178
179
180
181

/** Fixtures shared by all runs of the Gentrap pipeline test. */
object GentrapTest {

  /**
   * Temporary directory used as the pipeline's output directory.
   *
   * Created through java.nio.file.Files (fully qualified, since `Files` in this file refers to
   * the Guava class) instead of Guava's deprecated `Files.createTempDir()`.
   */
  val outputDir: File = java.nio.file.Files.createTempDirectory("gentrap-test").toFile

  /**
   * Dummy config values: the reference / annotation / jar settings map straight to "test",
   * and every executable name maps to a nested config whose "exe" value is "test".
   */
  val executables = Map(
    "reference" -> "test",
    "annotation_gtf" -> "test",
    "annotation_bed" -> "test",
    "annotation_refflat" -> "test",
    "varscan_jar" -> "test"
  ) ++ Seq(
      // fastqc executables
      "fastqc", "seqtk", "sickle", "cutadapt",
      // mapping executables
      "star", "bowtie", "samtools", "gsnap", "tophat",
      // gentrap executables
      "cufflinks", "htseqcount", "grep", "pdflatex", "rscript", "tabix", "bgzip",
      // bam2wig executables
      "igvtools", "wigtobigwig"
    ).map(exe => exe -> Map("exe" -> "test")).toMap
}