GentrapTest.scala 8.28 KB
Newer Older
1
/**
bow's avatar
bow committed
2
3
4
5
6
7
8
9
10
11
12
13
14
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
15
16
17
 */
package nl.lumc.sasc.biopet.pipelines.gentrap

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import java.io.{ File, FileOutputStream }
bow's avatar
bow committed
19
20

import com.google.common.io.Files
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.config.Config
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.extensions._
bow's avatar
bow committed
23
import nl.lumc.sasc.biopet.pipelines.gentrap.scripts.AggrBaseCount
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.ConfigUtils
bow's avatar
bow committed
25
26
import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings
27
28
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
bow's avatar
bow committed
29
import org.testng.annotations.{ AfterClass, DataProvider, Test }
30
31
32

class GentrapTest extends TestNGSuite with Matchers {

bow's avatar
bow committed
33
34
35
36
  def initPipeline(map: Map[String, Any]): Gentrap = {
    new Gentrap() {
      override def configName = "gentrap"
      override def globalConfig = new Config(map)
37
38
      // disable dict file check since it is based on the reference file name (which we can't modify here since
      // we use the mock /usr/bin/test file
bow's avatar
bow committed
39
40
41
      qSettings = new QSettings
      qSettings.runName = "test"
    }
42
43
  }

44
45
  /** Convenience method for making library config */
  private def makeLibConfig(idx: Int, paired: Boolean = true) = {
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
    val files = Map("R1" -> GentrapTest.inputTouch("test_R1.fq"))
    if (paired) (s"lib_$idx", files ++ Map("R2" -> GentrapTest.inputTouch("test_R2.fq")))
48
49
50
    else (s"lib_$idx", files)
  }

51
52
53
54
55
56
57
  /** Convenience type for sample config */
  private type SamplesConfig = Map[String, Map[String, Map[String, Map[String, Map[String, String]]]]]

  /** Convenience method for making a single sample config */
  private def makeSampleConfig(sampleIdx: Int, numLibs: Int, paired: Boolean) =
    (s"sample_$sampleIdx",
      Map("libraries" ->
bow's avatar
bow committed
58
        (1 to numLibs)
bow's avatar
bow committed
59
60
        .map(n => makeLibConfig(n, paired))
        .toMap
61
62
63
      )
    )

64
65
66
  /** Convenience method for making all samples config */
  private def makeSamplesConfig(numSamples: Int, numLibsEachSample: Int, pairMode: String): SamplesConfig =
    Map("samples" ->
bow's avatar
bow committed
67
      (1 to numSamples)
bow's avatar
bow committed
68
69
70
      // if paired == "mixed", alternate paired/not paired between each number
      .map(n => makeSampleConfig(n, numLibsEachSample, if (pairMode == "mixed") n % 2 == 0 else pairMode == "paired"))
      .toMap
71
    )
72

bow's avatar
bow committed
73
74
75
76
  private lazy val validExpressionMeasures = Set(
    "fragments_per_gene", "fragments_per_exon", "bases_per_gene", "bases_per_exon",
    "cufflinks_strict", "cufflinks_guided", "cufflinks_blind")

77
78
79
80
81
  @DataProvider(name = "expMeasuresstrandProtocol")
  def expMeasuresStrandProtocolProvider = {

    //val sampleConfigs = Array(pairedOneSampleOneLib, pairedOneSampleTwoLib, pairedOneSampleThreeLib)
    val sampleConfigs = for {
bow's avatar
bow committed
82
83
84
85
86
      (sampleNum, libNum) <- Seq(
        // check multiple libs for single run only ~ to trim down less-informative tests
        // need to check 2 and 3 samples since multi-sample plotting differs when sample is 1 or 2 and 3
        (1, 1), (1, 2), (2, 1), (3, 1)
      )
87
88
      libType <- Seq("paired", "single", "mixed")
    } yield makeSamplesConfig(sampleNum, libNum, libType)
bow's avatar
bow committed
89

90
91
    val strandProtocols = Array("non_specific", "dutp")
    // get all possible combinations of expression measures
bow's avatar
bow committed
92
    val expressionMeasures = validExpressionMeasures
93
94
      //.subsets
      //.map(_.toList)
bow's avatar
bow committed
95
96
      .toArray

97
    for {
98
      sampleConfig <- sampleConfigs.toArray
99
100
      expressionMeasure <- expressionMeasures
      strandProtocol <- strandProtocols
101
    } yield Array(sampleConfig, List(expressionMeasure), strandProtocol)
bow's avatar
bow committed
102
103
  }

104
105
  @Test(dataProvider = "expMeasuresstrandProtocol")
  def testGentrap(sampleConfig: SamplesConfig, expMeasures: List[String], strandProtocol: String) = {
106
107
108
109
110
111
112
113

    val settings = Map(
      "output_dir" -> GentrapTest.outputDir,
      "gsnap" -> Map("db" -> "test", "dir" -> "test"),
      "aligner" -> "gsnap",
      "expression_measures" -> expMeasures,
      "strand_protocol" -> strandProtocol
    )
114
    val config = ConfigUtils.mergeMaps(settings ++ sampleConfig, Map(GentrapTest.executables.toSeq: _*))
115
    val gentrap: Gentrap = initPipeline(config)
bow's avatar
bow committed
116
117

    gentrap.script()
118
    val functions = gentrap.functions.groupBy(_.getClass)
bow's avatar
bow committed
119
    val numSamples = sampleConfig("samples").size
bow's avatar
bow committed
120

121
    functions(classOf[Gsnap]).size should be >= 1
122

bow's avatar
bow committed
123
124
125
    if (expMeasures.contains("fragments_per_gene")) {
      gentrap.functions
        .collect { case x: HtseqCount => x.output.toString.endsWith(".fragments_per_gene") }.size shouldBe numSamples
126
    }
bow's avatar
bow committed
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155

    if (expMeasures.contains("fragments_per_exon")) {
      gentrap.functions
        .collect { case x: HtseqCount => x.output.toString.endsWith(".fragments_per_exon") }.size shouldBe numSamples
    }

    if (expMeasures.contains("bases_per_gene")) {
      gentrap.functions
        .collect { case x: AggrBaseCount => x.output.toString.endsWith(".bases_per_gene") }.size shouldBe numSamples
    }

    if (expMeasures.contains("bases_per_exon")) {
      gentrap.functions
        .collect { case x: AggrBaseCount => x.output.toString.endsWith(".bases_per_exon") }.size shouldBe numSamples
    }

    if (expMeasures.contains("cufflinks_strict")) {
      gentrap.functions
        .collect {
          case x: Cufflinks => x.outputGenesFpkm.getParentFile.toString.endsWith("cufflinks_strict")
          case x: Ln => x.output.toString.endsWith(".genes_fpkm_cufflinks_strict") ||
            x.output.toString.endsWith(".isoforms_fpkm_cufflinks_strict")
        }
        .count(identity) shouldBe numSamples * 3 // three types of jobs per sample
    }

    if (expMeasures.contains("cufflinks_guided")) {
      gentrap.functions
        .collect {
156
157
158
159
          case x: Cufflinks => x.outputGenesFpkm.getParentFile.toString.endsWith("cufflinks_guided")
          case x: Ln => x.output.toString.endsWith(".genes_fpkm_cufflinks_guided") ||
            x.output.toString.endsWith(".isoforms_fpkm_cufflinks_guided")
        }
bow's avatar
bow committed
160
161
162
163
164
165
        .count(identity) shouldBe numSamples * 3 // three types of jobs per sample
    }

    if (expMeasures.contains("cufflinks_blind")) {
      gentrap.functions
        .collect {
166
167
168
169
          case x: Cufflinks => x.outputGenesFpkm.getParentFile.toString.endsWith("cufflinks_blind")
          case x: Ln => x.output.toString.endsWith(".genes_fpkm_cufflinks_blind") ||
            x.output.toString.endsWith(".isoforms_fpkm_cufflinks_blind")
        }
bow's avatar
bow committed
170
171
        .count(identity) shouldBe numSamples * 3 // three types of jobs per sample
    }
bow's avatar
bow committed
172
173
174
175
176
  }

  // remove temporary run directory all tests in the class have been run
  @AfterClass def removeTempOutputDir() = {
    FileUtils.deleteDirectory(GentrapTest.outputDir)
177
178
  }
}
bow's avatar
bow committed
179
180
181

object GentrapTest {
  val outputDir = Files.createTempDir()
Peter van 't Hof's avatar
Peter van 't Hof committed
182
183
184
185
186
187
  new File(outputDir, "input").mkdirs()
  def inputTouch(name: String): String = {
    val file = new File(outputDir, "input" + File.separator + name)
    Files.touch(file)
    file.getAbsolutePath
  }
bow's avatar
bow committed
188

189
190
191
192
193
194
195
196
197
198
199
  private def copyFile(name: String): Unit = {
    val is = getClass.getResourceAsStream("/" + name)
    val os = new FileOutputStream(new File(outputDir, name))
    org.apache.commons.io.IOUtils.copy(is, os)
    os.close()
  }

  copyFile("ref.fa")
  copyFile("ref.dict")
  copyFile("ref.fa.fai")

bow's avatar
bow committed
200
  val executables = Map(
201
    "reference_fasta" -> (outputDir + File.separator + "ref.fa"),
Peter van 't Hof's avatar
Peter van 't Hof committed
202
    "refFlat" -> "test",
bow's avatar
bow committed
203
204
205
    "annotation_gtf" -> "test",
    "annotation_bed" -> "test",
    "annotation_refflat" -> "test",
206
    "varscan_jar" -> "test"
bow's avatar
bow committed
207
208
209
210
  ) ++ Seq(
      // fastqc executables
      "fastqc", "seqtk", "sickle", "cutadapt",
      // mapping executables
bow's avatar
bow committed
211
      "star", "bowtie", "samtools", "gsnap", "tophat",
bow's avatar
bow committed
212
      // gentrap executables
213
      "cufflinks", "htseqcount", "grep", "pdflatex", "rscript", "tabix", "bgzip", "bedtoolscoverage", "md5sum",
214
215
      // bam2wig executables
      "igvtools", "wigtobigwig"
bow's avatar
bow committed
216
217
    ).map { case exe => exe -> Map("exe" -> "test") }.toMap
}