Gears.scala 9.35 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2
3
4
5
6
7
8
9
10
11
12
13
14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15
16
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
18
import nl.lumc.sasc.biopet.core.{MultiSampleQScript, PipelineCommand}
19
import nl.lumc.sasc.biopet.extensions.tools.MergeOtuMaps
20
21
import nl.lumc.sasc.biopet.extensions.{Cat, Gzip, Ln, Zcat}
import nl.lumc.sasc.biopet.extensions.qiime.{MergeOtuTables, PickOpenReferenceOtus}
22
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSample
23
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
24
import nl.lumc.sasc.biopet.utils.Logging
25
26
27
28
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

/**
29
30
  * Created by pjvanthof on 03/12/15.
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
31
class Gears(val parent: Configurable) extends QScript with MultiSampleQScript { qscript =>
32
33
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
34
  override def reportClass: Some[GearsReport] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
    val gearsReport = new GearsReport(this)
    gearsReport.outputDir = new File(outputDir, "report")
37
    gearsReport.summaryDbFile = summaryDbFile
Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
40
    Some(gearsReport)
  }

41
42
  override def defaults = Map("mergeotumaps" -> Map("skip_prefix" -> "New."))

43
44
  override def fixedValues = Map("gearssingle" -> Map("skip_flexiprep" -> true))

45
46
47
  val qiimeMultisampleOpenReference: Boolean =
    config("qiime_multisample_open_reference", default = false)

48
  /** Init for pipeline */
49
  def init(): Unit = {}
50
51
52
53

  /** Pipeline itself */
  def biopetScript(): Unit = {
    addSamplesJobs()
54
55
56
57
58

    if (qiimeMultisampleOpenReference && !samples.exists(_._2.gearsSingle.qiimeOpen.isDefined))
      Logging.addError(
        "You selected 'qiime_multisample_open_reference' but qiime_open_reference is not enabled")

59
60
61
    addSummaryJobs()
  }

62
  def qiimeClosedDir: Option[File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
63
    if (samples.values.flatMap(_.gearsSingle.qiimeClosed).nonEmpty) {
64
65
      Some(new File(outputDir, "qiime_closed_reference"))
    } else None
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  }
67

Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
70
71
  def qiimeOpenDir: Option[File] = {
    if (samples.values.flatMap(_.gearsSingle.qiimeOpen).nonEmpty) {
      Some(new File(outputDir, "qiime_open_reference"))
    } else None
72
73
74
75
76
  }

  def qiimeClosedOtuTable: Option[File] = qiimeClosedDir.map(new File(_, "otu_table.biom"))
  def qiimeClosedOtuMap: Option[File] = qiimeClosedDir.map(new File(_, "otu_map.txt"))

Peter van 't Hof's avatar
Peter van 't Hof committed
77
78
79
  def qiimeOpenOtuTable: Option[File] = qiimeOpenDir.map(new File(_, "otu_table.biom"))
  def qiimeOpenOtuMap: Option[File] = qiimeOpenDir.map(new File(_, "otu_map.txt"))

80
  /**
81
82
    * Method where the multisample jobs should be added, this will be executed only when running the -sample argument is not given.
    */
83
  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
86
    val qiimeCloseds = samples.values.flatMap(_.gearsSingle.qiimeClosed).toList
    val closedOtuTables = qiimeCloseds.map(_.otuTable)
    val closedOtuMaps = qiimeCloseds.map(_.otuMap)
87
88
89
90
91
    require(closedOtuTables.size == closedOtuMaps.size)
    if (closedOtuTables.nonEmpty) {
      if (closedOtuTables.size > 1) {
        val mergeTables = new MergeOtuTables(qscript)
        mergeTables.input = closedOtuTables
92
        mergeTables.outputFile = qiimeClosedOtuTable.get
93
94
        add(mergeTables)

Peter van 't Hof's avatar
Peter van 't Hof committed
95
        val mergeMaps = new MergeOtuMaps(qscript)
96
        mergeMaps.input = closedOtuMaps
97
        mergeMaps.output = qiimeClosedOtuMap.get
98
99
100
        add(mergeMaps)

      } else {
101
102
        add(Ln(qscript, closedOtuMaps.head, qiimeClosedOtuMap.get))
        add(Ln(qscript, closedOtuTables.head, qiimeClosedOtuTable.get))
103
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
    }

    val qiimeOpens = samples.values.flatMap(_.gearsSingle.qiimeOpen).toList
    val openOtuTables = qiimeOpens.map(_.otuTable)
    val openOtuMaps = qiimeOpens.map(_.otuMap)
    require(openOtuTables.size == openOtuMaps.size)
    if (openOtuTables.nonEmpty) {
      if (openOtuTables.size > 1) {
        val mergeTables = new MergeOtuTables(qscript)
        mergeTables.input = openOtuTables
        mergeTables.outputFile = qiimeOpenOtuTable.get
        add(mergeTables)

        val mergeMaps = new MergeOtuMaps(qscript)
        mergeMaps.input = openOtuMaps
        mergeMaps.output = qiimeOpenOtuMap.get
        add(mergeMaps)
121

Peter van 't Hof's avatar
Peter van 't Hof committed
122
123
124
125
      } else {
        add(Ln(qscript, openOtuMaps.head, qiimeOpenOtuMap.get))
        add(Ln(qscript, openOtuTables.head, qiimeOpenOtuTable.get))
      }
126
127

    }
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

    if (qiimeMultisampleOpenReference) {
      val dir = new File(outputDir, "qiime_open_reference_multisample")

      val cat = new Cat(this)
      cat.input = samples.flatMap(_._2.gearsSingle.qiimeOpen).map(_.fastaInput).toList
      cat.output = new File(dir, "combined.fna")
      add(cat)

      val openReference = new PickOpenReferenceOtus(this)
      openReference.inputFasta = cat.output
      openReference.outputDir = new File(dir, "pick_open_reference_otus")
      add(openReference)

    }
143
144
145
  }

  /**
146
147
148
149
150
    * Factory method for Sample class
    *
    * @param id SampleId
    * @return Sample class
    */
151
152
153
  def makeSample(id: String): Sample = new Sample(id)

  class Sample(sampleId: String) extends AbstractSample(sampleId) {
154

155
    /**
156
157
158
159
160
      * Factory method for Library class
      *
      * @param id SampleId
      * @return Sample class
      */
161
162
163
    def makeLibrary(id: String): Library = new Library(id)

    class Library(libId: String) extends AbstractLibrary(libId) {
164

Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
167
168
169
      lazy val inputR1: File = config("R1")
      lazy val inputR2: Option[File] = config("R2")

      lazy val skipFlexiprep: Boolean = config("skip_flexiprep", default = false)

Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
      lazy val flexiprep: Option[Flexiprep] =
        if (skipFlexiprep) None else Some(new Flexiprep(qscript))
Peter van 't Hof's avatar
Peter van 't Hof committed
172
173
174
175
176
177
178
179
      flexiprep.foreach(_.sampleId = Some(sampleId))
      flexiprep.foreach(_.libId = Some(libId))
      flexiprep.foreach(_.inputR1 = inputR1)
      flexiprep.foreach(_.inputR2 = inputR2)
      flexiprep.foreach(_.outputDir = new File(libDir, "flexiprep"))

      lazy val qcR1: File = flexiprep.map(_.fastqR1Qc).getOrElse(inputR1)
      lazy val qcR2: Option[File] = flexiprep.map(_.fastqR2Qc).getOrElse(inputR2)
180

181
182
      val libraryGears: Boolean = config("library_gears", default = false)

Peter van 't Hof's avatar
Peter van 't Hof committed
183
184
      lazy val gearsSingle: Option[GearsSingle] =
        if (libraryGears) Some(new GearsSingle(qscript)) else None
185
186
187

      /** Function that add library jobs */
      protected def addJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
188
189
190
        inputFiles :+= InputFile(inputR1, config("R1_md5"))
        inputR2.foreach(inputFiles :+= InputFile(_, config("R2_md5")))
        flexiprep.foreach(add(_))
191

192
193
194
195
196
        gearsSingle.foreach { gs =>
          gs.sampleId = Some(sampleId)
          gs.libId = Some(libId)
          gs.outputDir = libDir

Peter van 't Hof's avatar
Peter van 't Hof committed
197
198
          gs.fastqR1 = List(addDownsample(qcR1, gs.outputDir))
          gs.fastqR2 = qcR2.map(addDownsample(_, gs.outputDir)).toList
199
200
          add(gs)
        }
201
202
203
204
205
206
207
208
209
      }

      /** Must return files to store into summary */
      def summaryFiles: Map[String, File] = Map()

      /** Must returns stats to store into summary */
      def summaryStats = Map()
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
212
    lazy val gearsSingle = new GearsSingle(qscript)
    gearsSingle.sampleId = Some(sampleId)
    gearsSingle.outputDir = sampleDir
213
214
215
216

    /** Function to add sample jobs */
    protected def addJobs(): Unit = {
      addPerLibJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
217

218
      val mergeR1: File = new File(sampleDir, s"$sampleId.R1.fq.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
219
      add(Zcat(qscript, libraries.values.map(_.qcR1).toList) | new Gzip(qscript) > mergeR1)
220

221
222
223
224
      val mergeR2 =
        if (libraries.values.exists(_.inputR2.isDefined))
          Some(new File(sampleDir, s"$sampleId.R2.fq.gz"))
        else None
225
      mergeR2.foreach { file =>
Peter van 't Hof's avatar
Peter van 't Hof committed
226
        add(Zcat(qscript, libraries.values.flatMap(_.qcR2).toList) | new Gzip(qscript) > file)
227
      }
228

Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
      gearsSingle.fastqR1 = List(addDownsample(mergeR1, gearsSingle.outputDir))
      gearsSingle.fastqR2 = mergeR2.map(addDownsample(_, gearsSingle.outputDir)).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
231
      add(gearsSingle)
232
233
    }

234
235
236
237
238
239
240
    /** Must return files to store into summary */
    def summaryFiles: Map[String, File] = Map()

    /** Must returns stats to store into summary */
    def summaryStats: Any = Map()
  }

241
  val downSample: Option[Double] = config("gears_downsample")
242
243
244
245
246
247
248
249

  def addDownsample(input: File, dir: File): File = {
    downSample match {
      case Some(x) =>
        val output = new File(dir, input.getName + ".fq.gz")
        val seqtk = new SeqtkSample(this)
        seqtk.input = input
        seqtk.sample = x
250
        add(seqtk | new Gzip(this) > output)
251
252
253
254
255
        output
      case _ => input
    }
  }

256
  /** Must return a map with used settings for this pipeline */
257
  def summarySettings: Map[String, Any] = Map("gears_downsample" -> downSample)
258
259

  /** File to put in the summary for thie pipeline */
260
261
262
263
264
265
266
  def summaryFiles: Map[String, File] =
    (
      qiimeOpenOtuTable.map("qiime_open_otu_table" -> _) ++
        qiimeOpenOtuMap.map("qiime_open_otu_map" -> _) ++
        qiimeClosedOtuTable.map("qiime_closed_otu_table" -> _) ++
        qiimeClosedOtuMap.map("qiime_closed_otu_map" -> _)
    ).toMap
267
268
}

269
object Gears extends PipelineCommand