Gears.scala 8.48 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
16
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
19
import nl.lumc.sasc.biopet.extensions.tools.MergeOtuMaps
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.extensions.{ Gzip, Ln, Zcat }
21
import nl.lumc.sasc.biopet.extensions.qiime.MergeOtuTables
22
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSample
23
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
24
25
26
27
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

/**
Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
 * Created by pjvanthof on 03/12/15.
 */
30
31
32
class Gears(val root: Configurable) extends QScript with MultiSampleQScript { qscript =>
  /**
   * No-argument constructor; delegates to the primary constructor with a null root.
   *
   * NOTE(review): presumably needed so the script can be instantiated without a parent
   * Configurable (e.g. when it is started as a top-level pipeline) - confirm.
   */
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
35
36
37
38
39
  /**
   * Report builder of this pipeline: a GearsReport that writes to the "report" subdirectory of the
   * output dir and uses the summary file of this pipeline.
   */
  override def reportClass = {
    val report = new GearsReport(this)
    report.outputDir = new File(outputDir, "report")
    report.summaryFile = summaryFile
    Some(report)
  }

40
41
  /** Default config values of this pipeline: "skip_prefix" of "mergeotumaps" defaults to "New." */
  override def defaults = Map(
    "mergeotumaps" -> Map(
      "skip_prefix" -> "New."
    )
  )

42
43
  /** Fixed config values of this pipeline: "skip_flexiprep" of "gearssingle" is always true. */
  override def fixedValues = Map(
    "gearssingle" -> Map(
      "skip_flexiprep" -> true
    )
  )

44
45
46
47
48
49
50
51
52
53
54
55
56
  /** Initialisation hook of the pipeline; nothing has to be set up here. */
  def init(): Unit = ()

  /** Location of the summary output file: "gears.summary.json" inside the output dir. */
  def summaryFile: File = {
    val fileName = "gears.summary.json"
    new File(outputDir, fileName)
  }

  /** Pipeline itself: adds the sample jobs first and the summary jobs afterwards. */
  def biopetScript(): Unit = {
    // NOTE(review): addMultiSampleJobs() is not called here directly; presumably it is invoked
    // from within addSamplesJobs() - confirm against MultiSampleQScript.
    addSamplesJobs()
    addSummaryJobs()
  }

57
  /**
   * Directory "qiime_closed_reference" inside the output dir, used for the combined
   * closed-reference QIIME files; None when no sample has a closed-reference QIIME result.
   */
  def qiimeClosedDir: Option[File] = {
    val closedRuns = samples.values.flatMap(_.gearsSingle.qiimeClosed)
    if (closedRuns.isEmpty) None
    else Some(new File(outputDir, "qiime_closed_reference"))
  }
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63
64
65
66
  /**
   * Directory "qiime_open_reference" inside the output dir, used for the combined
   * open-reference QIIME files; None when no sample has an open-reference QIIME result.
   */
  def qiimeOpenDir: Option[File] = {
    val openRuns = samples.values.flatMap(_.gearsSingle.qiimeOpen)
    if (openRuns.isEmpty) None
    else Some(new File(outputDir, "qiime_open_reference"))
  }

  /** Combined closed-reference OTU table ("otu_table.biom" in qiimeClosedDir), if that dir is defined. */
  def qiimeClosedOtuTable: Option[File] =
    qiimeClosedDir.map(dir => new File(dir, "otu_table.biom"))
  /** Combined closed-reference OTU map ("otu_map.txt" in qiimeClosedDir), if that dir is defined. */
  def qiimeClosedOtuMap: Option[File] =
    qiimeClosedDir.map(dir => new File(dir, "otu_map.txt"))

Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
74
  /** Combined open-reference OTU table ("otu_table.biom" in qiimeOpenDir), if that dir is defined. */
  def qiimeOpenOtuTable: Option[File] =
    qiimeOpenDir.map(dir => new File(dir, "otu_table.biom"))
  /** Combined open-reference OTU map ("otu_map.txt" in qiimeOpenDir), if that dir is defined. */
  def qiimeOpenOtuMap: Option[File] =
    qiimeOpenDir.map(dir => new File(dir, "otu_map.txt"))

75
  /**
   * Method where the multisample jobs are added; this is executed only when the -sample argument
   * is not given.
   *
   * The closed-reference and the open-reference QIIME results of the samples are combined
   * independently but in the same way:
   *  - no sample has a result: no jobs are added
   *  - exactly one sample has a result: its OTU map and OTU table are linked to the combined files
   *  - more than one sample has a result: the OTU tables and OTU maps are merged into the combined files
   */
  def addMultiSampleJobs(): Unit = {
    // Adds the link/merge jobs for one kind of QIIME result (closed or open reference).
    // otuTables and otuMaps hold the per-sample files, combinedTable and combinedMap the targets.
    def addCombineJobs(otuTables: List[File], otuMaps: List[File], combinedTable: File, combinedMap: File): Unit = {
      require(otuTables.size == otuMaps.size)
      otuTables.zip(otuMaps) match {
        case Nil => // no sample produced this kind of result, nothing to combine
        case (singleTable, singleMap) :: Nil =>
          // A single result does not need merging, the files are only linked
          add(Ln(qscript, singleMap, combinedMap))
          add(Ln(qscript, singleTable, combinedTable))
        case _ =>
          val mergeTables = new MergeOtuTables(qscript)
          mergeTables.input = otuTables
          mergeTables.outputFile = combinedTable
          add(mergeTables)

          val mergeMaps = new MergeOtuMaps(qscript)
          mergeMaps.input = otuMaps
          mergeMaps.output = combinedMap
          add(mergeMaps)
      }
    }

    // The combined files are defined exactly when at least one sample has a result of that kind,
    // so iterating over the Options replaces the explicit nonEmpty check and the calls to .get.
    val closedRuns = samples.values.flatMap(_.gearsSingle.qiimeClosed).toList
    for (table <- qiimeClosedOtuTable; otuMap <- qiimeClosedOtuMap)
      addCombineJobs(closedRuns.map(_.otuTable), closedRuns.map(_.otuMap), table, otuMap)

    val openRuns = samples.values.flatMap(_.gearsSingle.qiimeOpen).toList
    for (table <- qiimeOpenOtuTable; otuMap <- qiimeOpenOtuMap)
      addCombineJobs(openRuns.map(_.otuTable), openRuns.map(_.otuMap), table, otuMap)
  }

  /**
   * Creates the Sample object that belongs to a sample id.
   *
   * @param id id of the sample
   * @return a new Sample for the given id
   */
  def makeSample(id: String): Sample = {
    val sample = new Sample(id)
    sample
  }

  /**
   * A sample of the Gears pipeline.
   *
   * Each library is run through Flexiprep unless "skip_flexiprep" is set. The resulting R1 (and,
   * when present, R2) files of all libraries are passed through Zcat and Gzip into one file per
   * sample, which is the input of the sample-level GearsSingle run.
   *
   * @param sampleId id of this sample
   */
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /**
     * Factory method for Library class
     *
     * @param id library id
     * @return Library class
     */
    def makeLibrary(id: String): Library = new Library(id)

    /**
     * A library of a sample.
     *
     * @param libId id of this library
     */
    class Library(libId: String) extends AbstractLibrary(libId) {

      /** R1 input file, read from the "R1" config key */
      lazy val inputR1: File = config("R1")

      /** Optional R2 input file, read from the "R2" config key */
      lazy val inputR2: Option[File] = config("R2")

      /** When true no Flexiprep run is created for this library */
      lazy val skipFlexiprep: Boolean = config("skip_flexiprep", default = false)

      /** Flexiprep run of this library, None when Flexiprep is skipped */
      lazy val flexiprep = if (skipFlexiprep) None else Some(new Flexiprep(qscript))
      // This statement is part of the constructor, so the lazy val above is evaluated right here
      flexiprep.foreach { fp =>
        fp.sampleId = Some(sampleId)
        fp.libId = Some(libId)
        fp.inputR1 = inputR1
        fp.inputR2 = inputR2
        fp.outputDir = new File(libDir, "flexiprep")
      }

      /** R1 file after Flexiprep, or the raw input when Flexiprep is skipped */
      lazy val qcR1: File = flexiprep.map(_.fastqR1Qc).getOrElse(inputR1)

      /** R2 file after Flexiprep, or the raw input when Flexiprep is skipped */
      lazy val qcR2: Option[File] = flexiprep.map(_.fastqR2Qc).getOrElse(inputR2)

      /** When true a GearsSingle run is also added for this library, read from "library_gears" */
      val libraryGears: Boolean = config("library_gears", default = false)

      /** Library-level GearsSingle run, only defined when libraryGears is true */
      lazy val gearsSingle = if (libraryGears) Some(new GearsSingle(qscript)) else None

      /** Function that add library jobs */
      protected def addJobs(): Unit = {
        inputFiles :+= InputFile(inputR1, config("R1_md5"))
        inputR2.foreach { r2 => inputFiles :+= InputFile(r2, config("R2_md5")) }
        flexiprep.foreach(add(_))

        gearsSingle.foreach { gs =>
          gs.sampleId = Some(sampleId)
          gs.libId = Some(libId)
          gs.outputDir = libDir

          gs.fastqR1 = List(addDownsample(qcR1, gs.outputDir))
          gs.fastqR2 = qcR2.map(addDownsample(_, gs.outputDir)).toList
          add(gs)
        }
      }

      /** Must return files to store into summary */
      def summaryFiles: Map[String, File] = Map()

      /** Must return stats to store into summary */
      def summaryStats = Map()
    }

    /** Sample-level GearsSingle run */
    lazy val gearsSingle = new GearsSingle(qscript)
    // These statements are part of the constructor, so the lazy val above is evaluated right here
    gearsSingle.sampleId = Some(sampleId)
    gearsSingle.outputDir = sampleDir

    /** Function to add sample jobs */
    protected def addJobs(): Unit = {
      addPerLibJobs()

      // R1 files of all libraries go through Zcat and Gzip into one file per sample
      val mergeR1: File = new File(sampleDir, s"$sampleId.R1.fq.gz")
      add(Zcat(qscript, libraries.values.map(_.qcR1).toList) | new Gzip(qscript) > mergeR1)

      // A combined R2 file is only made when at least one library has an R2 input
      val mergeR2 = if (libraries.values.exists(_.inputR2.isDefined)) Some(new File(sampleDir, s"$sampleId.R2.fq.gz")) else None
      mergeR2.foreach { file =>
        add(Zcat(qscript, libraries.values.flatMap(_.qcR2).toList) | new Gzip(qscript) > file)
      }

      gearsSingle.fastqR1 = List(addDownsample(mergeR1, gearsSingle.outputDir))
      gearsSingle.fastqR2 = mergeR2.map(addDownsample(_, gearsSingle.outputDir)).toList
      add(gearsSingle)
    }

    /** Must return files to store into summary */
    def summaryFiles: Map[String, File] = Map()

    /** Must return stats to store into summary */
    def summaryStats: Any = Map()
  }

217
  /**
   * Optional value read from the "gears_downsample" config key. When it is set, addDownsample
   * passes it to seqtk as the sample value; when it is not set no downsampling jobs are added.
   */
  val downSample: Option[Double] = config("gears_downsample")
218
219
220
221
222
223
224
225

  /**
   * Adds a downsample job for a file when downSample is set.
   *
   * When downSample is set, a job is added that pipes seqtk sample through Gzip. The output is
   * written to dir under the name of the input file with ".fq.gz" appended.
   *
   * @param input file to downsample
   * @param dir directory where the downsampled file is written
   * @return the downsampled file, or input itself when downSample is not set
   */
  def addDownsample(input: File, dir: File): File =
    downSample.map { amount =>
      val sampled = new File(dir, input.getName + ".fq.gz")
      val seqtk = new SeqtkSample(this)
      seqtk.input = input
      seqtk.sample = amount
      add(seqtk | new Gzip(this) > sampled)
      sampled
    }.getOrElse(input)

232
  /** Settings of this pipeline to store in the summary: the value of "gears_downsample". */
  def summarySettings: Map[String, Any] = {
    val downsampleSetting: (String, Any) = "gears_downsample" -> downSample
    Map(downsampleSetting)
  }
234
235

  /**
   * Files to put in the summary for this pipeline: the combined QIIME OTU tables and OTU maps,
   * each only present when the corresponding file is defined.
   */
  def summaryFiles: Map[String, File] = List(
    qiimeOpenOtuTable.map("qiime_open_otu_table" -> _),
    qiimeOpenOtuMap.map("qiime_open_otu_map" -> _),
    qiimeClosedOtuTable.map("qiime_closed_otu_table" -> _),
    qiimeClosedOtuMap.map("qiime_closed_otu_map" -> _)
  ).flatten.toMap
242
243
244
}

/**
 * Companion object of the Gears pipeline; all of its members are inherited from PipelineCommand.
 *
 * NOTE(review): presumably this provides the command line entry point of the pipeline - confirm
 * against PipelineCommand.
 */
object Gears extends PipelineCommand