Gears.scala 8.49 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15 16
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
19
import nl.lumc.sasc.biopet.extensions.tools.MergeOtuMaps
Peter van 't Hof's avatar
Peter van 't Hof committed
20
import nl.lumc.sasc.biopet.extensions.{ Gzip, Ln, Zcat }
21
import nl.lumc.sasc.biopet.extensions.qiime.MergeOtuTables
22
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSample
23
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
24 25 26 27
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript

/**
 * Created by pjvanthof on 03/12/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
30
class Gears(val parent: Configurable) extends QScript with MultiSampleQScript { qscript =>
31 32
  /** No-argument constructor; delegates to the primary constructor with a `null` parent. */
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
33 34 35
  /**
   * Report for this pipeline: a [[GearsReport]] that writes into the `report`
   * sub-directory of the pipeline output directory and uses the same
   * `summaryDbFile` as this pipeline.
   */
  override def reportClass = {
    val pipelineReport = new GearsReport(this)
    val reportDir = new File(outputDir, "report")
    pipelineReport.outputDir = reportDir
    pipelineReport.summaryDbFile = summaryDbFile
    Some(pipelineReport)
  }

40 41
  /** Overrides `defaults`: sets `skip_prefix` to "New." under the `mergeotumaps` key. */
  override def defaults = {
    val mergeOtuMapsDefaults = Map("skip_prefix" -> "New.")
    Map("mergeotumaps" -> mergeOtuMapsDefaults)
  }

42 43
  /** Overrides `fixedValues`: sets `skip_flexiprep` to true under the `gearssingle` key. */
  override def fixedValues = {
    val gearsSingleFixed = Map("skip_flexiprep" -> true)
    Map("gearssingle" -> gearsSingleFixed)
  }

44 45 46 47 48 49 50 51 52 53 54 55 56
  /** Init for pipeline; this pipeline has nothing to initialise, so the body is empty. */
  def init(): Unit = ()

  /** Summary output file: `gears.summary.json` inside the pipeline output directory. */
  def summaryFile: File = {
    val summaryName = "gears.summary.json"
    new File(outputDir, summaryName)
  }

  /**
   * Pipeline itself: adds the sample jobs first and the summary jobs after them.
   */
  def biopetScript(): Unit = {
    addSamplesJobs()
    addSummaryJobs()
  }

57
  /**
   * Directory `qiime_closed_reference` inside the pipeline output directory.
   *
   * @return the directory when at least one sample's `gearsSingle` has a
   *         `qiimeClosed` value, otherwise `None`
   */
  def qiimeClosedDir: Option[File] = {
    val anyClosedRun = samples.values.flatMap(_.gearsSingle.qiimeClosed).nonEmpty
    if (!anyClosedRun) None
    else Some(new File(outputDir, "qiime_closed_reference"))
  }
62

Peter van 't Hof's avatar
Peter van 't Hof committed
63 64 65 66
  /**
   * Directory `qiime_open_reference` inside the pipeline output directory.
   *
   * @return the directory when at least one sample's `gearsSingle` has a
   *         `qiimeOpen` value, otherwise `None`
   */
  def qiimeOpenDir: Option[File] = {
    val anyOpenRun = samples.values.flatMap(_.gearsSingle.qiimeOpen).nonEmpty
    if (!anyOpenRun) None
    else Some(new File(outputDir, "qiime_open_reference"))
  }

  /** File `otu_table.biom` inside `qiimeClosedDir`; `None` when that directory is not defined. */
  def qiimeClosedOtuTable: Option[File] =
    for (closedDir <- qiimeClosedDir) yield new File(closedDir, "otu_table.biom")
  /** File `otu_map.txt` inside `qiimeClosedDir`; `None` when that directory is not defined. */
  def qiimeClosedOtuMap: Option[File] =
    for (closedDir <- qiimeClosedDir) yield new File(closedDir, "otu_map.txt")

Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74
  /** File `otu_table.biom` inside `qiimeOpenDir`; `None` when that directory is not defined. */
  def qiimeOpenOtuTable: Option[File] =
    for (openDir <- qiimeOpenDir) yield new File(openDir, "otu_table.biom")
  /** File `otu_map.txt` inside `qiimeOpenDir`; `None` when that directory is not defined. */
  def qiimeOpenOtuMap: Option[File] =
    for (openDir <- qiimeOpenDir) yield new File(openDir, "otu_map.txt")

75
  /**
   * Method where the multi-sample jobs are added; it is only executed when the
   * -sample argument is not given.
   *
   * For the closed-reference and for the open-reference qiime results of all samples:
   *  - no sample has results: nothing is added;
   *  - exactly one sample has results: two `Ln` jobs are added, from that sample's
   *    OTU map / OTU table to the pipeline-level OTU map / OTU table;
   *  - several samples have results: a `MergeOtuTables` and a `MergeOtuMaps` job
   *    are added that write the pipeline-level OTU table / OTU map.
   */
  def addMultiSampleJobs(): Unit = {
    // ---- closed reference ----
    val closedRuns = samples.values.flatMap(_.gearsSingle.qiimeClosed).toList
    val closedTables = closedRuns.map(_.otuTable)
    val closedMaps = closedRuns.map(_.otuMap)
    require(closedTables.size == closedMaps.size)
    closedTables match {
      case Nil => ()
      case onlyTable :: Nil =>
        add(Ln(qscript, closedMaps.head, qiimeClosedOtuMap.get))
        add(Ln(qscript, onlyTable, qiimeClosedOtuTable.get))
      case _ =>
        val tableMerger = new MergeOtuTables(qscript)
        tableMerger.input = closedTables
        tableMerger.outputFile = qiimeClosedOtuTable.get
        add(tableMerger)

        val mapMerger = new MergeOtuMaps(qscript)
        mapMerger.input = closedMaps
        mapMerger.output = qiimeClosedOtuMap.get
        add(mapMerger)
    }

    // ---- open reference ----
    val openRuns = samples.values.flatMap(_.gearsSingle.qiimeOpen).toList
    val openTables = openRuns.map(_.otuTable)
    val openMaps = openRuns.map(_.otuMap)
    require(openTables.size == openMaps.size)
    openTables match {
      case Nil => ()
      case onlyTable :: Nil =>
        add(Ln(qscript, openMaps.head, qiimeOpenOtuMap.get))
        add(Ln(qscript, onlyTable, qiimeOpenOtuTable.get))
      case _ =>
        val tableMerger = new MergeOtuTables(qscript)
        tableMerger.input = openTables
        tableMerger.outputFile = qiimeOpenOtuTable.get
        add(tableMerger)

        val mapMerger = new MergeOtuMaps(qscript)
        mapMerger.input = openMaps
        mapMerger.output = qiimeOpenOtuMap.get
        add(mapMerger)
    }
  }

  /**
   * Factory method for the Sample class.
   *
   * @param id sample id
   * @return a new [[Sample]] created with the given id
   */
  def makeSample(id: String): Sample = {
    new Sample(id)
  }

  class Sample(sampleId: String) extends AbstractSample(sampleId) {
    /**
     * Factory method for the Library class.
     *
     * @param id library id
     * @return a new [[Library]] created with the given id
     */
    def makeLibrary(id: String): Library = {
      new Library(id)
    }

    /**
     * One library of this sample: an optional Flexiprep run followed by an
     * optional library-level GearsSingle run.
     */
    class Library(libId: String) extends AbstractLibrary(libId) {

      /** Input file taken from config key `R1`. */
      lazy val inputR1: File = config("R1")

      /** Optional input file taken from config key `R2`. */
      lazy val inputR2: Option[File] = config("R2")

      /** Config key `skip_flexiprep` (default `false`); when true no Flexiprep is created. */
      lazy val skipFlexiprep: Boolean = config("skip_flexiprep", default = false)

      /** Flexiprep for this library, or `None` when `skipFlexiprep` is true. */
      lazy val flexiprep = if (!skipFlexiprep) Some(new Flexiprep(qscript)) else None

      // Runs while the Library is constructed, so `flexiprep` is evaluated here
      // even though it is declared lazy.
      flexiprep.foreach { fp =>
        fp.sampleId = Some(sampleId)
        fp.libId = Some(libId)
        fp.inputR1 = inputR1
        fp.inputR2 = inputR2
        fp.outputDir = new File(libDir, "flexiprep")
      }

      /** R1 after QC: Flexiprep's `fastqR1Qc` when Flexiprep is used, else the raw `inputR1`. */
      lazy val qcR1: File = flexiprep match {
        case Some(fp) => fp.fastqR1Qc
        case None     => inputR1
      }

      /** R2 after QC: Flexiprep's `fastqR2Qc` when Flexiprep is used, else the raw `inputR2`. */
      lazy val qcR2: Option[File] = flexiprep match {
        case Some(fp) => fp.fastqR2Qc
        case None     => inputR2
      }

      /** Config key `library_gears` (default `false`); enables the library-level GearsSingle. */
      val libraryGears: Boolean = config("library_gears", default = false)

      /** Library-level GearsSingle, only present when `libraryGears` is true. */
      lazy val gearsSingle = if (!libraryGears) None else Some(new GearsSingle(qscript))

      /**
       * Function that adds the library jobs: registers the input files (with their
       * `R1_md5` / `R2_md5` config values), adds Flexiprep when present and, when
       * present, configures and adds the library-level GearsSingle.
       */
      protected def addJobs(): Unit = {
        inputFiles :+= InputFile(inputR1, config("R1_md5"))
        inputR2.foreach { r2 => inputFiles :+= InputFile(r2, config("R2_md5")) }
        flexiprep.foreach(fp => add(fp))

        for (libGears <- gearsSingle) {
          libGears.sampleId = Some(sampleId)
          libGears.libId = Some(libId)
          libGears.outputDir = libDir

          // Inputs are the QC'ed reads, passed through addDownsample first.
          libGears.fastqR1 = List(addDownsample(qcR1, libGears.outputDir))
          libGears.fastqR2 = qcR2.map(r2 => addDownsample(r2, libGears.outputDir)).toList
          add(libGears)
        }
      }

      /** Must return files to store into summary; none for a library. */
      def summaryFiles: Map[String, File] = Map.empty

      /** Must return stats to store into summary; none for a library. */
      def summaryStats = Map.empty
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
187 188 189
    // Sample-level GearsSingle. Although declared lazy, the two assignments below
    // access it, so it is instantiated while the Sample is being constructed.
    lazy val gearsSingle = new GearsSingle(qscript)
    gearsSingle.sampleId = Some(sampleId)
    gearsSingle.outputDir = sampleDir
190 191 192 193

    /**
     * Function to add sample jobs.
     *
     * After the per-library jobs are added, the `qcR1` files of all libraries are
     * piped through a Zcat job into Gzip to produce `<sampleId>.R1.fq.gz` in the
     * sample directory. When at least one library defines `inputR2`, the `qcR2`
     * files are combined the same way into `<sampleId>.R2.fq.gz`. The merged files
     * (passed through `addDownsample`) become the input of the sample-level
     * `gearsSingle`, which is then added.
     */
    protected def addJobs(): Unit = {
      addPerLibJobs()

      val mergeR1: File = new File(sampleDir, s"$sampleId.R1.fq.gz")
      add(Zcat(qscript, libraries.values.map(_.qcR1).toList) | new Gzip(qscript) > mergeR1)

      // A merged R2 only exists when some library actually has an R2 input.
      val mergeR2: Option[File] =
        if (libraries.values.exists(_.inputR2.isDefined)) Some(new File(sampleDir, s"$sampleId.R2.fq.gz"))
        else None
      mergeR2.foreach { file =>
        add(Zcat(qscript, libraries.values.flatMap(_.qcR2).toList) | new Gzip(qscript) > file)
      }

      gearsSingle.fastqR1 = List(addDownsample(mergeR1, gearsSingle.outputDir))
      gearsSingle.fastqR2 = mergeR2.map(addDownsample(_, gearsSingle.outputDir)).toList
      add(gearsSingle)
    }

210 211 212 213 214 215 216
    /** Must return files to store into summary; none for a sample. */
    def summaryFiles: Map[String, File] = Map.empty

    /** Must return stats to store into summary; an empty map for a sample. */
    def summaryStats: Any = Map.empty
  }

217
  /**
   * Optional value of config key `gears_downsample`. When set, `addDownsample`
   * passes it to `SeqtkSample.sample`; when absent no down-sampling job is added.
   */
  val downSample: Option[Double] = config("gears_downsample")
218 219 220 221 222 223 224 225

  /**
   * Optionally puts a down-sampling step in front of a file.
   *
   * When `downSample` is set, a `SeqtkSample` job piped into `Gzip` is added that
   * writes `<input file name>.fq.gz` in `dir`, and that output file is returned.
   * When `downSample` is not set, no job is added and `input` is returned as is.
   *
   * @param input file to (optionally) down-sample
   * @param dir directory where the down-sampled file is written
   * @return the down-sampled file, or `input` when down-sampling is disabled
   */
  def addDownsample(input: File, dir: File): File =
    downSample.map { amount =>
      val sampledFile = new File(dir, s"${input.getName}.fq.gz")
      val sampler = new SeqtkSample(this)
      sampler.input = input
      sampler.sample = amount
      add(sampler | new Gzip(this) > sampledFile)
      sampledFile
    }.getOrElse(input)

232
  /** Settings used by this pipeline, for the summary: the `gears_downsample` value. */
  def summarySettings: Map[String, Any] = {
    val downsampleSetting = "gears_downsample" -> downSample
    Map(downsampleSetting)
  }
234 235

  /**
   * Files to put in the summary for this pipeline: the open- and closed-reference
   * OTU tables and OTU maps, each included only when it is defined.
   */
  def summaryFiles: Map[String, File] = {
    val candidates = List(
      "qiime_open_otu_table" -> qiimeOpenOtuTable,
      "qiime_open_otu_map" -> qiimeOpenOtuMap,
      "qiime_closed_otu_table" -> qiimeClosedOtuTable,
      "qiime_closed_otu_map" -> qiimeClosedOtuMap
    )
    candidates.collect { case (key, Some(file)) => key -> file }.toMap
  }
242 243 244
}

/** Companion object of the [[Gears]] pipeline; all of its behaviour comes from `PipelineCommand`. */
object Gears extends PipelineCommand