GearsSingle.scala 7.67 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12 13 14 15 16
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
17
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
19 20
import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.{ Gzip, Zcat }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.utils.config.Configurable
24 25
import org.broadinstitute.gatk.queue.QScript

26
/**
Wai Yi Leung's avatar
Wai Yi Leung committed
27
 * Created by wyleung
28
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
29
class GearsSingle(val parent: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
30
  def this() = this(null)
31

Wai Yi Leung's avatar
Wai Yi Leung committed
32
  @Input(doc = "R1 reads in FastQ format", shortName = "R1", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
33
  var fastqR1: List[File] = Nil
34

Wai Yi Leung's avatar
Wai Yi Leung committed
35
  @Input(doc = "R2 reads in FastQ format", shortName = "R2", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
36
  var fastqR2: List[File] = Nil
37

Wai Yi Leung's avatar
Wai Yi Leung committed
38
  @Input(doc = "All unmapped reads will be extracted from this bam for analysis", shortName = "bam", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var bamFile: Option[File] = None
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41 42
  @Argument(required = false)
  var outputName: String = _
43

Peter van 't Hof's avatar
Peter van 't Hof committed
44 45
  lazy val krakenScript = if (config("gears_use_kraken", default = false)) Some(new GearsKraken(this)) else None
  lazy val centrifugeScript = if (config("gears_use_centrifuge", default = true)) Some(new GearsCentrifuge(this)) else None
Peter van 't Hof's avatar
Peter van 't Hof committed
46 47
  lazy val qiimeRatx = if (config("gears_use_qiime_rtax", default = false)) Some(new GearsQiimeRtax(this)) else None
  lazy val qiimeClosed = if (config("gears_use_qiime_closed", default = false)) Some(new GearsQiimeClosed(this)) else None
Peter van 't Hof's avatar
Peter van 't Hof committed
48
  lazy val qiimeOpen = if (config("gears_use_qiime_open", default = false)) Some(new GearsQiimeOpen(this)) else None
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  lazy val seqCount = if (config("gears_use_seq_count", default = false)) Some(new GearsSeqCount(this)) else None
Peter van 't Hof's avatar
Peter van 't Hof committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51 52
  /** Executed before running the script */
  def init(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
53 54 55
    if (fastqR1.isEmpty && !bamFile.isDefined) Logging.addError("Please specify fastq-file(s) or bam file")
    if (fastqR1.nonEmpty == bamFile.isDefined) Logging.addError("Provide either a bam file or a R1/R2 file")
    if (fastqR2.nonEmpty && fastqR1.size != fastqR2.size) Logging.addError("R1 and R2 has not the same number of files")
Peter van 't Hof's avatar
Peter van 't Hof committed
56
    if (sampleId == null || sampleId == None) Logging.addError("Missing sample ID on GearsSingle module")
Peter van 't Hof's avatar
Peter van 't Hof committed
57 58

    if (outputName == null) {
Peter van 't Hof's avatar
Peter van 't Hof committed
59
      if (fastqR1.nonEmpty) outputName = fastqR1.headOption.map(_.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
60 61 62 63 64
        .stripSuffix(".gz")
        .stripSuffix(".fastq")
        .stripSuffix(".fq"))
        .getOrElse("noName")
      else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
65
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
66

Peter van 't Hof's avatar
Peter van 't Hof committed
67
    if (fastqR1.nonEmpty) {
Peter van 't Hof's avatar
Peter van 't Hof committed
68 69
      fastqR1.foreach(inputFiles :+= InputFile(_))
      fastqR2.foreach(inputFiles :+= InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
70
    } else bamFile.foreach(inputFiles :+= InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
71
  }
72

Wai Yi Leung's avatar
Wai Yi Leung committed
73
  override def reportClass = {
74
    val gears = new GearsSingleReport(this)
Wai Yi Leung's avatar
Wai Yi Leung committed
75
    gears.outputDir = new File(outputDir, "report")
76
    gears.summaryDbFile = summaryDbFile
77 78
    sampleId.foreach(gears.args += "sampleId" -> _)
    libId.foreach(gears.args += "libId" -> _)
Wai Yi Leung's avatar
Wai Yi Leung committed
79 80
    Some(gears)
  }
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82 83
  protected var skipFlexiprep: Boolean = config("skip_flexiprep", default = false)

Peter van 't Hof's avatar
Peter van 't Hof committed
84 85 86
  protected def executeFlexiprep(r1: List[File], r2: List[File]): (File, Option[File]) = {
    val read1: File = if (r1.size == 1) r1.head else {
      val outputFile = new File(outputDir, "merged.R1.fq.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
87
      add(Zcat(this, r1) | new Gzip(this) > outputFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
88 89 90 91 92
      outputFile
    }

    val read2: Option[File] = if (r2.size <= 1) r2.headOption else {
      val outputFile = new File(outputDir, "merged.R2.fq.gz")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
      add(Zcat(this, r2) | new Gzip(this) > outputFile)
Peter van 't Hof's avatar
Peter van 't Hof committed
94 95 96
      Some(outputFile)
    }

97 98 99 100 101 102 103 104 105
    flexiprep.map { f =>
      f.inputR1 = read1
      f.inputR2 = read2
      f.sampleId = Some(sampleId.getOrElse("noSampleName"))
      f.libId = Some(libId.getOrElse("noLibName"))
      f.outputDir = new File(outputDir, "flexiprep")
      add(f)
      (f.fastqR1Qc, f.fastqR2Qc)
    }.getOrElse((read1, read2))
106 107
  }

108 109 110 111
  lazy protected val flexiprep: Option[Flexiprep] = if (!skipFlexiprep) {
    Some(new Flexiprep(this))
  } else None

Peter van 't Hof's avatar
Peter van 't Hof committed
112 113
  /** Method to add jobs */
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
114
    val (r1, r2): (File, Option[File]) = (fastqR1, fastqR2, bamFile) match {
Peter van 't Hof's avatar
Peter van 't Hof committed
115
      case (r1, _, _) if r1.nonEmpty => executeFlexiprep(r1, fastqR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
116 117
      case (_, _, Some(bam)) =>
        val extract = new ExtractUnmappedReads(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
118
        extract.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
119
        extract.bamFile = bam
Peter van 't Hof's avatar
Peter van 't Hof committed
120
        extract.outputName = outputName
Peter van 't Hof's avatar
Peter van 't Hof committed
121
        add(extract)
Peter van 't Hof's avatar
Peter van 't Hof committed
122
        executeFlexiprep(extract.fastqUnmappedR1 :: Nil, extract.fastqUnmappedR2.toList)
Peter van 't Hof's avatar
Peter van 't Hof committed
123
      case _ => throw new IllegalArgumentException("Missing input files")
Peter van 't Hof's avatar
Peter van 't Hof committed
124 125
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
126 127 128
    lazy val combinedFastq = {
      r2 match {
        case Some(r2) =>
129 130 131 132 133 134
          val combineReads = new CombineReads(this)
          combineReads.outputDir = new File(outputDir, "combine_reads")
          combineReads.fastqR1 = r1
          combineReads.fastqR2 = r2
          add(combineReads)
          combineReads.combinedFastq
Peter van 't Hof's avatar
Peter van 't Hof committed
135 136 137 138
        case _ => r1
      }
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
139
    krakenScript foreach { kraken =>
Peter van 't Hof's avatar
Peter van 't Hof committed
140
      kraken.outputDir = new File(outputDir, "kraken")
141 142
      kraken.fastqR1 = r1
      kraken.fastqR2 = r2
Peter van 't Hof's avatar
Peter van 't Hof committed
143
      kraken.outputName = outputName
Peter van 't Hof's avatar
Peter van 't Hof committed
144
      add(kraken)
Peter van 't Hof's avatar
Peter van 't Hof committed
145 146
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
147 148 149 150 151 152 153 154
    centrifugeScript foreach { centrifuge =>
      centrifuge.outputDir = new File(outputDir, "centrifuge")
      centrifuge.fastqR1 = r1
      centrifuge.fastqR2 = r2
      centrifuge.outputName = outputName
      add(centrifuge)
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
155
    qiimeRatx foreach { qiimeRatx =>
Peter van 't Hof's avatar
Peter van 't Hof committed
156
      qiimeRatx.outputDir = new File(outputDir, "qiime_rtax")
157 158
      qiimeRatx.fastqR1 = r1
      qiimeRatx.fastqR2 = r2
Peter van 't Hof's avatar
Peter van 't Hof committed
159
      add(qiimeRatx)
Peter van 't Hof's avatar
Peter van 't Hof committed
160
    }
161

Peter van 't Hof's avatar
Peter van 't Hof committed
162
    qiimeClosed foreach { qiimeClosed =>
Peter van 't Hof's avatar
Peter van 't Hof committed
163
      qiimeClosed.outputDir = new File(outputDir, "qiime_closed")
Peter van 't Hof's avatar
Peter van 't Hof committed
164
      qiimeClosed.fastqInput = combinedFastq
Peter van 't Hof's avatar
Peter van 't Hof committed
165
      add(qiimeClosed)
166 167
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
168 169 170 171 172 173
    qiimeOpen foreach { qiimeOpen =>
      qiimeOpen.outputDir = new File(outputDir, "qiime_open")
      qiimeOpen.fastqInput = combinedFastq
      add(qiimeOpen)
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
174 175 176 177 178 179
    seqCount.foreach { seqCount =>
      seqCount.fastqInput = combinedFastq
      seqCount.outputDir = new File(outputDir, "seq_count")
      add(seqCount)
    }

Wai Yi Leung's avatar
Wai Yi Leung committed
180
    addSummaryJobs()
181
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
182 183

  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
184
  def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".gears.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
185

186
  /** Pipeline settings shown in the summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
187 188 189 190
  def summarySettings: Map[String, Any] = Map(
    "skip_flexiprep" -> skipFlexiprep,
    "gears_use_kraken" -> krakenScript.isDefined,
    "gear_use_qiime_rtax" -> qiimeRatx.isDefined,
Peter van 't Hof's avatar
Peter van 't Hof committed
191 192
    "gear_use_qiime_closed" -> qiimeClosed.isDefined,
    "gear_use_qiime_open" -> qiimeOpen.isDefined
Peter van 't Hof's avatar
Peter van 't Hof committed
193
  )
Peter van 't Hof's avatar
Peter van 't Hof committed
194

195
  /** Statistics shown in the summary file */
Wai Yi Leung's avatar
Wai Yi Leung committed
196 197
  def summaryFiles: Map[String, File] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
Peter van 't Hof's avatar
Peter van 't Hof committed
198 199
    fastqR1.zipWithIndex.map(x => s"input_R1_${x._2}" -> x._1) ++
    fastqR2.zipWithIndex.map(x => s"input_R2_${x._2}" -> x._1) ++
Wai Yi Leung's avatar
Wai Yi Leung committed
200
    outputFiles
201 202 203
}

/** This object give a default main method to the pipelines */
204
object GearsSingle extends PipelineCommand