GearsSingle.scala 5.41 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
Wai Yi Leung's avatar
Wai Yi Leung committed
22
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
23
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq
25
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.config.Configurable
28
29
import org.broadinstitute.gatk.queue.QScript

30
/**
Wai Yi Leung's avatar
Wai Yi Leung committed
31
 * Created by wyleung
32
 */
33
class GearsSingle(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
34
  def this() = this(null)
35

Wai Yi Leung's avatar
Wai Yi Leung committed
36
37
  @Input(doc = "R1 reads in FastQ format", shortName = "R1", required = false)
  var fastqR1: Option[File] = None
38

Wai Yi Leung's avatar
Wai Yi Leung committed
39
40
  @Input(doc = "R2 reads in FastQ format", shortName = "R2", required = false)
  var fastqR2: Option[File] = None
41

Wai Yi Leung's avatar
Wai Yi Leung committed
42
  @Input(doc = "All unmapped reads will be extracted from this bam for analysis", shortName = "bam", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  var bamFile: Option[File] = None
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
  @Argument(required = false)
  var outputName: String = _
47

Peter van 't Hof's avatar
Peter van 't Hof committed
48
  var gearsUseKraken: Boolean = config("gears_use_kraken", default = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
49
  var gearsUserQiimeRtax: Boolean = config("gear_use_qiime_rtax", default = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
50

Peter van 't Hof's avatar
Peter van 't Hof committed
51
52
  /** Executed before running the script */
  def init(): Unit = {
53
    require(fastqR1.isDefined || bamFile.isDefined, "Please specify fastq-file(s) or bam file")
Peter van 't Hof's avatar
Peter van 't Hof committed
54
    require(fastqR1.isDefined != bamFile.isDefined, "Provide either a bam file or a R1/R2 file")
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56

    if (outputName == null) {
Wai Yi Leung's avatar
Wai Yi Leung committed
57
      if (fastqR1.isDefined) outputName = fastqR1.map(_.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
60
61
62
        .stripSuffix(".gz")
        .stripSuffix(".fastq")
        .stripSuffix(".fq"))
        .getOrElse("noName")
      else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
63
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
66
67

    if (fastqR1.isDefined) {
      fastqR1.foreach(inputFiles :+= InputFile(_))
      fastqR2.foreach(inputFiles :+= InputFile(_))
Peter van 't Hof's avatar
Peter van 't Hof committed
68
    } else inputFiles :+= InputFile(bamFile.get)
Peter van 't Hof's avatar
Peter van 't Hof committed
69
  }
70

Wai Yi Leung's avatar
Wai Yi Leung committed
71
  override def reportClass = {
72
    val gears = new GearsSingleReport(this)
Wai Yi Leung's avatar
Wai Yi Leung committed
73
74
    gears.outputDir = new File(outputDir, "report")
    gears.summaryFile = summaryFile
75
76
    sampleId.foreach(gears.args += "sampleId" -> _)
    libId.foreach(gears.args += "libId" -> _)
Wai Yi Leung's avatar
Wai Yi Leung committed
77
78
    Some(gears)
  }
79

Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
  /** Method to add jobs */
  def biopetScript(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
82
83
    val (r1, r2): (File, Option[File]) = (fastqR1, fastqR2, bamFile) match {
      case (Some(r1), _, _) => (r1, fastqR2)
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
      case (_, _, Some(bam)) =>
        val extract = new ExtractUnmappedReads(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
86
        extract.outputDir = outputDir
Peter van 't Hof's avatar
Peter van 't Hof committed
87
        extract.bamFile = bam
Peter van 't Hof's avatar
Peter van 't Hof committed
88
        extract.outputName = outputName
Peter van 't Hof's avatar
Peter van 't Hof committed
89
90
91
92
        extract.init()
        extract.biopetScript()
        addAll(extract.functions)
        (extract.fastqUnmappedR1, Some(extract.fastqUnmappedR2))
Peter van 't Hof's avatar
Peter van 't Hof committed
93
      case _ => throw new IllegalArgumentException("Missing input files")
Peter van 't Hof's avatar
Peter van 't Hof committed
94
95
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
    lazy val fastaR1 = fastqToFasta(r1, outputName + ".R1")
    lazy val fastaR2 = r2.map(fastqToFasta(_, outputName + ".R2"))
Peter van 't Hof's avatar
Peter van 't Hof committed
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

    if (gearsUseKraken) {
      val kraken = new GearsKraken(this)
      kraken.outputDir = new File(outputDir, "kraken")
      kraken.fastqR1 = r1
      kraken.fastqR2 = r2
      kraken.outputName = outputName
      kraken.init()
      kraken.biopetScript()
      addAll(kraken.functions)
      addSummaryQScript(kraken)
    }

    if (gearsUserQiimeRtax) {
      val qiimeRatx = new GearsQiimeRatx(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
115
116
117
118
      qiimeRatx.outputDir = new File(outputDir, "qiime_ratx")
      qiimeRatx.fastaR1 = fastaR1
      qiimeRatx.fastqR2 = fastaR2
      qiimeRatx.init()
      qiimeRatx.biopetScript()
      addAll(qiimeRatx.functions)
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    }
120

Wai Yi Leung's avatar
Wai Yi Leung committed
121
    addSummaryJobs()
122
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
123
124

  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
125
  def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".gears.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
126

127
  /** Pipeline settings shown in the summary file */
128
  def summarySettings: Map[String, Any] = Map.empty
Peter van 't Hof's avatar
Peter van 't Hof committed
129

130
  /** Statistics shown in the summary file */
Wai Yi Leung's avatar
Wai Yi Leung committed
131
132
  def summaryFiles: Map[String, File] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
Wai Yi Leung's avatar
Wai Yi Leung committed
133
134
    (if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++
    outputFiles
Peter van 't Hof's avatar
Peter van 't Hof committed
135
136
137
138
139
140
141
142
143
144
145
146

  def fastqToFasta(file: File, name: String): File = {
    val seqtk = new SeqtkSeq(this) {
      override def configName = "seqtkseq"
      override def fixedValues = Map("A" -> true)
    }
    seqtk.input = file
    seqtk.output = new File(outputDir, name + ".fasta")
    seqtk.isIntermediate = true
    add(seqtk)
    seqtk.output
  }
147
148
149
}

/** This object give a default main method to the pipelines */
150
object GearsSingle extends PipelineCommand