Gears.scala 6.91 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
20
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
Wai Yi Leung's avatar
Wai Yi Leung committed
21
import nl.lumc.sasc.biopet.extensions.picard.{ SortSam, SamToFastq }
22
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
Wai Yi Leung's avatar
Wai Yi Leung committed
23
import nl.lumc.sasc.biopet.extensions.tools.{ KrakenReportToJson, FastqSync }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.config.Configurable
25
26
import org.broadinstitute.gatk.queue.QScript

27
/**
Wai Yi Leung's avatar
Wai Yi Leung committed
28
 * Created by wyleung
29
 */
30
class Gears(val root: Configurable) extends QScript with SummaryQScript {
31
  def this() = this(null)
32

Peter van 't Hof's avatar
Peter van 't Hof committed
33
34
  @Input(shortName = "R1", required = false)
  var fastqFileR1: Option[File] = None
35

Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  @Input(shortName = "R2", required = false)
  var fastqFileR2: Option[File] = None
38

Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
  @Input(doc = "From the bam all the upmapped reads are used for kraken", shortName = "bam", required = false)
  var bamFile: Option[File] = None
41

Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
  @Argument(required = false)
  var outputName: String = _
44

Wai Yi Leung's avatar
Wai Yi Leung committed
45
  var GearsOutputFiles: scala.collection.mutable.Map[String, File] = scala.collection.mutable.Map.empty
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
49
50
51
52
53
54
55
56
57
58
  /** Executed before running the script */
  def init(): Unit = {
    require(fastqFileR1.isDefined || bamFile.isDefined, "Must define fastq file(s) or a bam file")
    require(fastqFileR1.isDefined != bamFile.isDefined, "Can't define a bam file and a R1 file")

    if (outputName == null) {
      if (fastqFileR1.isDefined) outputName = fastqFileR1.map(_.getName
        .stripSuffix(".gz")
        .stripSuffix(".fastq")
        .stripSuffix(".fq"))
        .getOrElse("noName")
      else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
59
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
60
  }
61

Wai Yi Leung's avatar
Wai Yi Leung committed
62
63
64
65
66
67
  override def reportClass = {
    val gears = new GearsReport(this)
    gears.outputDir = new File(outputDir, "report")
    gears.summaryFile = summaryFile
    Some(gears)
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
68
69
  /** Method to add jobs */
  def biopetScript(): Unit = {
70

Peter van 't Hof's avatar
Peter van 't Hof committed
71
    val fastqFiles: List[File] = bamFile.map { bamfile =>
72
73

      // sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam>
Wai Yi Leung's avatar
Wai Yi Leung committed
74
      val samFilterUnmapped = new SambambaView(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
75
      samFilterUnmapped.input = bamfile
76
      samFilterUnmapped.filter = Some("(unmapped or mate_is_unmapped) and not (secondary_alignment) and [XH] == null")
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      samFilterUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
78
      samFilterUnmapped.isIntermediate = false
Peter van 't Hof's avatar
Peter van 't Hof committed
79
      add(samFilterUnmapped)
80
81

      // start bam to fastq (only on unaligned reads) also extract the matesam
Wai Yi Leung's avatar
Wai Yi Leung committed
82
      val samToFastq = new SamToFastq(this)
Wai Yi Leung's avatar
Wai Yi Leung committed
83
      samToFastq.input = samFilterUnmapped.output
84
      samToFastq.stringency = Some("LENIENT")
85
86
87
      samToFastq.fastqR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz")
      samToFastq.fastqR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
      samToFastq.fastqUnpaired = new File(outputDir, s"$outputName.unmapped.singleton.fq.gz")
88
      samToFastq.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
89
      add(samToFastq)
90
91

      // sync the fastq records
Wai Yi Leung's avatar
Wai Yi Leung committed
92
      val fastqSync = new FastqSync(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
93
94
95
      fastqSync.refFastq = samToFastq.fastqR1
      fastqSync.inputFastq1 = samToFastq.fastqR1
      fastqSync.inputFastq2 = samToFastq.fastqR2
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      fastqSync.outputFastq1 = new File(outputDir, s"$outputName.unmapped.R1.sync.fq.gz")
Wai Yi Leung's avatar
Wai Yi Leung committed
97
98

      // TODO: need some sanity check on whether R2 is really containing reads (e.g. Single End libraries)
Peter van 't Hof's avatar
Peter van 't Hof committed
99
100
      fastqSync.outputFastq2 = new File(outputDir, s"$outputName.unmapped.R2.sync.fq.gz")
      fastqSync.outputStats = new File(outputDir, s"$outputName.sync.stats.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
101
      add(fastqSync)
102

Wai Yi Leung's avatar
Wai Yi Leung committed
103
104
105
      GearsOutputFiles += ("fastqsync_stats" -> fastqSync.outputStats)
      GearsOutputFiles += ("fastqsync_R1" -> fastqSync.outputFastq1)
      GearsOutputFiles += ("fastqsync_R2" -> fastqSync.outputFastq2)
106

Peter van 't Hof's avatar
Peter van 't Hof committed
107
108
      List(fastqSync.outputFastq1, fastqSync.outputFastq2)
    }.getOrElse(List(fastqFileR1, fastqFileR2).flatten)
109

Peter van 't Hof's avatar
Peter van 't Hof committed
110
    // start kraken
Wai Yi Leung's avatar
Wai Yi Leung committed
111
    val krakenAnalysis = new Kraken(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
    krakenAnalysis.input = fastqFiles
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
Wai Yi Leung's avatar
Wai Yi Leung committed
114
115
116

    krakenAnalysis.paired = (fastqFiles.length == 2)

Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
    krakenAnalysis.classified_out = Option(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassified_out = Option(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    add(krakenAnalysis)
120

Wai Yi Leung's avatar
Wai Yi Leung committed
121
122
123
    GearsOutputFiles += ("kraken_output_raw" -> krakenAnalysis.output)
    GearsOutputFiles += ("kraken_classified_out" -> krakenAnalysis.classified_out.getOrElse(""))
    GearsOutputFiles += ("kraken_unclassified_out" -> krakenAnalysis.unclassified_out.getOrElse(""))
124

Peter van 't Hof's avatar
Peter van 't Hof committed
125
126
    // create kraken summary file

Wai Yi Leung's avatar
Wai Yi Leung committed
127
    val krakenReport = new KrakenReport(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
128
129
130
    krakenReport.input = krakenAnalysis.output
    krakenReport.show_zeros = true
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    add(krakenReport)
Peter van 't Hof's avatar
Peter van 't Hof committed
132

Wai Yi Leung's avatar
Wai Yi Leung committed
133
134
    GearsOutputFiles += ("kraken_report_input" -> krakenReport.input)
    GearsOutputFiles += ("kraken_report_output" -> krakenReport.output)
135

Wai Yi Leung's avatar
Wai Yi Leung committed
136
    val krakenReportJSON = new KrakenReportToJson(this)
137
    krakenReportJSON.inputReport = krakenReport.output
138
    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
139
    krakenReportJSON.skipNames = config("skipNames", default = false)
140
    add(krakenReportJSON)
Wai Yi Leung's avatar
Wai Yi Leung committed
141
    addSummarizable(krakenReportJSON, "krakenreport")
142

Wai Yi Leung's avatar
Wai Yi Leung committed
143
144
    GearsOutputFiles += ("kraken_report_json_input" -> krakenReportJSON.inputReport)
    GearsOutputFiles += ("kraken_report_json_output" -> krakenReportJSON.output)
145

Wai Yi Leung's avatar
Wai Yi Leung committed
146
    addSummaryJobs()
147
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
148
149
150
151

  /** Location of summary file */
  def summaryFile = new File(outputDir, "gears.summary.json")

152
153
154
155
  /** Pipeline settings shown in the summary file */
  def summarySettings: Map[String, Any] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
156

157
  /** Statistics shown in the summary file */
Wai Yi Leung's avatar
Wai Yi Leung committed
158
159
  def summaryFiles: Map[String, File] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
160
161
    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map()) ++
    GearsOutputFiles
162
163
164
165
}

/** This object give a default main method to the pipelines */
object Gears extends PipelineCommand