Gears.scala 6.73 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
20
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
Wai Yi Leung's avatar
Wai Yi Leung committed
21
import nl.lumc.sasc.biopet.extensions.picard.{ SortSam, SamToFastq }
22
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
Wai Yi Leung's avatar
Wai Yi Leung committed
23
import nl.lumc.sasc.biopet.extensions.tools.{ KrakenReportToJson, FastqSync }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.config.Configurable
25
26
import org.broadinstitute.gatk.queue.QScript

27
28
29
30
/**
 * This is a trait for the Gears pipeline
 * The ShivaTrait is used as template for this pipeline
 */
31
32
class Gears(val root: Configurable) extends QScript with SummaryQScript {
  qscript =>
33
  def this() = this(null)
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
  @Input(shortName = "R1", required = false)
  var fastqFileR1: Option[File] = None
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
  @Input(shortName = "R2", required = false)
  var fastqFileR2: Option[File] = None
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
  @Input(doc = "From the bam all the upmapped reads are used for kraken", shortName = "bam", required = false)
  var bamFile: Option[File] = None
43

Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
  @Argument(required = false)
  var outputName: String = _
46

47
48
  var GearsOutputFiles: Map[String, File] = Map.empty

Peter van 't Hof's avatar
Peter van 't Hof committed
49
50
51
52
53
54
55
56
57
58
59
60
  /** Executed before running the script */
  def init(): Unit = {
    require(fastqFileR1.isDefined || bamFile.isDefined, "Must define fastq file(s) or a bam file")
    require(fastqFileR1.isDefined != bamFile.isDefined, "Can't define a bam file and a R1 file")

    if (outputName == null) {
      if (fastqFileR1.isDefined) outputName = fastqFileR1.map(_.getName
        .stripSuffix(".gz")
        .stripSuffix(".fastq")
        .stripSuffix(".fq"))
        .getOrElse("noName")
      else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
61
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
62
  }
63

Peter van 't Hof's avatar
Peter van 't Hof committed
64
65
  /** Method to add jobs */
  def biopetScript(): Unit = {
66

Peter van 't Hof's avatar
Peter van 't Hof committed
67
    val fastqFiles: List[File] = bamFile.map { bamfile =>
68
69
70

      // sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam>
      val samFilterUnmapped = new SambambaView(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      samFilterUnmapped.input = bamfile
72
      samFilterUnmapped.filter = Some("(unmapped or mate_is_unmapped) and not (secondary_alignment)")
Peter van 't Hof's avatar
Peter van 't Hof committed
73
      samFilterUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
74
      samFilterUnmapped.isIntermediate = false
Peter van 't Hof's avatar
Peter van 't Hof committed
75
      add(samFilterUnmapped)
76
77

      // start bam to fastq (only on unaligned reads) also extract the matesam
78
      val samToFastq = new SamToFastq(qscript)
Wai Yi Leung's avatar
Wai Yi Leung committed
79
      samToFastq.input = samFilterUnmapped.output
80
      samToFastq.stringency = Some("LENIENT")
81
82
83
      samToFastq.fastqR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz")
      samToFastq.fastqR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
      samToFastq.fastqUnpaired = new File(outputDir, s"$outputName.unmapped.singleton.fq.gz")
84
      samToFastq.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
85
      add(samToFastq)
86
87

      // sync the fastq records
Peter van 't Hof's avatar
Peter van 't Hof committed
88
89
90
91
      val fastqSync = new FastqSync(qscript)
      fastqSync.refFastq = samToFastq.fastqR1
      fastqSync.inputFastq1 = samToFastq.fastqR1
      fastqSync.inputFastq2 = samToFastq.fastqR2
Peter van 't Hof's avatar
Peter van 't Hof committed
92
      fastqSync.outputFastq1 = new File(outputDir, s"$outputName.unmapped.R1.sync.fq.gz")
Wai Yi Leung's avatar
Wai Yi Leung committed
93
94

      // TODO: need some sanity check on whether R2 is really containing reads (e.g. Single End libraries)
Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
      fastqSync.outputFastq2 = new File(outputDir, s"$outputName.unmapped.R2.sync.fq.gz")
      fastqSync.outputStats = new File(outputDir, s"$outputName.sync.stats.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
97
      add(fastqSync)
98

99
100
101
102
      GearsOutputFiles ++ Map("fastqsync_stats" -> fastqSync.outputStats)
      GearsOutputFiles ++ Map("fastqsync_R1" -> fastqSync.outputFastq1)
      GearsOutputFiles ++ Map("fastqsync_R2" -> fastqSync.outputFastq2)

Peter van 't Hof's avatar
Peter van 't Hof committed
103
104
      List(fastqSync.outputFastq1, fastqSync.outputFastq2)
    }.getOrElse(List(fastqFileR1, fastqFileR2).flatten)
105

Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
108
109
    // start kraken
    val krakenAnalysis = new Kraken(qscript)
    krakenAnalysis.input = fastqFiles
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
Wai Yi Leung's avatar
Wai Yi Leung committed
110
111
112

    krakenAnalysis.paired = (fastqFiles.length == 2)

Peter van 't Hof's avatar
Peter van 't Hof committed
113
114
    krakenAnalysis.classified_out = Option(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassified_out = Option(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
Peter van 't Hof's avatar
Peter van 't Hof committed
115
    add(krakenAnalysis)
116

117
118
119
120
    GearsOutputFiles ++ Map("kraken_output_raw" -> krakenAnalysis.output)
    GearsOutputFiles ++ Map("kraken_classified_out" -> krakenAnalysis.classified_out)
    GearsOutputFiles ++ Map("kraken_unclassified_out" -> krakenAnalysis.unclassified_out)

Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
124
125
126
    // create kraken summary file

    val krakenReport = new KrakenReport(qscript)
    krakenReport.input = krakenAnalysis.output
    krakenReport.show_zeros = true
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
Peter van 't Hof's avatar
Peter van 't Hof committed
127
    add(krakenReport)
Peter van 't Hof's avatar
Peter van 't Hof committed
128

129
130
131
    GearsOutputFiles ++ Map("kraken_report_input" -> krakenReport.input)
    GearsOutputFiles ++ Map("kraken_report_output" -> krakenReport.output)

132
    val krakenReportJSON = new KrakenReportToJson(qscript)
133
    krakenReportJSON.inputReport = krakenReport.output
134
    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
135
    krakenReportJSON.skipNames = config("skipNames", default = false)
136
137
    add(krakenReportJSON)

Peter van 't Hof's avatar
Peter van 't Hof committed
138
    addSummaryJobs()
139
140
141

    GearsOutputFiles ++ Map("kraken_report_json_input" -> krakenReportJSON.inputReport)
    GearsOutputFiles ++ Map("kraken_report_json_output" -> krakenReportJSON.output)
142
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
143
144
145
146

  /** Location of summary file */
  def summaryFile = new File(outputDir, "gears.summary.json")

147
148
149
150
  /** Pipeline settings shown in the summary file */
  def summarySettings: Map[String, Any] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map())
Peter van 't Hof's avatar
Peter van 't Hof committed
151

152
  /** Statistics shown in the summary file */
Wai Yi Leung's avatar
Wai Yi Leung committed
153
154
  def summaryFiles: Map[String, File] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
155
156
    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map()) ++
    GearsOutputFiles
157
158
159
160
}

/** This object give a default main method to the pipelines */
object Gears extends PipelineCommand