Gears.scala 5.62 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
20
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
22
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
23
import nl.lumc.sasc.biopet.extensions.tools.{KrakenReportToJson, FastqSync}
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.config.Configurable
25
26
import org.broadinstitute.gatk.queue.QScript

27
28
29
30
/**
 * This is a trait for the Gears pipeline
 * The ShivaTrait is used as template for this pipeline
 */
31
32
class Gears(val root: Configurable) extends QScript with SummaryQScript {
  qscript =>
33
  def this() = this(null)
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
36
  @Input(shortName = "R1", required = false)
  var fastqFileR1: Option[File] = None
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
39
  @Input(shortName = "R2", required = false)
  var fastqFileR2: Option[File] = None
40

Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
  @Input(doc = "From the bam all the upmapped reads are used for kraken", shortName = "bam", required = false)
  var bamFile: Option[File] = None
43

Peter van 't Hof's avatar
Peter van 't Hof committed
44
45
  @Argument(required = false)
  var outputName: String = _
46

Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
49
50
51
52
53
54
55
56
57
58
  /** Executed before running the script */
  def init(): Unit = {
    require(fastqFileR1.isDefined || bamFile.isDefined, "Must define fastq file(s) or a bam file")
    require(fastqFileR1.isDefined != bamFile.isDefined, "Can't define a bam file and a R1 file")

    if (outputName == null) {
      if (fastqFileR1.isDefined) outputName = fastqFileR1.map(_.getName
        .stripSuffix(".gz")
        .stripSuffix(".fastq")
        .stripSuffix(".fq"))
        .getOrElse("noName")
      else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
59
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
60
  }
61

Peter van 't Hof's avatar
Peter van 't Hof committed
62
63
  /** Method to add jobs */
  def biopetScript(): Unit = {
64

Peter van 't Hof's avatar
Peter van 't Hof committed
65
    val fastqFiles: List[File] = bamFile.map { bamfile =>
66
67
68

      // sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam>
      val samFilterUnmapped = new SambambaView(qscript)
Peter van 't Hof's avatar
Peter van 't Hof committed
69
      samFilterUnmapped.input = bamfile
70
      samFilterUnmapped.filter = Some("unmapped or mate_is_unmapped")
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      samFilterUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
72
      samFilterUnmapped.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
73
      add(samFilterUnmapped)
74
75

      // start bam to fastq (only on unaligned reads) also extract the matesam
Peter van 't Hof's avatar
Peter van 't Hof committed
76
77
78
      val samToFastq = SamToFastq(qscript, samFilterUnmapped.output,
        new File(outputDir, s"$outputName.unmapped.R1.fq.gz"),
        new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
79
80
      )
      samToFastq.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
81
      add(samToFastq)
82
83

      // sync the fastq records
Peter van 't Hof's avatar
Peter van 't Hof committed
84
85
86
87
      val fastqSync = new FastqSync(qscript)
      fastqSync.refFastq = samToFastq.fastqR1
      fastqSync.inputFastq1 = samToFastq.fastqR1
      fastqSync.inputFastq2 = samToFastq.fastqR2
Peter van 't Hof's avatar
Peter van 't Hof committed
88
      fastqSync.outputFastq1 = new File(outputDir, s"$outputName.unmapped.R1.sync.fq.gz")
Wai Yi Leung's avatar
Wai Yi Leung committed
89
90

      // TODO: need some sanity check on whether R2 is really containing reads (e.g. Single End libraries)
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
      fastqSync.outputFastq2 = new File(outputDir, s"$outputName.unmapped.R2.sync.fq.gz")
      fastqSync.outputStats = new File(outputDir, s"$outputName.sync.stats.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
93
      add(fastqSync)
94

Peter van 't Hof's avatar
Peter van 't Hof committed
95
96
      List(fastqSync.outputFastq1, fastqSync.outputFastq2)
    }.getOrElse(List(fastqFileR1, fastqFileR2).flatten)
97

Peter van 't Hof's avatar
Peter van 't Hof committed
98
99
100
101
    // start kraken
    val krakenAnalysis = new Kraken(qscript)
    krakenAnalysis.input = fastqFiles
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
Wai Yi Leung's avatar
Wai Yi Leung committed
102
103
104

    krakenAnalysis.paired = (fastqFiles.length == 2)

Peter van 't Hof's avatar
Peter van 't Hof committed
105
106
    krakenAnalysis.classified_out = Option(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassified_out = Option(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
Peter van 't Hof's avatar
Peter van 't Hof committed
107
    add(krakenAnalysis)
108

Peter van 't Hof's avatar
Peter van 't Hof committed
109
110
111
112
113
114
    // create kraken summary file

    val krakenReport = new KrakenReport(qscript)
    krakenReport.input = krakenAnalysis.output
    krakenReport.show_zeros = true
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
Peter van 't Hof's avatar
Peter van 't Hof committed
115
    add(krakenReport)
Peter van 't Hof's avatar
Peter van 't Hof committed
116

117
    val krakenReportJSON = new KrakenReportToJson(qscript)
118
    krakenReportJSON.inputReport = krakenAnalysis.output
119
    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
120
    krakenReportJSON.skipNames = config("skipNames", default = true)
121
122
    add(krakenReportJSON)

123
124
125
126
127
//    val krakenReportJSON = new KrakenReportToJson(qscript)
//    krakenReportJSON.input = krakenReport.output
//    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
//    add(krakenReportJSON)

Peter van 't Hof's avatar
Peter van 't Hof committed
128
    addSummaryJobs()
129
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
130
131
132
133
134
135
136
137

  /** Location of summary file */
  def summaryFile = new File(outputDir, "gears.summary.json")

  /** Settings of pipeline for summary */
  def summarySettings = Map()

  /** Files for the summary */
138
139
  def summaryFiles = (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
    (if (fastqFileR1.isDefined) Map("input_R1" -> fastqFileR1.get) else Map())
140
141
142
143
}

/** This object give a default main method to the pipelines */
object Gears extends PipelineCommand