Gears.scala 4.86 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
18
19
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
20
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
22
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaView
23
import nl.lumc.sasc.biopet.extensions.tools.FastqSync
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.config.Configurable
25
26
import org.broadinstitute.gatk.queue.QScript

27
28
29
30
/**
 * Gears pipeline: runs Kraken on a set of reads and writes a Kraken report.
 *
 * Input is either fastq file(s) (R1 and optionally R2) or a bam file, never both.
 * When a bam file is given, the reads selected by the sambamba filter
 * "unmapped or mate_is_unmapped" are extracted, converted to fastq and synced
 * before they are handed to Kraken.
 *
 * @param root root [[Configurable]] of this pipeline; null when the no-argument
 *             constructor is used
 */
class Gears(val root: Configurable) extends QScript with SummaryQScript { qscript =>
  /** No-argument constructor; leaves `root` as null. */
  def this() = this(null)

  /** R1 fastq file; mutually exclusive with `bamFile` (checked in `init`). */
  @Input(shortName = "R1", required = false)
  var fastqFileR1: Option[File] = None

  /** Optional R2 fastq file; only read when no bam file is given. */
  @Input(shortName = "R2", required = false)
  var fastqFileR2: Option[File] = None

  /** Bam file to take reads from; mutually exclusive with `fastqFileR1` (checked in `init`). */
  @Input(doc = "From the bam all the unmapped reads are used for kraken", shortName = "bam", required = false)
  var bamFile: Option[File] = None

  /** Prefix for all output file names; derived from the input file name in `init` when not set. */
  @Argument(required = false)
  var outputName: String = _

  /**
   * Executed before running the script: validates the input combination and
   * fills in a default `outputName` when none was supplied.
   */
  def init(): Unit = {
    require(fastqFileR1.isDefined || bamFile.isDefined, "Must define fastq file(s) or a bam file")
    require(fastqFileR1.isDefined != bamFile.isDefined, "Can't define a bam file and a R1 file")

    // outputName defaults to null (`= _`) when the argument is not supplied
    if (outputName == null) {
      // Prefer the R1 name (without .gz/.fastq/.fq), fall back to the bam name (without .bam)
      outputName = fastqFileR1
        .map(_.getName.stripSuffix(".gz").stripSuffix(".fastq").stripSuffix(".fq"))
        .orElse(bamFile.map(_.getName.stripSuffix(".bam")))
        .getOrElse("noName")
    }
  }

  /**
   * Adds the jobs that turn a bam file into a pair of synced fastq files.
   *
   * @param bamfile bam file to extract the reads from
   * @return the synced R1 and R2 fastq files, in that order
   */
  private def extractUnmappedFastq(bamfile: File): List[File] = {
    // sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam>
    val samFilterUnmapped = new SambambaView(qscript)
    samFilterUnmapped.input = bamfile
    samFilterUnmapped.filter = Some("unmapped or mate_is_unmapped")
    samFilterUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
    samFilterUnmapped.isIntermediate = true
    add(samFilterUnmapped)

    // bam to fastq, only on the extracted reads (mates included by the filter above)
    val samToFastq = SamToFastq(
      qscript,
      samFilterUnmapped.output,
      new File(outputDir, s"$outputName.unmapped.R1.fq.gz"),
      new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
    )
    samToFastq.isIntermediate = true
    add(samToFastq)

    // sync the fastq records, using R1 as the reference
    val fastqSync = new FastqSync(qscript)
    fastqSync.refFastq = samToFastq.fastqR1
    fastqSync.inputFastq1 = samToFastq.fastqR1
    fastqSync.inputFastq2 = samToFastq.fastqR2
    fastqSync.outputFastq1 = new File(outputDir, s"$outputName.unmapped.R1.sync.fq.gz")
    fastqSync.outputFastq2 = new File(outputDir, s"$outputName.unmapped.R2.sync.fq.gz")
    fastqSync.outputStats = new File(outputDir, s"$outputName.sync.stats.json")
    add(fastqSync)

    List(fastqSync.outputFastq1, fastqSync.outputFastq2)
  }

  /** Method to add jobs */
  def biopetScript(): Unit = {
    // Reads come from the bam file when given, otherwise from the supplied fastq file(s)
    val fastqFiles: List[File] = bamFile
      .map(extractUnmappedFastq)
      .getOrElse(List(fastqFileR1, fastqFileR2).flatten)

    // start kraken
    val krakenAnalysis = new Kraken(qscript)
    krakenAnalysis.input = fastqFiles
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
    // Only mark the input as paired when two fastq files are actually passed;
    // a run with just R1 is single-end and must not be flagged as paired.
    krakenAnalysis.paired = fastqFiles.size == 2
    krakenAnalysis.classified_out = Option(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassified_out = Option(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
    add(krakenAnalysis)

    // create kraken summary file
    val krakenReport = new KrakenReport(qscript)
    krakenReport.input = krakenAnalysis.output
    krakenReport.show_zeros = true
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
    add(krakenReport)

    addSummaryJobs()
  }

  /** Location of summary file */
  def summaryFile = new File(outputDir, "gears.summary.json")

  /** Settings of pipeline for summary (none for this pipeline) */
  def summarySettings = Map()

  /** Files for the summary (none for this pipeline) */
  def summaryFiles = Map()
}

/** This object gives the pipeline a default main method. */
object Gears extends PipelineCommand