Gears.scala 6.14 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.BiopetQScript.InputFile
Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
import nl.lumc.sasc.biopet.core.{ PipelineCommand, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.kraken.{ Kraken, KrakenReport }
Wai Yi Leung's avatar
Wai Yi Leung committed
22
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
23
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
24
import nl.lumc.sasc.biopet.extensions.tools.KrakenReportToJson
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import nl.lumc.sasc.biopet.utils.config.Configurable
26
27
import org.broadinstitute.gatk.queue.QScript

28
/**
Wai Yi Leung's avatar
Wai Yi Leung committed
29
 * Created by wyleung
30
 */
31
class Gears(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
32
  def this() = this(null)
33

Wai Yi Leung's avatar
Wai Yi Leung committed
34
35
  @Input(doc = "R1 reads in FastQ format", shortName = "R1", required = false)
  var fastqR1: Option[File] = None
36

Wai Yi Leung's avatar
Wai Yi Leung committed
37
38
  @Input(doc = "R2 reads in FastQ format", shortName = "R2", required = false)
  var fastqR2: Option[File] = None
39

Wai Yi Leung's avatar
Wai Yi Leung committed
40
  @Input(doc = "All unmapped reads will be extracted from this bam for analysis", shortName = "bam", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
41
  var bamFile: Option[File] = None
42

Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
  @Argument(required = false)
  var outputName: String = _
45

Peter van 't Hof's avatar
Peter van 't Hof committed
46
47
  /** Executed before running the script */
  def init(): Unit = {
48
49
    require(fastqR1.isDefined || bamFile.isDefined, "Please specify fastq-file(s) or bam file")
    require(fastqR1.isDefined != bamFile.isDefined, "Provide either a bam file or la R1 file")
Peter van 't Hof's avatar
Peter van 't Hof committed
50
51

    if (outputName == null) {
Wai Yi Leung's avatar
Wai Yi Leung committed
52
      if (fastqR1.isDefined) outputName = fastqR1.map(_.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55
56
57
        .stripSuffix(".gz")
        .stripSuffix(".fastq")
        .stripSuffix(".fq"))
        .getOrElse("noName")
      else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
58
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
59
60
61
62
63
64
65

    if (fastqR1.isDefined) {
      fastqR1.foreach(inputFiles :+= InputFile(_))
      fastqR2.foreach(inputFiles :+= InputFile(_))
    } else {
      inputFiles :+= InputFile(bamFile.get)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
66
  }
67

Wai Yi Leung's avatar
Wai Yi Leung committed
68
69
70
71
72
73
  override def reportClass = {
    val gears = new GearsReport(this)
    gears.outputDir = new File(outputDir, "report")
    gears.summaryFile = summaryFile
    Some(gears)
  }
74
75
76
77
78
79

  override def defaults = Map(
    "samtofastq" -> Map(
      "validationstringency" -> "LENIENT"
    )
  )
Peter van 't Hof's avatar
Peter van 't Hof committed
80
81
82
  /** Method to add jobs */
  def biopetScript(): Unit = {
    val fastqFiles: List[File] = bamFile.map { bamfile =>
83

84
85
86
87
      val samtoolsViewSelectUnmapped = new SamtoolsView(this)
      samtoolsViewSelectUnmapped.input = bamfile
      samtoolsViewSelectUnmapped.b = true
      samtoolsViewSelectUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
88
      samtoolsViewSelectUnmapped.f = List("12")
89
90
      samtoolsViewSelectUnmapped.isIntermediate = true
      add(samtoolsViewSelectUnmapped)
91
92

      // start bam to fastq (only on unaligned reads) also extract the matesam
Wai Yi Leung's avatar
Wai Yi Leung committed
93
      val samToFastq = new SamToFastq(this)
94
      samToFastq.input = samtoolsViewSelectUnmapped.output
95
96
97
      samToFastq.fastqR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz")
      samToFastq.fastqR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
      samToFastq.fastqUnpaired = new File(outputDir, s"$outputName.unmapped.singleton.fq.gz")
98
      samToFastq.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
99
      add(samToFastq)
100

101
      List(samToFastq.fastqR1, samToFastq.fastqR2)
Wai Yi Leung's avatar
Wai Yi Leung committed
102
    }.getOrElse(List(fastqR1, fastqR2).flatten)
103

Peter van 't Hof's avatar
Peter van 't Hof committed
104
    // start kraken
Wai Yi Leung's avatar
Wai Yi Leung committed
105
    val krakenAnalysis = new Kraken(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
    krakenAnalysis.input = fastqFiles
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
Wai Yi Leung's avatar
Wai Yi Leung committed
108

Wai Yi Leung's avatar
Wai Yi Leung committed
109
    krakenAnalysis.paired = fastqFiles.length == 2
Wai Yi Leung's avatar
Wai Yi Leung committed
110

Wai Yi Leung's avatar
Wai Yi Leung committed
111
112
    krakenAnalysis.classified_out = Some(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassified_out = Some(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
Peter van 't Hof's avatar
Peter van 't Hof committed
113
    add(krakenAnalysis)
114

Wai Yi Leung's avatar
Wai Yi Leung committed
115
116
117
    outputFiles += ("kraken_output_raw" -> krakenAnalysis.output)
    outputFiles += ("kraken_classified_out" -> krakenAnalysis.classified_out.getOrElse(""))
    outputFiles += ("kraken_unclassified_out" -> krakenAnalysis.unclassified_out.getOrElse(""))
118

Peter van 't Hof's avatar
Peter van 't Hof committed
119
    // create kraken summary file
Wai Yi Leung's avatar
Wai Yi Leung committed
120
    val krakenReport = new KrakenReport(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
121
122
123
    krakenReport.input = krakenAnalysis.output
    krakenReport.show_zeros = true
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
Peter van 't Hof's avatar
Peter van 't Hof committed
124
    add(krakenReport)
Peter van 't Hof's avatar
Peter van 't Hof committed
125

Wai Yi Leung's avatar
Wai Yi Leung committed
126
127
    outputFiles += ("kraken_report_input" -> krakenReport.input)
    outputFiles += ("kraken_report_output" -> krakenReport.output)
128

Wai Yi Leung's avatar
Wai Yi Leung committed
129
    val krakenReportJSON = new KrakenReportToJson(this)
130
    krakenReportJSON.inputReport = krakenReport.output
131
    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
132
    krakenReportJSON.skipNames = config("skipNames", default = false)
133
    add(krakenReportJSON)
Wai Yi Leung's avatar
Wai Yi Leung committed
134
    addSummarizable(krakenReportJSON, "krakenreport")
135

Wai Yi Leung's avatar
Wai Yi Leung committed
136
137
    outputFiles += ("kraken_report_json_input" -> krakenReportJSON.inputReport)
    outputFiles += ("kraken_report_json_output" -> krakenReportJSON.output)
138

Wai Yi Leung's avatar
Wai Yi Leung committed
139
    addSummaryJobs()
140
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
141
142

  /** Location of summary file */
Peter van 't Hof's avatar
Peter van 't Hof committed
143
  def summaryFile = new File(outputDir, sampleId.getOrElse("sampleName_unknown") + ".gears.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
144

145
  /** Pipeline settings shown in the summary file */
146
  def summarySettings: Map[String, Any] = Map.empty
Peter van 't Hof's avatar
Peter van 't Hof committed
147

148
  /** Statistics shown in the summary file */
Wai Yi Leung's avatar
Wai Yi Leung committed
149
150
  def summaryFiles: Map[String, File] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
Wai Yi Leung's avatar
Wai Yi Leung committed
151
152
    (if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++
    outputFiles
153
154
155
156
}

/** This object give a default main method to the pipelines */
object Gears extends PipelineCommand