Gears.scala 7.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.gears

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
19
20
import nl.lumc.sasc.biopet.core.{PipelineCommand, SampleLibraryTag}
import nl.lumc.sasc.biopet.extensions.kraken.{Kraken, KrakenReport}
Wai Yi Leung's avatar
Wai Yi Leung committed
21
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
22
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
23
import nl.lumc.sasc.biopet.extensions.tools.{FastqSync, KrakenReportToJson}
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import nl.lumc.sasc.biopet.utils.config.Configurable
25
26
import org.broadinstitute.gatk.queue.QScript

27
/**
Wai Yi Leung's avatar
Wai Yi Leung committed
28
 * Created by wyleung
29
 */
30
class Gears(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
31
  def this() = this(null)
32

Wai Yi Leung's avatar
Wai Yi Leung committed
33
34
  @Input(doc = "R1 reads in FastQ format", shortName = "R1", required = false)
  var fastqR1: Option[File] = None
35

Wai Yi Leung's avatar
Wai Yi Leung committed
36
37
  @Input(doc = "R2 reads in FastQ format", shortName = "R2", required = false)
  var fastqR2: Option[File] = None
38

Wai Yi Leung's avatar
Wai Yi Leung committed
39
  @Input(doc = "All unmapped reads will be extracted from this bam for analysis", shortName = "bam", required = false)
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  var bamFile: Option[File] = None
41

Peter van 't Hof's avatar
Peter van 't Hof committed
42
43
  @Argument(required = false)
  var outputName: String = _
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
  /** Executed before running the script */
  def init(): Unit = {
47
48
    require(fastqR1.isDefined || bamFile.isDefined, "Please specify fastq-file(s) or bam file")
    require(fastqR1.isDefined != bamFile.isDefined, "Provide either a bam file or la R1 file")
Peter van 't Hof's avatar
Peter van 't Hof committed
49
50

    if (outputName == null) {
Wai Yi Leung's avatar
Wai Yi Leung committed
51
      if (fastqR1.isDefined) outputName = fastqR1.map(_.getName
Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
54
55
56
        .stripSuffix(".gz")
        .stripSuffix(".fastq")
        .stripSuffix(".fq"))
        .getOrElse("noName")
      else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
57
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
58
  }
59

Wai Yi Leung's avatar
Wai Yi Leung committed
60
61
62
63
64
65
  override def reportClass = {
    val gears = new GearsReport(this)
    gears.outputDir = new File(outputDir, "report")
    gears.summaryFile = summaryFile
    Some(gears)
  }
66
67
68
69
70
71

  override def defaults = Map(
    "samtofastq" -> Map(
      "validationstringency" -> "LENIENT"
    )
  )
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
74
  /** Method to add jobs */
  def biopetScript(): Unit = {
    val fastqFiles: List[File] = bamFile.map { bamfile =>
75

Wai Yi Leung's avatar
Wai Yi Leung committed
76
77
78
79
80
81
82
      //      // sambamba view -f bam -F "unmapped or mate_is_unmapped" <alnFile> > <extracted.bam>
      //      val samFilterUnmapped = new SambambaView(this)
      //      samFilterUnmapped.input = bamfile
      //      samFilterUnmapped.filter = Some("(unmapped or mate_is_unmapped) and not (secondary_alignment) and [XH] == null")
      //      samFilterUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
      //      samFilterUnmapped.isIntermediate = false
      //      add(samFilterUnmapped)
83
84
85
86
87
88
89
90
91

      val samtoolsViewSelectUnmapped = new SamtoolsView(this)
      samtoolsViewSelectUnmapped.input = bamfile
      samtoolsViewSelectUnmapped.b = true
      samtoolsViewSelectUnmapped.output = new File(outputDir, s"$outputName.unmapped.bam")
      samtoolsViewSelectUnmapped.f = List("4")
      samtoolsViewSelectUnmapped.F = List("8")
      samtoolsViewSelectUnmapped.isIntermediate = true
      add(samtoolsViewSelectUnmapped)
92
93

      // start bam to fastq (only on unaligned reads) also extract the matesam
Wai Yi Leung's avatar
Wai Yi Leung committed
94
      val samToFastq = new SamToFastq(this)
95
      samToFastq.input = samtoolsViewSelectUnmapped.output
96
97
98
      samToFastq.fastqR1 = new File(outputDir, s"$outputName.unmapped.R1.fq.gz")
      samToFastq.fastqR2 = new File(outputDir, s"$outputName.unmapped.R2.fq.gz")
      samToFastq.fastqUnpaired = new File(outputDir, s"$outputName.unmapped.singleton.fq.gz")
99
      samToFastq.isIntermediate = true
Peter van 't Hof's avatar
Peter van 't Hof committed
100
      add(samToFastq)
101
102

      // sync the fastq records
Wai Yi Leung's avatar
Wai Yi Leung committed
103
      val fastqSync = new FastqSync(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
      fastqSync.refFastq = samToFastq.fastqR1
      fastqSync.inputFastq1 = samToFastq.fastqR1
      fastqSync.inputFastq2 = samToFastq.fastqR2
Peter van 't Hof's avatar
Peter van 't Hof committed
107
      fastqSync.outputFastq1 = new File(outputDir, s"$outputName.unmapped.R1.sync.fq.gz")
Wai Yi Leung's avatar
Wai Yi Leung committed
108
109

      // TODO: need some sanity check on whether R2 is really containing reads (e.g. Single End libraries)
Peter van 't Hof's avatar
Peter van 't Hof committed
110
      fastqSync.outputFastq2 = new File(outputDir, s"$outputName.unmapped.R2.sync.fq.gz")
Wai Yi Leung's avatar
Wai Yi Leung committed
111
      fastqSync.outputStats = new File(outputDir, s"$outputName.sync.stats")
Peter van 't Hof's avatar
Peter van 't Hof committed
112
      add(fastqSync)
Wai Yi Leung's avatar
Wai Yi Leung committed
113
      addSummarizable(fastqSync, "fastqsync")
114

Wai Yi Leung's avatar
Wai Yi Leung committed
115
116
117
      outputFiles += ("fastqsync_stats" -> fastqSync.outputStats)
      outputFiles += ("fastqsync_R1" -> fastqSync.outputFastq1)
      outputFiles += ("fastqsync_R2" -> fastqSync.outputFastq2)
118

Peter van 't Hof's avatar
Peter van 't Hof committed
119
      List(fastqSync.outputFastq1, fastqSync.outputFastq2)
Wai Yi Leung's avatar
Wai Yi Leung committed
120
    }.getOrElse(List(fastqR1, fastqR2).flatten)
121

Peter van 't Hof's avatar
Peter van 't Hof committed
122
    // start kraken
Wai Yi Leung's avatar
Wai Yi Leung committed
123
    val krakenAnalysis = new Kraken(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
124
125
    krakenAnalysis.input = fastqFiles
    krakenAnalysis.output = new File(outputDir, s"$outputName.krkn.raw")
Wai Yi Leung's avatar
Wai Yi Leung committed
126

Wai Yi Leung's avatar
Wai Yi Leung committed
127
    krakenAnalysis.paired = fastqFiles.length == 2
Wai Yi Leung's avatar
Wai Yi Leung committed
128

Wai Yi Leung's avatar
Wai Yi Leung committed
129
130
    krakenAnalysis.classified_out = Some(new File(outputDir, s"$outputName.krkn.classified.fastq"))
    krakenAnalysis.unclassified_out = Some(new File(outputDir, s"$outputName.krkn.unclassified.fastq"))
Peter van 't Hof's avatar
Peter van 't Hof committed
131
    add(krakenAnalysis)
132

Wai Yi Leung's avatar
Wai Yi Leung committed
133
134
135
    outputFiles += ("kraken_output_raw" -> krakenAnalysis.output)
    outputFiles += ("kraken_classified_out" -> krakenAnalysis.classified_out.getOrElse(""))
    outputFiles += ("kraken_unclassified_out" -> krakenAnalysis.unclassified_out.getOrElse(""))
136

Peter van 't Hof's avatar
Peter van 't Hof committed
137
    // create kraken summary file
Wai Yi Leung's avatar
Wai Yi Leung committed
138
    val krakenReport = new KrakenReport(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
141
    krakenReport.input = krakenAnalysis.output
    krakenReport.show_zeros = true
    krakenReport.output = new File(outputDir, s"$outputName.krkn.full")
Peter van 't Hof's avatar
Peter van 't Hof committed
142
    add(krakenReport)
Peter van 't Hof's avatar
Peter van 't Hof committed
143

Wai Yi Leung's avatar
Wai Yi Leung committed
144
145
    outputFiles += ("kraken_report_input" -> krakenReport.input)
    outputFiles += ("kraken_report_output" -> krakenReport.output)
146

Wai Yi Leung's avatar
Wai Yi Leung committed
147
    val krakenReportJSON = new KrakenReportToJson(this)
148
    krakenReportJSON.inputReport = krakenReport.output
149
    krakenReportJSON.output = new File(outputDir, s"$outputName.krkn.json")
150
    krakenReportJSON.skipNames = config("skipNames", default = false)
151
    add(krakenReportJSON)
Wai Yi Leung's avatar
Wai Yi Leung committed
152
    addSummarizable(krakenReportJSON, "krakenreport")
153

Wai Yi Leung's avatar
Wai Yi Leung committed
154
155
    outputFiles += ("kraken_report_json_input" -> krakenReportJSON.inputReport)
    outputFiles += ("kraken_report_json_output" -> krakenReportJSON.output)
156

Wai Yi Leung's avatar
Wai Yi Leung committed
157
    addSummaryJobs()
158
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
159
160

  /** Location of summary file */
161
  def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".gears.summary.json")
Peter van 't Hof's avatar
Peter van 't Hof committed
162

163
  /** Pipeline settings shown in the summary file */
164
  def summarySettings: Map[String, Any] = Map.empty
Peter van 't Hof's avatar
Peter van 't Hof committed
165

166
  /** Statistics shown in the summary file */
Wai Yi Leung's avatar
Wai Yi Leung committed
167
168
  def summaryFiles: Map[String, File] = Map.empty ++
    (if (bamFile.isDefined) Map("input_bam" -> bamFile.get) else Map()) ++
Wai Yi Leung's avatar
Wai Yi Leung committed
169
170
    (if (fastqR1.isDefined) Map("input_R1" -> fastqR1.get) else Map()) ++
    outputFiles
171
172
173
174
}

/** This object give a default main method to the pipelines */
object Gears extends PipelineCommand