Sage.scala 8.33 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
package nl.lumc.sasc.biopet.pipelines.sage

Peter van 't Hof's avatar
Peter van 't Hof committed
18
import nl.lumc.sasc.biopet.utils.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand }
Peter van 't Hof's avatar
Peter van 't Hof committed
20
21
22
23
24
import nl.lumc.sasc.biopet.extensions.Cat
import nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsCoverage
import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
import nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
25
26
import nl.lumc.sasc.biopet.extensions.tools.SquishBed
import nl.lumc.sasc.biopet.extensions.tools.{ BedtoolsCoverageToCounts, PrefixFastq, SageCountFastq, SageCreateLibrary, SageCreateTagCounts }
Peter van 't Hof's avatar
Peter van 't Hof committed
27
import nl.lumc.sasc.biopet.utils.ConfigUtils
Peter van 't Hof's avatar
Peter van 't Hof committed
28
29
30
import org.broadinstitute.gatk.queue.QScript

class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
31
  qscript =>
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
34
  var countBed: Option[File] = config("count_bed")
Peter van 't Hof's avatar
Peter van 't Hof committed
35
  var squishedCountBed: File = null
Peter van 't Hof's avatar
Peter van 't Hof committed
36
37
  var transcriptome: Option[File] = config("transcriptome")
  var tagsLibrary: Option[File] = config("tags_library")
Peter van 't Hof's avatar
Peter van 't Hof committed
38

39
40
  override def defaults = Map(
    "bowtie" -> Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
41
42
43
44
45
46
47
48
49
50
51
52
53
      "m" -> 1,
      "k" -> 1,
      "best" -> true,
      "strata" -> true,
      "seedmms" -> 1
    ), "mapping" -> Map(
      "aligner" -> "bowtie",
      "skip_flexiprep" -> true,
      "skip_markduplicates" -> true
    ), "flexiprep" -> Map(
      "skip_clip" -> true,
      "skip_trim" -> true
    ), "strandSensitive" -> true
54
  )
Peter van 't Hof's avatar
Peter van 't Hof committed
55

56
57
  def summaryFile: File = new File(outputDir, "Sage.summary.json")

Peter van 't Hof's avatar
Peter van 't Hof committed
58
  //TODO: Add summary
59
60
  def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
61
  //TODO: Add summary
62
63
  def summarySettings: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
64
  def makeSample(id: String) = new Sample(id)
Peter van 't Hof's avatar
Peter van 't Hof committed
65
  class Sample(sampleId: String) extends AbstractSample(sampleId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
66
    //TODO: Add summary
67
68
    def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
69
    //TODO: Add summary
70
71
    def summaryStats: Map[String, Any] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
72
    def makeLibrary(id: String) = new Library(id)
73
    class Library(libId: String) extends AbstractLibrary(libId) {
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      //TODO: Add summary
75
76
      def summaryFiles: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
77
      //TODO: Add summary
78
79
      def summaryStats: Map[String, Any] = Map()

80
      val inputFastq: File = config("R1")
Peter van 't Hof's avatar
Peter van 't Hof committed
81
      val prefixFastq: File = createFile(".prefix.fastq")
82
83

      val flexiprep = new Flexiprep(qscript)
84
85
      flexiprep.sampleId = Some(sampleId)
      flexiprep.libId = Some(libId)
86
87

      val mapping = new Mapping(qscript)
88
89
      mapping.libId = Some(libId)
      mapping.sampleId = Some(sampleId)
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
      protected def addJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
92
        inputFiles :+= new InputFile(inputFastq, config("R1_md5"))
93

Peter van 't Hof's avatar
Peter van 't Hof committed
94
        flexiprep.outputDir = new File(libDir, "flexiprep/")
95
        flexiprep.input_R1 = inputFastq
Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
        flexiprep.init()
        flexiprep.biopetScript()
98
        qscript.addAll(flexiprep.functions)
99
100
101
102
103
104
105

        val flexiprepOutput = for ((key, file) <- flexiprep.outputFiles if key.endsWith("output_R1")) yield file
        val pf = new PrefixFastq(qscript)
        pf.inputFastq = flexiprepOutput.head
        pf.outputFastq = prefixFastq
        pf.prefixSeq = config("sage_tag", default = "CATG")
        pf.deps +:= flexiprep.outputFiles("fastq_input_R1")
106
        qscript.add(pf)
107
108

        mapping.input_R1 = pf.outputFastq
Peter van 't Hof's avatar
Peter van 't Hof committed
109
        mapping.outputDir = libDir
Peter van 't Hof's avatar
Peter van 't Hof committed
110
111
        mapping.init()
        mapping.biopetScript()
112
        qscript.addAll(mapping.functions)
113
114

        if (config("library_counts", default = false).asBoolean) {
115
116
          addBedtoolsCounts(mapping.finalBamFile, sampleId + "-" + libId, libDir)
          addTablibCounts(pf.outputFastq, sampleId + "-" + libId, libDir)
117
118
119
        }
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
120

Peter van 't Hof's avatar
Peter van 't Hof committed
121
    protected def addJobs(): Unit = {
122
      addPerLibJobs()
Peter van 't Hof's avatar
Peter van 't Hof committed
123
      val libraryBamfiles = libraries.map(_._2.mapping.finalBamFile).toList
124
125
126
127
      val libraryFastqFiles = libraries.map(_._2.prefixFastq).toList

      val bamFile: File = if (libraryBamfiles.size == 1) libraryBamfiles.head
      else if (libraryBamfiles.size > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
128
        val mergeSamFiles = MergeSamFiles(qscript, libraryBamfiles, new File(sampleDir, s"$sampleId.bam"))
129
        qscript.add(mergeSamFiles)
130
131
132
133
        mergeSamFiles.output
      } else null
      val fastqFile: File = if (libraryFastqFiles.size == 1) libraryFastqFiles.head
      else if (libraryFastqFiles.size > 1) {
Peter van 't Hof's avatar
Peter van 't Hof committed
134
        val cat = Cat(qscript, libraryFastqFiles, createFile(".fastq"))
135
        qscript.add(cat)
136
137
138
        cat.output
      } else null

Peter van 't Hof's avatar
Peter van 't Hof committed
139
140
      addBedtoolsCounts(bamFile, sampleId, sampleDir)
      addTablibCounts(fastqFile, sampleId, sampleDir)
141
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
144
  }

  def init() {
Peter van 't Hof's avatar
Peter van 't Hof committed
145
    if (transcriptome.isEmpty && tagsLibrary.isEmpty)
Peter van 't Hof's avatar
Peter van 't Hof committed
146
      throw new IllegalStateException("No transcriptome or taglib found")
Peter van 't Hof's avatar
Peter van 't Hof committed
147
148
    if (countBed.isEmpty)
      throw new IllegalStateException("No bedfile supplied, please add a countBed")
Peter van 't Hof's avatar
Peter van 't Hof committed
149
150
151
  }

  def biopetScript() {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
154
    val squishBed = new SquishBed(this)
    squishBed.input = countBed.get
    squishBed.output = new File(outputDir, countBed.get.getName.stripSuffix(".bed") + ".squish.bed")
Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
    add(squishBed)
    squishedCountBed = squishBed.output
Peter van 't Hof's avatar
Peter van 't Hof committed
157

Peter van 't Hof's avatar
Peter van 't Hof committed
158
    if (tagsLibrary.isEmpty) {
Peter van 't Hof's avatar
Peter van 't Hof committed
159
      val cdl = new SageCreateLibrary(this)
Peter van 't Hof's avatar
Peter van 't Hof committed
160
      cdl.input = transcriptome.get
Peter van 't Hof's avatar
Peter van 't Hof committed
161
162
163
164
      cdl.output = new File(outputDir, "taglib/tag.lib")
      cdl.noAntiTagsOutput = new File(outputDir, "taglib/no_antisense_genes.txt")
      cdl.noTagsOutput = new File(outputDir, "taglib/no_sense_genes.txt")
      cdl.allGenesOutput = new File(outputDir, "taglib/all_genes.txt")
Peter van 't Hof's avatar
Peter van 't Hof committed
165
      add(cdl)
Peter van 't Hof's avatar
Peter van 't Hof committed
166
      tagsLibrary = Some(cdl.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
167
168
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
169
    addSamplesJobs()
170
171
172
  }

  def addMultiSampleJobs(): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
173
174
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
175
  def addBedtoolsCounts(bamFile: File, outputPrefix: String, outputDir: File) {
Peter van 't Hof's avatar
Peter van 't Hof committed
176
177
    val bedtoolsSense = BedtoolsCoverage(this, bamFile, squishedCountBed,
      output = Some(new File(outputDir, outputPrefix + ".genome.sense.coverage")),
Peter van 't Hof's avatar
Peter van 't Hof committed
178
179
180
      depth = false, sameStrand = true, diffStrand = false)
    val countSense = new BedtoolsCoverageToCounts(this)
    countSense.input = bedtoolsSense.output
Peter van 't Hof's avatar
Peter van 't Hof committed
181
    countSense.output = new File(outputDir, outputPrefix + ".genome.sense.counts")
Peter van 't Hof's avatar
Peter van 't Hof committed
182

Peter van 't Hof's avatar
Peter van 't Hof committed
183
184
    val bedtoolsAntisense = BedtoolsCoverage(this, bamFile, squishedCountBed,
      output = Some(new File(outputDir, outputPrefix + ".genome.antisense.coverage")),
Peter van 't Hof's avatar
Peter van 't Hof committed
185
186
187
      depth = false, sameStrand = false, diffStrand = true)
    val countAntisense = new BedtoolsCoverageToCounts(this)
    countAntisense.input = bedtoolsAntisense.output
Peter van 't Hof's avatar
Peter van 't Hof committed
188
    countAntisense.output = new File(outputDir, outputPrefix + ".genome.antisense.counts")
Peter van 't Hof's avatar
Peter van 't Hof committed
189

Peter van 't Hof's avatar
Peter van 't Hof committed
190
191
    val bedtools = BedtoolsCoverage(this, bamFile, squishedCountBed,
      output = Some(new File(outputDir, outputPrefix + ".genome.coverage")),
Peter van 't Hof's avatar
Peter van 't Hof committed
192
193
194
      depth = false, sameStrand = false, diffStrand = false)
    val count = new BedtoolsCoverageToCounts(this)
    count.input = bedtools.output
Peter van 't Hof's avatar
Peter van 't Hof committed
195
    count.output = new File(outputDir, outputPrefix + ".genome.counts")
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
198
199

    add(bedtoolsSense, countSense, bedtoolsAntisense, countAntisense, bedtools, count)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
200
  def addTablibCounts(fastq: File, outputPrefix: String, outputDir: File) {
Peter van 't Hof's avatar
Peter van 't Hof committed
201
202
    val countFastq = new SageCountFastq(this)
    countFastq.input = fastq
Peter van 't Hof's avatar
Peter van 't Hof committed
203
    countFastq.output = new File(outputDir, outputPrefix + ".raw.counts")
Peter van 't Hof's avatar
Peter van 't Hof committed
204
205
206
207
    add(countFastq)

    val createTagCounts = new SageCreateTagCounts(this)
    createTagCounts.input = countFastq.output
Peter van 't Hof's avatar
Peter van 't Hof committed
208
    createTagCounts.tagLib = tagsLibrary.get
Peter van 't Hof's avatar
Peter van 't Hof committed
209
210
211
212
    createTagCounts.countSense = new File(outputDir, outputPrefix + ".tagcount.sense.counts")
    createTagCounts.countAllSense = new File(outputDir, outputPrefix + ".tagcount.all.sense.counts")
    createTagCounts.countAntiSense = new File(outputDir, outputPrefix + ".tagcount.antisense.counts")
    createTagCounts.countAllAntiSense = new File(outputDir, outputPrefix + ".tagcount.all.antisense.counts")
Peter van 't Hof's avatar
Peter van 't Hof committed
213
214
215
216
    add(createTagCounts)
  }
}

Peter van 't Hof's avatar
Peter van 't Hof committed
217
object Sage extends PipelineCommand