Yamsvp.scala 7.31 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
wyleung's avatar
wyleung committed
16
17
18
19
20
21
22
/*
 * Structural variation calling
 */

package nl.lumc.sasc.biopet.pipelines.yamsvp

import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.{ BiopetQScript, MultiSampleQScript, PipelineCommand }
wyleung's avatar
wyleung committed
24

25
import nl.lumc.sasc.biopet.extensions.Ln
26
import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount
27
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaIndex, SambambaMerge, SambambaMarkdup }
28
import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
29
import nl.lumc.sasc.biopet.extensions.svcallers.{ Breakdancer, Delly, CleverCaller }
Wai Yi Leung's avatar
Wai Yi Leung committed
30
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
31

wyleung's avatar
wyleung committed
32
33
34
35
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping

import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function._
wyleung's avatar
wyleung committed
36
37
import org.broadinstitute.gatk.queue.engine.JobRunInfo

Peter van 't Hof's avatar
Peter van 't Hof committed
38
class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with MultiSampleQScript {
wyleung's avatar
wyleung committed
39
40
  // Auxiliary no-argument constructor: delegates to the primary constructor with a null root.
  // NOTE(review): presumably needed so the script can be instantiated without a parent Configurable — confirm.
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
41
  // Reference file, read from the "reference" config key (requested with required = true).
  var reference: File = config("reference", required = true)
wyleung's avatar
wyleung committed
42
  // Initialised to the empty list; no other visible code in this file reads or updates it.
  // NOTE(review): looks unused — confirm against the rest of the project before removing.
  var finalBamFiles: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
43
  /*
wyleung's avatar
wyleung committed
44
45
46
47
48
49
50
51
  class LibraryOutput extends AbstractLibraryOutput {
    var mappedBamFile: File = _
  }

  class SampleOutput extends AbstractSampleOutput {
    var vcf: Map[String, List[File]] = Map()
    var mappedBamFile: File = _
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
52
*/
53
  /**
   * Validates and normalises the output directory.
   *
   * Throws an IllegalStateException when `outputDir` is null; otherwise makes sure
   * the value ends with a trailing "/".
   */
  override def init(): Unit = {
    // Fail fast: nothing below can work without an output directory.
    if (outputDir == null) {
      throw new IllegalStateException("Output directory is not specified in the config / argument")
    }
    // Append the trailing separator when it is missing.
    if (!outputDir.endsWith("/")) {
      outputDir += "/"
    }
  }

  /**
   * Pipeline definition.
   *
   * At present this only logs a start-up message; the per-sample job wiring
   * (`runSamplesJobs`) is still commented out.
   */
  def biopetScript(): Unit = {
    // Intended flow: QC, alignment, sambamba, SV callers, reporting.
    // Reading the config and setting pipeline parameters is meant to happen here as well.
    logger.info("Starting YAM SV Pipeline")
    //runSamplesJobs
  }
70

wyleung's avatar
wyleung committed
71
  /**
   * Overrides the inherited `onExecutionDone` callback.
   *
   * Logs a fixed completion message; both arguments are ignored, so the message
   * is the same whether or not `success` is true.
   *
   * @param jobs    map from function to its run information (unused here)
   * @param success success flag handed to the callback (unused here)
   */
  override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean): Unit =
    logger.info("YAM SV Pipeline has run .......................")
Peter van 't Hof's avatar
Peter van 't Hof committed
74
  /*
75
  def runSingleSampleJobs(sampleID: String): SampleOutput = {
wyleung's avatar
wyleung committed
76
    val sampleOutput = new SampleOutput
wyleung's avatar
wyleung committed
77
    var libraryBamfiles: List[File] = List()
wyleung's avatar
wyleung committed
78
    var outputFiles: Map[String, List[File]] = Map()
wyleung's avatar
wyleung committed
79
    var libraryFastqFiles: List[File] = List()
80
    val sampleDir: String = outputDir + sampleID + "/"
81
82
    val alignmentDir: String = sampleDir + "alignment/"

83
84
    val svcallingDir: String = sampleDir + "svcalls/"

85
    sampleOutput.libraries = runLibraryJobs(sampleID)
wyleung's avatar
wyleung committed
86
87
    for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
      // this is extending the libraryBamfiles list like '~=' in D or .append in Python or .push_back in C++
88
      libraryBamfiles ++= List(libraryOutput.mappedBamFile)
wyleung's avatar
wyleung committed
89
    }
90

Peter van 't Hof's avatar
Peter van 't Hof committed
91
    val bamFile: File =
92
93
      if (libraryBamfiles.size == 1) {
        // When the sample has only 1 run, make a link in the main alignment directory
94
        val alignmentlink = Ln(this, libraryBamfiles.head,
95
          alignmentDir + sampleID + ".merged.bam", true)
Wai Yi Leung's avatar
Wai Yi Leung committed
96
        add(alignmentlink, isIntermediate=true)
97
        alignmentlink.out
98
      } else if (libraryBamfiles.size > 1) {
99
        val mergeSamFiles = new SambambaMerge(this)
100
101
        mergeSamFiles.input = libraryBamfiles
        mergeSamFiles.output = alignmentDir + sampleID + ".merged.bam"
Wai Yi Leung's avatar
Wai Yi Leung committed
102
        add(mergeSamFiles, isIntermediate=true)
103
104
        mergeSamFiles.output
      } else null
Peter van 't Hof's avatar
Peter van 't Hof committed
105

106
    val bamMarkDup = SambambaMarkdup(this, bamFile)
107
108
    add(bamMarkDup)

Wai Yi Leung's avatar
Wai Yi Leung committed
109
110
    addAll(BamMetrics(this, bamMarkDup.output, alignmentDir + "metrics/").functions)

111
112
113
114
    // create an IGV TDF file
    val tdfCount = IGVToolsCount(this, bamMarkDup.output, config("genomename", default = "hg19"))
    add(tdfCount)

115
    /// bamfile will be used as input for the SV callers. First run Clever
116
117
118
    //    val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"

    val cleverDir = svcallingDir + sampleID + ".clever/"
119
    val clever = CleverCaller(this, bamMarkDup.output, this.reference, svcallingDir, cleverDir)
wyleung's avatar
wyleung committed
120
    sampleOutput.vcf += ("clever" -> List(clever.outputvcf))
121
122
    add(clever)

123
124
125
    val clever_vcf = Ln(this, clever.outputvcf, svcallingDir + sampleID + ".clever.vcf", relative = true)
    add(clever_vcf)

126
    val breakdancerDir = svcallingDir + sampleID + ".breakdancer/"
127
    val breakdancer = Breakdancer(this, bamMarkDup.output, this.reference, breakdancerDir)
wyleung's avatar
Renames    
wyleung committed
128
    sampleOutput.vcf += ("breakdancer" -> List(breakdancer.outputvcf))
129
    addAll(breakdancer.functions)
130

131
132
133
    val bd_vcf = Ln(this, breakdancer.outputvcf, svcallingDir + sampleID + ".breakdancer.vcf", relative = true)
    add(bd_vcf)

wyleung's avatar
wyleung committed
134
    val dellyDir = svcallingDir + sampleID + ".delly/"
135
    val delly = Delly(this, bamMarkDup.output, dellyDir)
wyleung's avatar
wyleung committed
136
137
138
139
140
141
    sampleOutput.vcf += ("delly" -> List(delly.outputvcf))
    addAll(delly.functions)

    val delly_vcf = Ln(this, delly.outputvcf, svcallingDir + sampleID + ".delly.vcf", relative = true)
    add(delly_vcf)

142
    // for pindel we should use per library config collected into one config file
Peter van 't Hof's avatar
Peter van 't Hof committed
143
    //    val pindelDir = svcallingDir + sampleID + ".pindel/"
144
    //    val pindel = Pindel(this, analysisBam, this.reference, pindelDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
145
146
147
    //    sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
    //    addAll(pindel.functions)
    //
148
149
150
    //    val pindel_vcf = Ln(this, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true)
    //    add(pindel_vcf)
    //
wyleung's avatar
wyleung committed
151
    return sampleOutput
wyleung's avatar
wyleung committed
152
  }
153

wyleung's avatar
wyleung committed
154
  // Called for each run from a sample
155

156
  def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = {
wyleung's avatar
wyleung committed
157
    val libraryOutput = new LibraryOutput
Peter van 't Hof's avatar
Peter van 't Hof committed
158

159
    val alignmentDir: String = outputDir + sampleID + "/alignment/"
160
    val runDir: String = alignmentDir + "run_" + libraryId + "/"
Peter van 't Hof's avatar
Peter van 't Hof committed
161

162
    if (config.contains("R1")) {
wyleung's avatar
wyleung committed
163
      val mapping = new Mapping(this)
164

165
166
167
168
      // TODO: check and test config[aligner] in json
      // yamsvp/aligner -> value
      // this setting causes error if not defined?
      mapping.aligner = config("aligner", default = "bwa")
169
      mapping.skipFlexiprep = false
170
      mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
171

172
173
      mapping.input_R1 = config("R1")
      mapping.input_R2 = config("R2")
wyleung's avatar
wyleung committed
174
      mapping.paired = (mapping.input_R2 != null)
175
176
177
178
179
      mapping.RGLB = libraryId
      mapping.RGSM = sampleID
      mapping.RGPL = config("PL")
      mapping.RGPU = config("PU")
      mapping.RGCN = config("CN")
wyleung's avatar
wyleung committed
180
      mapping.outputDir = runDir
wyleung's avatar
wyleung committed
181

wyleung's avatar
wyleung committed
182
183
184
      mapping.init
      mapping.biopetScript
      addAll(mapping.functions)
185

wyleung's avatar
wyleung committed
186
      // start sambamba dedup
187

wyleung's avatar
wyleung committed
188
      libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
189
    } else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryId)
wyleung's avatar
wyleung committed
190
    return libraryOutput
191
192
    //    logger.debug(outputFiles)
    //    return outputFiles
wyleung's avatar
wyleung committed
193
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
194
  */
wyleung's avatar
wyleung committed
195
196
}

197
// Companion object of the Yamsvp pipeline class; extends PipelineCommand and adds no members of its own.
// NOTE(review): presumably PipelineCommand supplies the command-line entry point — confirm.
object Yamsvp extends PipelineCommand