Yamsvp.scala 5.57 KB
Newer Older
wyleung's avatar
wyleung committed
1
2
3
4
5
6
7
8
9
10
/*
 * Structural variation calling
 */

package nl.lumc.sasc.biopet.pipelines.yamsvp

import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand

11
12
13
14
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaIndex, SambambaMerge }
import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
import nl.lumc.sasc.biopet.extensions.svcallers.{ Breakdancer, Clever }

wyleung's avatar
wyleung committed
15
16
17
18
19

import nl.lumc.sasc.biopet.pipelines.mapping.Mapping

import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function._
wyleung's avatar
wyleung committed
20
21
import org.broadinstitute.gatk.queue.engine.JobRunInfo

wyleung's avatar
wyleung committed
22
23
24
25
26
class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
  def this() = this(null)

  var reference: File = _
  var finalBamFiles: List[File] = Nil
27

wyleung's avatar
wyleung committed
28
29
30
31
32
33
34
35
36
  class LibraryOutput extends AbstractLibraryOutput {
    var mappedBamFile: File = _
  }

  class SampleOutput extends AbstractSampleOutput {
    var vcf: Map[String, List[File]] = Map()
    var mappedBamFile: File = _
  }

37
  override def init() {
wyleung's avatar
wyleung committed
38
39
40
41
42
43
44
45
46
47
48
49
50
    for (file <- configfiles) globalConfig.loadConfigFile(file)
    reference = config("reference", required = true)
    if (outputDir == null)
      throw new IllegalStateException("Output directory is not specified in the config / argument")
    else if (!outputDir.endsWith("/"))
      outputDir += "/"
  }

  def biopetScript() {
    // write the pipeline here
    // start with QC, alignment, call sambamba, call sv callers, reporting

    // read config and set all parameters for the pipeline
51
    logger.info("Starting YAM SV Pipeline")
wyleung's avatar
wyleung committed
52
    runSamplesJobs
53
    //
wyleung's avatar
wyleung committed
54
55

  }
56

wyleung's avatar
wyleung committed
57
  override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) {
58
    logger.info("YAM SV Pipeline has run .......................")
wyleung's avatar
wyleung committed
59
  }
60

wyleung's avatar
wyleung committed
61
62
  def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
    val sampleOutput = new SampleOutput
wyleung's avatar
wyleung committed
63
    var libraryBamfiles: List[File] = List()
wyleung's avatar
wyleung committed
64
    var outputFiles: Map[String, List[File]] = Map()
wyleung's avatar
wyleung committed
65
66
    var libraryFastqFiles: List[File] = List()
    val sampleID: String = sampleConfig("ID").toString
67
    val sampleDir: String = outputDir + sampleID + "/"
68
69
    val alignmentDir: String = sampleDir + "alignment/"

70
71
72

    val svcallingDir: String = sampleDir + "svcalls/"

wyleung's avatar
wyleung committed
73
74
75
    sampleOutput.libraries = runLibraryJobs(sampleConfig)
    for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
      // this is extending the libraryBamfiles list like '~=' in D or .append in Python or .push_back in C++
76
      libraryBamfiles ++= List(libraryOutput.mappedBamFile)
wyleung's avatar
wyleung committed
77
    }
78
79
80
81
82
83
84
85
86
87

    val bamFile: File = 
      if (libraryBamfiles.size == 1) libraryBamfiles.head
      else if (libraryBamfiles.size > 1) {
        val mergeSamFiles = new SambambaMerge(root)
        mergeSamFiles.input = libraryBamfiles
        mergeSamFiles.output = alignmentDir + sampleID + ".merged.bam"
        add(mergeSamFiles)
        mergeSamFiles.output
      } else null
wyleung's avatar
wyleung committed
88
89
90
      
    val bamIndex = SambambaIndex(root, bamFile)
    add(bamIndex)
91
    
92
    /// bamfile will be used as input for the SV callers. First run Clever
93
94
95
    //    val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"

    val cleverDir = svcallingDir + sampleID + ".clever/"
96
    val clever = Clever(this, bamFile, this.reference, svcallingDir, cleverDir)
wyleung's avatar
wyleung committed
97
    clever.deps = List(bamIndex.output)
wyleung's avatar
wyleung committed
98
    sampleOutput.vcf += ("clever" -> List(clever.outputvcf))
99
100
    add(clever)

101
    val breakdancerDir = svcallingDir + sampleID + ".breakdancer/"
102
    val breakdancer = Breakdancer(this, bamFile, this.reference, breakdancerDir)
wyleung's avatar
Renames    
wyleung committed
103
    sampleOutput.vcf += ("breakdancer" -> List(breakdancer.outputvcf))
104
    addAll(breakdancer.functions)
105
106

    // for pindel we should use per library config collected into one config file
107
108
109
110
111
112
//    val pindelDir = svcallingDir + sampleID + ".pindel/"
//    val pindel = Pindel(this, bamFile, this.reference, pindelDir)
//    sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
//    addAll(pindel.functions)
//
//    
wyleung's avatar
wyleung committed
113
    return sampleOutput
wyleung's avatar
wyleung committed
114
  }
115

wyleung's avatar
wyleung committed
116
  // Called for each run from a sample
117

wyleung's avatar
wyleung committed
118
119
  def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
    val libraryOutput = new LibraryOutput
120
    
wyleung's avatar
wyleung committed
121
122
    val runID: String = runConfig("ID").toString
    val sampleID: String = sampleConfig("ID").toString
123
124
    val alignmentDir: String = outputDir + sampleID + "/alignment/"
    val runDir: String = alignmentDir + "run_" + runID + "/"
125
    
wyleung's avatar
wyleung committed
126
    if (runConfig.contains("R1")) {
wyleung's avatar
wyleung committed
127
      val mapping = new Mapping(this)
128

wyleung's avatar
wyleung committed
129
      mapping.defaultAligner = "stampy"
130
      mapping.skipFlexiprep = false
131
      mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
132

wyleung's avatar
wyleung committed
133
134
135
      if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
      if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
      mapping.paired = (mapping.input_R2 != null)
wyleung's avatar
wyleung committed
136
137
138
139
140
141
      mapping.RGLB = runConfig("ID").toString
      mapping.RGSM = sampleConfig("ID").toString
      if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
      if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
      if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
      mapping.outputDir = runDir
wyleung's avatar
wyleung committed
142

wyleung's avatar
wyleung committed
143
144
145
      mapping.init
      mapping.biopetScript
      addAll(mapping.functions)
146

wyleung's avatar
wyleung committed
147
      // start sambamba dedup
148

wyleung's avatar
wyleung committed
149
      libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
wyleung's avatar
wyleung committed
150
    } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
wyleung's avatar
wyleung committed
151
    return libraryOutput
152
153
    //    logger.debug(outputFiles)
    //    return outputFiles
wyleung's avatar
wyleung committed
154
155
156
  }
}

157
object Yamsvp extends PipelineCommand