ShivaSvCalling.scala 5.38 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12
13
14
15
16
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.shiva

17
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
akaljuvee's avatar
-    
akaljuvee committed
18
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference }
19
import nl.lumc.sasc.biopet.extensions.Pysvtools
akaljuvee's avatar
akaljuvee committed
20
import nl.lumc.sasc.biopet.extensions.tools.VcfStatsForSv
Wai Yi Leung's avatar
style    
Wai Yi Leung committed
21
import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._
22
import nl.lumc.sasc.biopet.utils.config.Configurable
akaljuvee's avatar
akaljuvee committed
23
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
24
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
Peter van 't Hof's avatar
Peter van 't Hof committed
25
import org.broadinstitute.gatk.queue.QScript
26

akaljuvee's avatar
akaljuvee committed
27
28
29
30
import scala.concurrent.Await
import scala.concurrent.duration.Duration
import scala.concurrent.ExecutionContext.Implicits.global

Peter van 't Hof's avatar
Peter van 't Hof committed
31
32
33
34
35
/**
 * Common trait for ShivaVariantcalling
 *
 * Created by pjvan_thof on 2/26/15.
 */
akaljuvee's avatar
-    
akaljuvee committed
36
class ShivaSvCalling(val parent: Configurable) extends QScript with SummaryQScript with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
37
38
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
39
40
  def this() = this(null)

41
42
43
  var outputMergedVCFbySample: Map[String, File] = Map()
  var outputMergedVCF: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  protected[shiva] var inputBamsArg: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
46

47
  var inputBams: Map[String, File] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50

  /** Executed before script */
  def init(): Unit = {
akaljuvee's avatar
akaljuvee committed
51
52
53
54
55
56
57
58
59
60
    if (inputBamsArg.nonEmpty) {
      inputBams = BamUtils.sampleBamMap(inputBamsArg)

      val db = SummaryDb.openSqliteSummary(summaryDbFile)
      for (sampleName <- inputBams.keys) {
        if (Await.result(db.getSampleId(summaryRunId, sampleName), Duration.Inf).isEmpty) {
          db.createSample(sampleName, summaryRunId)
        }
      }
    }
61
    outputMergedVCF = new File(outputDir, "allsamples.merged.vcf")
Peter van 't Hof's avatar
Peter van 't Hof committed
62
63
64
  }

  /** Variantcallers requested by the config */
Peter van 't Hof's avatar
Peter van 't Hof committed
65
  protected val configCallers: Set[String] = config("sv_callers", default = Set("breakdancer", "clever", "delly"))
Peter van 't Hof's avatar
Peter van 't Hof committed
66
67
68
69
70

  /** This will add jobs for this pipeline */
  def biopetScript(): Unit = {
    for (cal <- configCallers) {
      if (!callersList.exists(_.name == cal))
71
        Logging.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
72
73
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
74
    val callers = callersList.filter(x => configCallers.contains(x.name))
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76

    require(inputBams.nonEmpty, "No input bams found")
77
    require(callers.nonEmpty, "Please select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
78

Peter van 't Hof's avatar
Peter van 't Hof committed
79
    callers.foreach { caller =>
80
      caller.inputBams = inputBams
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
83
      caller.outputDir = new File(outputDir, caller.name)
      add(caller)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
84

85
86
    // merge VCF by sample
    for ((sample, bamFile) <- inputBams) {
87
88
89
90
91
92
93
94
95
96
97
      if (callers.size > 1) {
        var sampleVCFS: List[Option[File]] = List.empty
        callers.foreach { caller =>
          sampleVCFS ::= caller.outputVCF(sample)
        }
        val mergeSVcalls = new Pysvtools(this)
        mergeSVcalls.input = sampleVCFS.flatten
        mergeSVcalls.output = new File(outputDir, sample + ".merged.vcf")
        add(mergeSVcalls)
        outputMergedVCFbySample += (sample -> mergeSVcalls.output)
      } else {
akaljuvee's avatar
-    
akaljuvee committed
98
        outputMergedVCFbySample += (sample -> callers.head.outputVCF(sample).get)
99
100
101
      }
    }

102
103
104
105
106
107
108
    if (inputBams.size > 1) {
      // merge all files from all samples in project
      val mergeSVcallsProject = new Pysvtools(this)
      mergeSVcallsProject.input = outputMergedVCFbySample.values.toList
      mergeSVcallsProject.output = outputMergedVCF
      add(mergeSVcallsProject)
    }
109
110
111
112
113
    // merging the VCF calls by project
    // basicly this will do all samples from this pipeline run
    // group by "tags"
    // sample tagging is however not available within this pipeline

114
    for ((sample, mergedResultFile) <- outputMergedVCFbySample) {
akaljuvee's avatar
akaljuvee committed
115
116
117
118
119
120
121
122
      val vcfStats = new VcfStatsForSv(qscript)
      vcfStats.inputFile = mergedResultFile
      vcfStats.outputFile = new File(outputDir, s".$sample.merged.stats")
      vcfStats.histogramBinBoundaries = ShivaSvCallingReport.histogramBinBoundaries

      add(vcfStats)
      addSummarizable(vcfStats, "vcfstats-sv", Some(sample))

123
      addSummarizable(new Summarizable {
akaljuvee's avatar
-    
akaljuvee committed
124
        def summaryFiles = Map("output_vcf" -> mergedResultFile)
akaljuvee's avatar
akaljuvee committed
125
126
        def summaryStats = Map.empty
      }, "merge_variants", Some(sample))
127
128
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
129
130
131
132
    addSummaryJobs()
  }

  /** Will generate all available variantcallers */
Wai Yi Leung's avatar
Wai Yi Leung committed
133
  protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this), new Pindel(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
134
135

  /** Settings for the summary */
akaljuvee's avatar
akaljuvee committed
136
  def summarySettings = Map("sv_callers" -> configCallers.toList, "hist_bin_boundaries" -> ShivaSvCallingReport.histogramBinBoundaries)
Peter van 't Hof's avatar
Peter van 't Hof committed
137
138

  /** Files for the summary */
akaljuvee's avatar
-    
akaljuvee committed
139
  def summaryFiles: Map[String, File] = if (inputBams.size > 1) Map("final_mergedvcf" -> outputMergedVCF) else Map.empty
akaljuvee's avatar
-    
akaljuvee committed
140

akaljuvee's avatar
-    
akaljuvee committed
141
142
143
}

object ShivaSvCalling extends PipelineCommand