ShivaSvCalling.scala 5.97 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
Peter van 't Hof's avatar
Peter van 't Hof committed
12
13
14
15
16
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.shiva

17
import htsjdk.variant.vcf.VCFFileReader
18
19
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag }
20
import nl.lumc.sasc.biopet.extensions.Pysvtools
Wai Yi Leung's avatar
style    
Wai Yi Leung committed
21
import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._
22
import nl.lumc.sasc.biopet.utils.config.Configurable
23
import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
import org.broadinstitute.gatk.queue.QScript
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
29
30
/**
 * Common trait for ShivaVariantcalling
 *
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
31
class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
  def this() = this(null)

36
37
38
  var outputMergedVCFbySample: Map[String, File] = Map()
  var outputMergedVCF: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
39
  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
40
  protected[shiva] var inputBamsArg: List[File] = Nil
Peter van 't Hof's avatar
Peter van 't Hof committed
41

42
  var inputBams: Map[String, File] = Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
45

  /** Executed before script */
  def init(): Unit = {
46
    if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
47
    outputMergedVCF = new File(outputDir, "allsamples.merged.vcf")
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50
  }

  /** Variantcallers requested by the config */
Peter van 't Hof's avatar
Peter van 't Hof committed
51
  protected val configCallers: Set[String] = config("sv_callers", default = Set("breakdancer", "clever", "delly"))
Peter van 't Hof's avatar
Peter van 't Hof committed
52
53
54
55
56

  /** This will add jobs for this pipeline */
  def biopetScript(): Unit = {
    for (cal <- configCallers) {
      if (!callersList.exists(_.name == cal))
57
        Logging.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
58
59
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
60
    val callers = callersList.filter(x => configCallers.contains(x.name))
Peter van 't Hof's avatar
Peter van 't Hof committed
61
62

    require(inputBams.nonEmpty, "No input bams found")
63
    require(callers.nonEmpty, "Please select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))
Peter van 't Hof's avatar
Peter van 't Hof committed
64

Peter van 't Hof's avatar
Peter van 't Hof committed
65
    callers.foreach { caller =>
66
      caller.inputBams = inputBams
Peter van 't Hof's avatar
Peter van 't Hof committed
67
68
69
      caller.outputDir = new File(outputDir, caller.name)
      add(caller)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
70

71
72
    // merge VCF by sample
    for ((sample, bamFile) <- inputBams) {
73
74
75
76
77
78
79
80
81
82
83
      if (callers.size > 1) {
        var sampleVCFS: List[Option[File]] = List.empty
        callers.foreach { caller =>
          sampleVCFS ::= caller.outputVCF(sample)
        }
        val mergeSVcalls = new Pysvtools(this)
        mergeSVcalls.input = sampleVCFS.flatten
        mergeSVcalls.output = new File(outputDir, sample + ".merged.vcf")
        add(mergeSVcalls)
        outputMergedVCFbySample += (sample -> mergeSVcalls.output)
      } else {
akaljuvee's avatar
-    
akaljuvee committed
84
        outputMergedVCFbySample += (sample -> callers.head.outputVCF(sample).get)
85
86
87
      }
    }

88
89
90
91
92
93
94
    if (inputBams.size > 1) {
      // merge all files from all samples in project
      val mergeSVcallsProject = new Pysvtools(this)
      mergeSVcallsProject.input = outputMergedVCFbySample.values.toList
      mergeSVcallsProject.output = outputMergedVCF
      add(mergeSVcallsProject)
    }
95
96
97
98
99
    // merging the VCF calls by project
    // basicly this will do all samples from this pipeline run
    // group by "tags"
    // sample tagging is however not available within this pipeline

100
    for ((sample, mergedResultFile) <- outputMergedVCFbySample) {
101
      lazy val counts = getVariantCounts(mergedResultFile, ShivaSvCallingReport.histogramBinBoundaries)
102
103
104
105
106
107
108
      addSummarizable(new Summarizable {
        def summaryFiles = Map.empty
        def summaryStats = counts
      }, "variantsBySizeAndType", Some(sample))
    }
    addSummarizable(new Summarizable {
      def summaryFiles = Map.empty
109
      def summaryStats = ShivaSvCallingReport.histogramBinBoundaries
110
111
    }, "histBreaksForCounts")

Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
114
115
    addSummaryJobs()
  }

  /** Will generate all available variantcallers */
Wai Yi Leung's avatar
Wai Yi Leung committed
116
  protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this), new Pindel(this))
Peter van 't Hof's avatar
Peter van 't Hof committed
117
118
119
120
121
122
123
124

  /** Location of summary file */
  def summaryFile = new File(outputDir, "ShivaSvCalling.summary.json")

  /** Settings for the summary */
  def summarySettings = Map("sv_callers" -> configCallers.toList)

  /** Files for the summary */
akaljuvee's avatar
-    
akaljuvee committed
125
126
  def summaryFiles: Map[String, File] = outputMergedVCFbySample ++ (if (inputBams.size > 1) Map("final_mergedvcf" -> outputMergedVCF) else Nil)

127
  def getVariantCounts(vcfFile: File, breaks: Array[Int]): Map[String, Any] = {
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
    val delCounts, insCounts, dupCounts, invCounts = Array.fill(breaks.size + 1) { 0 }
    var traCount = 0

    val iterator = new VCFFileReader(vcfFile, false).iterator
    while (iterator.hasNext) {
      val record = iterator.next
      val svType = record.getAttributeAsString("SVTYPE", "")
      if (svType == "TRA" || svType == "CTX" || svType == "ITX") {
        traCount += 1
      } else {
        val size = record.getEnd - record.getStart
        var i = 0
        while (i < breaks.size && size > breaks(i)) i += 1
        svType match {
          case "DEL" => delCounts(i) += 1
          case "INS" => insCounts(i) += 1
          case "DUP" => dupCounts(i) += 1
          case "INV" => invCounts(i) += 1
          case _     => logger.warn(s"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType")
        }
      }
    }
    iterator.close()

akaljuvee's avatar
akaljuvee committed
152
    Map("DEL" -> delCounts, "INS" -> insCounts, "DUP" -> dupCounts, "INV" -> invCounts, "TRA" -> traCount)
153
154
  }

akaljuvee's avatar
-    
akaljuvee committed
155
156
157
}

object ShivaSvCalling extends PipelineCommand