ShivaSvCalling.scala 5.34 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
package nl.lumc.sasc.biopet.pipelines.shiva

import java.io.File

import htsjdk.samtools.SamReaderFactory
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.config.Configurable
Peter van 't Hof's avatar
Peter van 't Hof committed
22
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.{ PipelineCommand, BiopetQScript, Reference, SampleLibraryTag }
Peter van 't Hof's avatar
Peter van 't Hof committed
24 25 26
import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer
import nl.lumc.sasc.biopet.extensions.clever.CleverCaller
import nl.lumc.sasc.biopet.extensions.delly.Delly
Peter van 't Hof's avatar
Peter van 't Hof committed
27 28
import nl.lumc.sasc.biopet.tools.VcfStats
import org.broadinstitute.gatk.queue.QScript
Peter van 't Hof's avatar
Peter van 't Hof committed
29 30 31 32 33 34 35 36
import org.broadinstitute.gatk.utils.commandline.Input
import scala.collection.JavaConversions._

/**
 * Common trait for ShivaVariantcalling
 *
 * Created by pjvan_thof on 2/26/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
37
class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
38 39
  qscript =>

Peter van 't Hof's avatar
Peter van 't Hof committed
40 41
  def this() = this(null)

Peter van 't Hof's avatar
Peter van 't Hof committed
42 43 44 45 46
  @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true)
  protected var inputBamsArg: List[File] = Nil

  protected var inputBams: Map[String, File] = Map()

Peter van 't Hof's avatar
Peter van 't Hof committed
47
  def addBamFile(file: File, sampleId: Option[String] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
48
    sampleId match {
Peter van 't Hof's avatar
Peter van 't Hof committed
49
      case Some(sample)        => inputBams += sample -> file
Peter van 't Hof's avatar
Peter van 't Hof committed
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
      case _ if !file.exists() => throw new IllegalArgumentException("Bam file does not exits: " + file)
      case _ => {
        val inputSam = SamReaderFactory.makeDefault.open(file)
        val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct
        if (samples.size == 1) {
          inputBams += samples.head -> file
        } else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file)
      }
    }
  }

  /** Executed before script */
  def init(): Unit = {
    inputBamsArg.foreach(addBamFile(_))
  }

  /** Variantcallers requested by the config */
Peter van 't Hof's avatar
Peter van 't Hof committed
67
  protected val configCallers: Set[String] = config("sv_callers", default = Set("breakdancer", "clever", "delly"))
Peter van 't Hof's avatar
Peter van 't Hof committed
68 69 70 71 72 73 74 75

  /** This will add jobs for this pipeline */
  def biopetScript(): Unit = {
    for (cal <- configCallers) {
      if (!callersList.exists(_.name == cal))
        BiopetQScript.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", "))
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
76
    val callers = callersList.filter(x => configCallers.contains(x.name))
Peter van 't Hof's avatar
Peter van 't Hof committed
77 78 79 80

    require(inputBams.nonEmpty, "No input bams found")
    require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", "))

Peter van 't Hof's avatar
Peter van 't Hof committed
81 82
    callers.foreach(_.addJobs())

Peter van 't Hof's avatar
Peter van 't Hof committed
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
    addSummaryJobs()
  }

  /** Will generate all available variantcallers */
  protected def callersList: List[SvCaller] = List(new Breakdancer, new Clever, new Delly)

  /** General trait for a variantcaller mode */
  trait SvCaller {
    /** Name of mode, this should also be used in the config */
    val name: String

    /** Output dir for this mode */
    def outputDir = new File(qscript.outputDir, name)

    /** This should add the variantcaller jobs */
    def addJobs()
  }

  /** default mode of freebayes */
  class Breakdancer extends SvCaller {
    val name = "breakdancer"

    def addJobs() {
      //TODO: move minipipeline of breakdancer to here
      for ((sample, bamFile) <- inputBams) {
        val breakdancerDir = new File(outputDir, sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
109
        val breakdancer = Breakdancer(qscript, bamFile, breakdancerDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
110 111 112 113 114 115 116 117 118 119 120 121
        addAll(breakdancer.functions)
      }
    }
  }

  /** default mode of bcftools */
  class Clever extends SvCaller {
    val name = "clever"

    def addJobs() {
      //TODO: check double directories
      for ((sample, bamFile) <- inputBams) {
Peter van 't Hof's avatar
Peter van 't Hof committed
122
        val cleverDir = new File(outputDir, sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
123
        val clever = CleverCaller(qscript, bamFile, cleverDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
124 125 126 127 128 129 130 131 132 133 134 135
        add(clever)
      }
    }
  }

  /** Makes a vcf file from a mpileup without statistics */
  class Delly extends SvCaller {
    val name = "delly"

    def addJobs() {
      //TODO: Move mini delly pipeline to here
      for ((sample, bamFile) <- inputBams) {
Peter van 't Hof's avatar
Peter van 't Hof committed
136
        val dellyDir = new File(outputDir, sample)
Peter van 't Hof's avatar
Peter van 't Hof committed
137
        val delly = Delly(qscript, bamFile, dellyDir)
138
        delly.outputName = sample
Peter van 't Hof's avatar
Peter van 't Hof committed
139 140 141 142 143 144 145 146 147 148 149 150 151
        addAll(delly.functions)
      }
    }
  }

  /** Location of summary file */
  def summaryFile = new File(outputDir, "ShivaSvCalling.summary.json")

  /** Settings for the summary */
  def summarySettings = Map("sv_callers" -> configCallers.toList)

  /** Files for the summary */
  def summaryFiles: Map[String, File] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
152
    val callers: Set[String] = configCallers
Peter van 't Hof's avatar
Peter van 't Hof committed
153 154
    //callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile)
    Map()
Peter van 't Hof's avatar
Peter van 't Hof committed
155 156 157 158
  }
}

object ShivaSvCalling extends PipelineCommand