From 3c2f4a07686409a8984a8ded08654605748cdc4b Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Fri, 24 Jul 2015 11:33:51 +0200 Subject: [PATCH] Add ShivaSvCalling --- .../biopet/core/BiopetExecutablePublic.scala | 3 +- .../pipelines/shiva/ShivaSvCalling.scala | 149 ++++++++++++++++++ 2 files changed, 151 insertions(+), 1 deletion(-) create mode 100644 public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala index a52f6ab3f..89a195b98 100644 --- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala +++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/core/BiopetExecutablePublic.scala @@ -25,7 +25,8 @@ object BiopetExecutablePublic extends BiopetExecutable { nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig, nl.lumc.sasc.biopet.pipelines.carp.Carp, nl.lumc.sasc.biopet.pipelines.toucan.Toucan, - nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp + nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp, + nl.lumc.sasc.biopet.pipelines.shiva.ShivaSvCalling ) def pipelines: List[MainCommand] = List( diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala new file mode 100644 index 000000000..3e4de155e --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala @@ -0,0 +1,149 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.pipelines.shiva + +import java.io.File + +import htsjdk.samtools.SamReaderFactory +import nl.lumc.sasc.biopet.core.summary.SummaryQScript +import nl.lumc.sasc.biopet.core.{PipelineCommand, BiopetQScript, Reference, SampleLibraryTag} +import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer +import nl.lumc.sasc.biopet.extensions.clever.CleverCaller +import nl.lumc.sasc.biopet.extensions.delly.Delly +import org.broadinstitute.gatk.utils.commandline.Input +import scala.collection.JavaConversions._ + +/** + * Common trait for ShivaVariantcalling + * + * Created by pjvan_thof on 2/26/15. + */ +class ShivaSvCalling extends SummaryQScript with SampleLibraryTag with Reference { + qscript => + + @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) + protected var inputBamsArg: List[File] = Nil + + protected var inputBams: Map[String, File] = Map() + + protected def addBamFile(file: File, sampleId: Option[String] = None): Unit = { + sampleId match { + case Some(sample) => inputBams += sample -> file + case _ if !file.exists() => throw new IllegalArgumentException("Bam file does not exits: " + file) + case _ => { + val inputSam = SamReaderFactory.makeDefault.open(file) + val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct + if (samples.size == 1) { + inputBams += samples.head -> file + } else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) + } + } + } + + /** Executed before script */ + def init(): Unit = { + inputBamsArg.foreach(addBamFile(_)) + } + + /** Variantcallers requested by the config */ + protected val configCallers: Set[String] = config("sv_callers") + + /** This will add jobs for this pipeline */ + def biopetScript(): Unit = { + for (cal <- configCallers) { + if (!callersList.exists(_.name == cal)) + BiopetQScript.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) + } + + val callers = callersList.filter(x => configCallers.contains(x.name)).sortBy(_.prio) + + require(inputBams.nonEmpty, "No input bams found") + require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", ")) + + addSummaryJobs() + } + + /** Will generate all available variantcallers */ + protected def callersList: List[SvCaller] = List(new Breakdancer, new Clever, new Delly) + + /** General trait for a variantcaller mode */ + trait SvCaller { + /** Name of mode, this should also be used in the config */ + val name: String + + /** Output dir for this mode */ + def outputDir = new File(qscript.outputDir, name) + + /** This should add the variantcaller jobs */ + def addJobs() + } + + /** default mode of freebayes */ + class Breakdancer extends SvCaller { + val name = "breakdancer" + + def addJobs() { + //TODO: move minipipeline of breakdancer to here + for ((sample, bamFile) <- inputBams) { + val breakdancerDir = new File(outputDir, sample) + val breakdancer = Breakdancer(qscript, bamFile, qscript.reference, breakdancerDir) + addAll(breakdancer.functions) + } + } + } + + /** default mode of bcftools */ + class Clever extends SvCaller { + val name = "clever" + + def addJobs() { + //TODO: check double directories + for ((sample, bamFile) <- inputBams) { + val cleverDir = new File(outputDir, sampleId) + val clever = CleverCaller(qscript, bamFile, qscript.reference, cleverDir, cleverDir) + add(clever) + } + } + } + + /** Makes a vcf file from a mpileup without statistics */ + class Delly extends SvCaller { + val name = "delly" + + def addJobs() { + //TODO: Move mini delly pipeline to here + for ((sample, bamFile) <- inputBams) { + val dellyDir = new File(outputDir, sampleId) + val delly = Delly(qscript, bamFile, dellyDir) + addAll(delly.functions) + } + } + } + + /** Location of summary file */ + def summaryFile = new File(outputDir, "ShivaSvCalling.summary.json") + + /** Settings for the summary */ + def summarySettings = Map("sv_callers" -> configCallers.toList) + + /** Files for the summary */ + def summaryFiles: Map[String, File] = { + val callers: Set[String] = config("sv_callers") + callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) + } +} + +object ShivaSvCalling extends PipelineCommand \ No newline at end of file -- GitLab