diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala similarity index 97% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala index eceb5b60ff0b6d33d2102a11334cc3b646d1d61b..f3bbea7c7c21d73744c7d2adf94f9876527427b3 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala @@ -28,7 +28,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } * Based on version 0.2.5b8 */ -class Pindel(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { +class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version { executable = config("exe", default = "pindel", freeVar = false) override def defaultCoreMemory = 3.0 @@ -169,9 +169,9 @@ class Pindel(val root: Configurable) extends BiopetCommandLineFunction with Refe optional("--DD_REPORT_DUPLICATION_READS", DD_REPORT_DUPLICATION_READS) + } -object Pindel { - def apply(root: Configurable, configFile: File, outputDir: File): Pindel = { - val caller = new Pindel(root) +object PindelCaller { + def apply(root: Configurable, configFile: File, outputDir: File): PindelCaller = { + val caller = new PindelCaller(root) caller.config_file = Some(configFile) caller.output_prefix = outputDir caller diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala index 77ab79f1e513f087d967d72ce757f8a0d1b95853..cef1c6ea23c25aec10a74d5a2311d86c48baf8ff 100644 
--- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala @@ -15,8 +15,9 @@ */ package nl.lumc.sasc.biopet.extensions.pindel -import java.io.File +import java.io.{PrintWriter, File} +import htsjdk.samtools.SamReaderFactory import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction import nl.lumc.sasc.biopet.utils.ToolCommand import nl.lumc.sasc.biopet.utils.config.Configurable @@ -31,35 +32,20 @@ class PindelConfig(val root: Configurable) extends BiopetJavaCommandLineFunction var output: File = _ @Argument(doc = "Insertsize") - var insertsize: Option[Int] = _ + var insertsize: Int = _ + + var sampleName: String = _ override def cmdLine = super.cmdLine + "-i" + required(input) + + "-l" + required(sampleName) + "-s" + required(insertsize) + "-o" + required(output) } object PindelConfig extends ToolCommand { - def apply(root: Configurable, input: File, output: File): PindelConfig = { - val conf = new PindelConfig(root) - conf.input = input - conf.output = output - conf - } - - def apply(root: Configurable, input: File, outputDir: String): PindelConfig = { - val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" - val outputFile = new File(dir + swapExtension(input.getName)) - apply(root, input, outputFile) - } - - def apply(root: Configurable, input: File): PindelConfig = { - apply(root, input, new File(swapExtension(input.getAbsolutePath))) - } - - private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.cfg" - - case class Args(inputbam: File = null, samplelabel: Option[String] = None, insertsize: Option[Int] = None) extends AbstractArgs + case class Args(inputbam: File = null, samplelabel: Option[String] = None, + insertsize: Option[Int] = None, output: Option[File] = None) extends AbstractArgs class OptParser extends AbstractOptParser { 
opt[File]('i', "inputbam") required () valueName "<bamfile/path>" action { (x, c) => @@ -71,6 +57,9 @@ object PindelConfig extends ToolCommand { opt[Int]('s', "insertsize") valueName "<insertsize>" action { (x, c) => c.copy(insertsize = Some(x)) } text "Insertsize is missing" + opt[File]('o', "output") valueName "<output>" action { (x, c) => + c.copy(output = Some(x)) + } text "Output path is missing" } /** @@ -81,10 +70,26 @@ object PindelConfig extends ToolCommand { val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) val input: File = commandArgs.inputbam + val output: File = commandArgs.output.getOrElse( new File(input.getAbsoluteFile + ".pindel.cfg") ) + val insertsize: Int = commandArgs.insertsize.getOrElse(0) + + + val bamReader = SamReaderFactory.makeDefault().open(input) + val writer = new PrintWriter(output) + var sampleName: String = "" + val readGroups = bamReader.getFileHeader.getReadGroups().iterator() + while( readGroups.hasNext ) { + writer.write( s"${input.getAbsoluteFile}\t${insertsize}\t${readGroups.next().getSample}\n") + } + bamReader.close() + writer.close() // the logic here is to pull the libraries stored in the bam file and output this to a pindel config file.
// see: http://gmt.genome.wustl.edu/packages/pindel/quick-start.html // this is called bam-configuration file + + // sampleLabel can be given from the commandline or read from the bam header + /** * filename<tab>avg insert size<tab>sample_label or name for reporting * tumor_sample_1222.bam<tab>250<tab>TUMOR_1222 diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala deleted file mode 100644 index e5d276727eda9104fefebddc303747200415c354..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelPipeline.scala +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. 
- */ -package nl.lumc.sasc.biopet.extensions.pindel - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/// Pindel is actually a mini pipeline executing binaries from the pindel package -class PindelPipeline(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Input file (bam)") - var input: File = _ - - @Input(doc = "Reference Fasta file") - var reference: File = _ - - @Argument(doc = "Work directory") - var workdir: String = _ - - // @Output(doc = "Pindel VCF output") - // lazy val outputvcf: File = { - // new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") - // } - - @Output(doc = "Pindel config") - lazy val configfile: File = { - new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.cfg") - } - @Output(doc = "Pindel raw output") - lazy val outputvcf: File = { - new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") - } - - override def init() { - } - - def biopetScript() { - // read config and set all parameters for the pipeline - logger.info("Starting Pindel configuration") - - val cfg = PindelConfig(this, input, this.configfile) - outputFiles += ("pindel_cfg" -> cfg.output) - add(cfg) - - val output: File = this.outputvcf - val pindel = Pindel(this, cfg.output, output) - add(pindel) - outputFiles += ("pindel_tsv" -> pindel.output) - - // val output_vcf: File = this.outputvcf - // convert this tsv to vcf using the python script - - } - - // private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.tsv" -} - -object PindelPipeline extends PipelineCommand { - def apply(root: Configurable, input: File, reference: File, runDir: String): PindelPipeline = { - val pindel = new 
PindelPipeline(root) - pindel.input = input - pindel.reference = reference - pindel.workdir = runDir - // run the following for activating the pipeline steps - pindel.init() - pindel.biopetScript() - pindel - } -} \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala index a810b5257a94479e5e7125e6755722543457ec30..e97dfd5262ed2a12699f4e4fc900df4696e67aa9 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala @@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.pipelines.shiva import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } -import nl.lumc.sasc.biopet.pipelines.shiva.svcallers.{ Delly, Breakdancer, Clever, SvCaller } +import nl.lumc.sasc.biopet.pipelines.shiva.svcallers._ import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript @@ -43,7 +43,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript } /** Variantcallers requested by the config */ - protected val configCallers: Set[String] = config("sv_callers", default = Set("breakdancer", "clever", "delly")) + protected val configCallers: Set[String] = config("sv_callers", default = Set("breakdancer", "clever", "delly", "pindel")) /** This will add jobs for this pipeline */ def biopetScript(): Unit = { @@ -66,7 +66,7 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript } /** Will generate all available variantcallers */ - protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this)) + protected def callersList: List[SvCaller] = List(new 
Breakdancer(this), new Clever(this), new Delly(this), new Pindel(this)) /** Location of summary file */ def summaryFile = new File(outputDir, "ShivaSvCalling.summary.json") diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala new file mode 100644 index 0000000000000000000000000000000000000000..17b669c089543b208e5bff7e06947b97b4d3b2e6 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Pindel.scala @@ -0,0 +1,83 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. 
+ */ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import java.io.File + +import nl.lumc.sasc.biopet.core.{BiopetQScript, PipelineCommand} +import nl.lumc.sasc.biopet.extensions.pindel.{PindelCaller, PindelConfig} +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.queue.QScript + +/// Pindel is actually a mini pipeline executing binaries from the pindel package +class Pindel(val root: Configurable) extends SvCaller { + val name = "pindel" + + def this() = this(null) +// +// @Input(doc = "Input file (bam)") +// var input: File = _ +// +// @Input(doc = "Reference Fasta file") +// var reference: File = _ +// +// @Argument(doc = "Work directory") +// var workdir: String = _ +// +// // @Output(doc = "Pindel VCF output") +// // lazy val outputvcf: File = { +// // new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") +// // } +// +// @Output(doc = "Pindel config") +// def configfile: File = { +// new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.cfg") +// } +// @Output(doc = "Pindel raw output") +// def outputvcf: File = { +// new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") +// } + + def biopetScript() { + for ((sample, bamFile) <- inputBams) { + val pindelDir = new File(outputDir, sample) + + val config_file: File = new File( bamFile.getAbsolutePath + ".pindel.cfg" ) + val cfg = new PindelConfig(this) + cfg.input = bamFile + + // FIXME: get the real insert size of the bam (from bammetrics?) 
+ cfg.insertsize = 500 + cfg.sampleName = sample + cfg.output = config_file + add(cfg) + + val pindel = PindelCaller(this, cfg.output, pindelDir) + add(pindel) + } + + } +} + +object Pindel extends PipelineCommand { + def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = { + val pindel = new Pindel(root) + // run the following for activating the pipeline steps + pindel.init() + pindel.biopetScript() + pindel + } +} \ No newline at end of file