From 95d45227745ff522c44eb6e90986daf8e2033005 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Thu, 24 Sep 2015 16:07:49 +0200 Subject: [PATCH] Move breaking classes --- .../biopet/extensions/pindel/Pindel.scala | 86 ---------- .../extensions/pindel/PindelCaller.scala | 72 -------- .../extensions/pindel/PindelConfig.scala | 86 ---------- .../sasc/biopet/pipelines/yamsvp/Yamsvp.scala | 155 ------------------ .../src/test/resources/log4j.properties | 25 --- 5 files changed, 424 deletions(-) delete mode 100644 public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala delete mode 100644 public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala delete mode 100644 public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala delete mode 100644 public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala delete mode 100644 public/yamsvp/src/test/resources/log4j.properties diff --git a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala deleted file mode 100644 index 107955334..000000000 --- a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/Pindel.scala +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.pindel - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.queue.QScript - -/// Pindel is actually a mini pipeline executing binaries from the pindel package -class Pindel(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Input file (bam)") - var input: File = _ - - @Input(doc = "Reference Fasta file") - var reference: File = _ - - @Argument(doc = "Work directory") - var workdir: String = _ - - // @Output(doc = "Pindel VCF output") - // lazy val outputvcf: File = { - // new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") - // } - - @Output(doc = "Pindel config") - lazy val configfile: File = { - new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.cfg") - } - @Output(doc = "Pindel raw output") - lazy val outputvcf: File = { - new File(workdir + "/" + input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".pindel.vcf") - } - - override def init() { - } - - def biopetScript() { - // read config and set all parameters for the pipeline - logger.info("Starting Pindel configuration") - - val cfg = PindelConfig(this, input, this.configfile) - outputFiles += ("pindel_cfg" -> cfg.output) - add(cfg) - - val output: File = this.outputvcf - val pindel = PindelCaller(this, cfg.output, output) - add(pindel) - outputFiles += ("pindel_tsv" -> pindel.output) - - // val output_vcf: File = this.outputvcf - // convert this tsv to vcf using the python script - - } - - // private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.tsv" -} - -object Pindel extends PipelineCommand { - def apply(root: Configurable, input: File, reference: File, runDir: String): Pindel = { - val pindel = new Pindel(root) - pindel.input = input - pindel.reference = reference - pindel.workdir = runDir - // run the following for activating the pipeline steps - pindel.init() - pindel.biopetScript() - pindel - } -} \ No newline at end of file diff --git a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala deleted file mode 100644 index cbe957e79..000000000 --- a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelCaller.scala +++ /dev/null @@ -1,72 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.pindel - -import java.io.File - -import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } - -class PindelCaller(val root: Configurable) extends BiopetCommandLineFunction { - executable = config("exe", default = "pindel", freeVar = false) - - override def defaultCoreMemory = 5.0 - override def defaultThreads = 8 - - override def versionRegex = """Pindel version:? (.*)""".r - override def versionExitcode = List(1) - override def versionCommand = executable - - @Input(doc = "The pindel configuration file") - var input: File = _ - - @Input(doc = "Fasta reference") - var reference: File = config("reference") - - // this is a pointer to where the results files will be stored - // inside this directory, we can expect files named: - // <prefix>_D - // <prefix>_SI - // <prefix>_I - // <prefix>_TR - @Argument(doc = "Work directory") - var workdir: String = _ - - @Output(doc = "Pindel VCF output") - var output: File = _ - - var window_size: Option[Int] = config("window_size", default = 5) - - override def beforeCmd() { - } - - def cmdLine = required(executable) + - "-i " + required(input) + - "-f " + required(reference) + - "-o " + required(output) + - optional("-w", window_size) + - optional("-T", nCoresRequest) -} - -object PindelCaller { - def apply(root: Configurable, input: File, output: File): PindelCaller = { - val caller = new PindelCaller(root) - caller.input = input - caller.output = output - caller - } -} diff --git a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala deleted file mode 100644 index 55127e59f..000000000 --- a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/extensions/pindel/PindelConfig.scala +++ /dev/null @@ -1,86 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.pindel - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, ToolCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output } - -class PindelConfig(val root: Configurable) extends BiopetJavaCommandLineFunction { - javaMainClass = getClass.getName - @Input(doc = "Bam File") - var input: File = _ - - @Output(doc = "Output Config file") - var output: File = _ - - @Argument(doc = "Insertsize") - var insertsize: Option[Int] = _ - - override def commandLine = super.commandLine + - "-i" + required(input) + - "-s" + required(insertsize) + - "-o" + required(output) -} - -object PindelConfig extends ToolCommand { - def apply(root: Configurable, input: File, output: File): PindelConfig = { - val conf = new PindelConfig(root) - conf.input = input - conf.output = output - conf - } - - def apply(root: Configurable, input: File, outputDir: String): PindelConfig = { - val dir = if (outputDir.endsWith("/")) outputDir else outputDir + "/" - val outputFile = new File(dir + swapExtension(input.getName)) - apply(root, input, outputFile) - } - - def apply(root: Configurable, input: File): PindelConfig = { - apply(root, input, new File(swapExtension(input.getAbsolutePath))) - } - - private def swapExtension(inputFile: String) = inputFile.substring(0, inputFile.lastIndexOf(".bam")) + ".pindel.cfg" - - case class Args(inputbam: File = null, samplelabel: Option[String] = None, insertsize: Option[Int] = None) extends AbstractArgs - - class OptParser extends AbstractOptParser { - opt[File]('i', "inputbam") required () valueName "<bamfile/path>" action { (x, c) => - c.copy(inputbam = x) - } text "Please specify the input bam file" - opt[String]('l', "samplelabel") valueName "<sample label>" action { (x, c) => - c.copy(samplelabel = Some(x)) - } text "Sample label is missing" - opt[Int]('s', "insertsize") valueName "<insertsize>" action { (x, c) => - c.copy(insertsize = Some(x)) - } text "Insertsize is missing" - } - - /** - * @param args the command line arguments - */ - def main(args: Array[String]): Unit = { - val argsParser = new OptParser - val commandArgs: Args = argsParser.parse(args, Args()) getOrElse sys.exit(1) - - val input: File = commandArgs.inputbam - - } -} - diff --git a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala deleted file mode 100644 index a0ade5706..000000000 --- a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala +++ /dev/null @@ -1,155 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -/* - * Structural variation calling - */ - -package nl.lumc.sasc.biopet.pipelines.yamsvp - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } -import nl.lumc.sasc.biopet.extensions.Ln -import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer -import nl.lumc.sasc.biopet.extensions.clever.CleverCaller -import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount -import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge } -//import nl.lumc.sasc.biopet.extensions.pindel.Pindel -import nl.lumc.sasc.biopet.extensions.delly.Delly -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.engine.JobRunInfo -import org.broadinstitute.gatk.queue.function._ - -class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript { - qscript => - def this() = this(null) - def summaryFile = null - def summaryFiles = Map() - def summarySettings = Map() - - var reference: File = config("reference") - - def makeSample(id: String) = new Sample(id) - class Sample(sampleId: String) extends AbstractSample(sampleId) { - - def summaryFiles = Map() - def summaryStats = Map() - - val alignmentDir: String = sampleDir + "alignment/" - val svcallingDir: String = sampleDir + "svcalls/" - - def makeLibrary(id: String) = new Library(id) - class Library(libraryId: String) extends AbstractLibrary(libraryId) { - // val runDir: String = alignmentDir + "run_" + libraryId + "/" - def summaryFiles = Map() - def summaryStats = Map() - - val mapping = new Mapping(qscript) - mapping.libId = Some(libraryId) - mapping.sampleId = Some(sampleId) - - protected def addJobs(): Unit = { - mapping.input_R1 = config("R1") - mapping.input_R2 = config("R2") - mapping.outputDir = libDir - - mapping.init() - mapping.biopetScript() - qscript.addAll(mapping.functions) - } - } - protected def addJobs(): Unit = { - val libraryBamfiles = libraries.map(_._2.mapping.finalBamFile).toList - - val bamFile: File = if (libraryBamfiles.size == 1) { - val alignmentlink = Ln(qscript, libraryBamfiles.head, - alignmentDir + sampleId + ".merged.bam", relative = true) - alignmentlink.isIntermediate = true - add(alignmentlink) - alignmentlink.output - } else if (libraryBamfiles.size > 1) { - val mergeSamFiles = new SambambaMerge(qscript) - mergeSamFiles.input = libraryBamfiles - mergeSamFiles.output = sampleDir + sampleId + ".merged.bam" - mergeSamFiles.isIntermediate = true - add(mergeSamFiles) - mergeSamFiles.output - } else null - - val bamMarkDup = SambambaMarkdup(qscript, bamFile) - add(bamMarkDup) - - addAll(BamMetrics(qscript, bamMarkDup.output, alignmentDir + "metrics" + File.separator).functions) - - // create an IGV TDF file - val tdfCount = IGVToolsCount(qscript, bamMarkDup.output, config("genome_name", default = "hg19")) - add(tdfCount) - - /// bamfile will be used as input for the SV callers. First run Clever - // val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf" - - val cleverDir = svcallingDir + sampleId + ".clever/" - val clever = CleverCaller(qscript, bamMarkDup.output, qscript.reference, svcallingDir, cleverDir) - add(clever) - - val clever_vcf = Ln(qscript, clever.outputvcf, svcallingDir + sampleId + ".clever.vcf", relative = true) - add(clever_vcf) - - val breakdancerDir = svcallingDir + sampleId + ".breakdancer/" - val breakdancer = Breakdancer(qscript, bamMarkDup.output, qscript.reference, breakdancerDir) - addAll(breakdancer.functions) - - val bd_vcf = Ln(qscript, breakdancer.outputvcf, svcallingDir + sampleId + ".breakdancer.vcf", relative = true) - add(bd_vcf) - - val dellyDir = svcallingDir + sampleId + ".delly/" - val delly = Delly(qscript, bamMarkDup.output, dellyDir) - addAll(delly.functions) - - val delly_vcf = Ln(qscript, delly.outputvcf, svcallingDir + sampleId + ".delly.vcf", relative = true) - add(delly_vcf) - - // for pindel we should use per library config collected into one config file - // val pindelDir = svcallingDir + sampleID + ".pindel/" - // val pindel = Pindel(qscript, analysisBam, this.reference, pindelDir) - // sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf)) - // addAll(pindel.functions) - // - // val pindel_vcf = Ln(qscript, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true) - // add(pindel_vcf) - // - } - } - - def addMultiSampleJobs() = {} - - def init() { - } - - def biopetScript() { - logger.info("Starting YAM SV Pipeline") - addSamplesJobs() - } - - override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) { - logger.info("YAM SV Pipeline has run .......................") - } -} - -object Yamsvp extends PipelineCommand \ No newline at end of file diff --git a/public/yamsvp/src/test/resources/log4j.properties b/public/yamsvp/src/test/resources/log4j.properties deleted file mode 100644 index 501af6758..000000000 --- a/public/yamsvp/src/test/resources/log4j.properties +++ /dev/null @@ -1,25 +0,0 @@ -# -# Biopet is built on top of GATK Queue for building bioinformatic -# pipelines. It is mainly intended to support LUMC SHARK cluster which is running -# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) -# should also be able to execute Biopet tools and pipelines. -# -# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center -# -# Contact us at: sasc@lumc.nl -# -# A dual licensing mode is applied. The source code within this project that are -# not part of GATK Queue is freely available for non-commercial use under an AGPL -# license; For commercial users or users who do not want to follow the AGPL -# license, please contact us to obtain a separate license. -# - -# Set root logger level to DEBUG and its only appender to A1. -log4j.rootLogger=ERROR, A1 - -# A1 is set to be a ConsoleAppender. -log4j.appender.A1=org.apache.log4j.ConsoleAppender - -# A1 uses PatternLayout. -log4j.appender.A1.layout=org.apache.log4j.PatternLayout -log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n \ No newline at end of file -- GitLab