diff --git a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala
index 8434495c76d81c8fa1e6df2ffcdc612c0f9add17..c57ce72d140cafab19d305730310372cdf001fe1 100644
--- a/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala
+++ b/public/yamsvp/src/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala
@@ -19,13 +19,15 @@ package nl.lumc.sasc.biopet.pipelines.yamsvp
 
+import java.io.File
+
 import nl.lumc.sasc.biopet.core.config.Configurable
 import nl.lumc.sasc.biopet.core.{ BiopetQScript, MultiSampleQScript, PipelineCommand }
 
 import nl.lumc.sasc.biopet.extensions.Ln
 import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount
-import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaIndex, SambambaMerge, SambambaMarkdup }
-import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
+import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMerge, SambambaMarkdup }
+//import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
 import nl.lumc.sasc.biopet.extensions.svcallers.{ Breakdancer, Delly, CleverCaller }
 
 import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
 
@@ -35,163 +37,112 @@ import org.broadinstitute.gatk.queue.QScript
 import org.broadinstitute.gatk.queue.function._
 import org.broadinstitute.gatk.queue.engine.JobRunInfo
 
-class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with MultiSampleQScript {
+class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
+  qscript =>
   def this() = this(null)
 
   var reference: File = config("reference", required = true)
-  var finalBamFiles: List[File] = Nil
 
-  /*
-  class LibraryOutput extends AbstractLibraryOutput {
-    var mappedBamFile: File = _
-  }
-  class SampleOutput extends AbstractSampleOutput {
-    var vcf: Map[String, List[File]] = Map()
-    var mappedBamFile: File = _
-  }
-*/
-  override def init() {
-    if (outputDir == null)
-      throw new IllegalStateException("Output directory is not specified in the config / argument")
-    else if (!outputDir.endsWith("/"))
-      outputDir += "/"
-  }
+  def makeSample(id: String) = new Sample(id)
+  class Sample(sampleId: String) extends AbstractSample(sampleId) {
 
-  def biopetScript() {
-    // write the pipeline here
-    // start with QC, alignment, call sambamba, call sv callers, reporting
+    val alignmentDir: String = sampleDir + "alignment/"
+    val svcallingDir: String = sampleDir + "svcalls/"
 
-    // read config and set all parameters for the pipeline
-    logger.info("Starting YAM SV Pipeline")
-    //runSamplesJobs
-    //
+    def makeLibrary(id: String) = new Library(id)
+    class Library(libraryId: String) extends AbstractLibrary(libraryId) {
 
-  }
+      // val runDir: String = alignmentDir + "run_" + libraryId + "/"
 
-  override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) {
-    logger.info("YAM SV Pipeline has run .......................")
-  }
-  /*
-  def runSingleSampleJobs(sampleID: String): SampleOutput = {
-    val sampleOutput = new SampleOutput
-    var libraryBamfiles: List[File] = List()
-    var outputFiles: Map[String, List[File]] = Map()
-    var libraryFastqFiles: List[File] = List()
-    val sampleDir: String = outputDir + sampleID + "/"
-    val alignmentDir: String = sampleDir + "alignment/"
+      val mapping = new Mapping(qscript)
+      mapping.libraryId = libraryId
+      mapping.sampleId = sampleId
 
-    val svcallingDir: String = sampleDir + "svcalls/"
+      protected def addJobs(): Unit = {
+        mapping.input_R1 = config("R1", required = true)
+        mapping.input_R2 = config("R2", required = true)
+        mapping.outputDir = libDir
 
-    sampleOutput.libraries = runLibraryJobs(sampleID)
-    for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
-      // this is extending the libraryBamfiles list like '~=' in D or .append in Python or .push_back in C++
-      libraryBamfiles ++= List(libraryOutput.mappedBamFile)
+        mapping.init
+        mapping.biopetScript
+        qscript.addAll(mapping.functions)
+      }
     }
-
-    val bamFile: File =
-      if (libraryBamfiles.size == 1) {
-        // When the sample has only 1 run, make a link in the main alignment directory
-        val alignmentlink = Ln(this, libraryBamfiles.head,
-          alignmentDir + sampleID + ".merged.bam", true)
-        add(alignmentlink, isIntermediate=true)
+    protected def addJobs(): Unit = {
+      addLibsJobs()
+      val libraryBamfiles = libraries.map(_._2.mapping.finalBamFile).toList
+
+      val bamFile: File = if (libraryBamfiles.size == 1) {
+        val alignmentlink = Ln(qscript, libraryBamfiles.head,
+          alignmentDir + sampleId + ".merged.bam", true)
+        alignmentlink.isIntermediate = true
+        add(alignmentlink)
        alignmentlink.out
      } else if (libraryBamfiles.size > 1) {
-        val mergeSamFiles = new SambambaMerge(this)
+        val mergeSamFiles = new SambambaMerge(qscript)
        mergeSamFiles.input = libraryBamfiles
-        mergeSamFiles.output = alignmentDir + sampleID + ".merged.bam"
-        add(mergeSamFiles, isIntermediate=true)
+        mergeSamFiles.output = sampleDir + sampleId + ".merged.bam"
+        mergeSamFiles.isIntermediate = true
+        add(mergeSamFiles)
        mergeSamFiles.output
      } else null
 
-    val bamMarkDup = SambambaMarkdup(this, bamFile)
-    add(bamMarkDup)
+      val bamMarkDup = SambambaMarkdup(qscript, bamFile)
+      add(bamMarkDup)
 
-    addAll(BamMetrics(this, bamMarkDup.output, alignmentDir + "metrics/").functions)
+      addAll(BamMetrics(qscript, bamMarkDup.output, alignmentDir + "metrics" + File.separator).functions)
 
-    // create an IGV TDF file
-    val tdfCount = IGVToolsCount(this, bamMarkDup.output, config("genomename", default = "hg19"))
-    add(tdfCount)
+      // create an IGV TDF file
+      val tdfCount = IGVToolsCount(qscript, bamMarkDup.output, config("genome_name", default = "hg19"))
+      add(tdfCount)
 
-    /// bamfile will be used as input for the SV callers. First run Clever
-    // val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
+      /// bamfile will be used as input for the SV callers. First run Clever
+      // val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
 
-    val cleverDir = svcallingDir + sampleID + ".clever/"
-    val clever = CleverCaller(this, bamMarkDup.output, this.reference, svcallingDir, cleverDir)
-    sampleOutput.vcf += ("clever" -> List(clever.outputvcf))
-    add(clever)
+      val cleverDir = svcallingDir + sampleId + ".clever/"
+      val clever = CleverCaller(qscript, bamMarkDup.output, qscript.reference, svcallingDir, cleverDir)
+      add(clever)
 
-    val clever_vcf = Ln(this, clever.outputvcf, svcallingDir + sampleID + ".clever.vcf", relative = true)
-    add(clever_vcf)
+      val clever_vcf = Ln(qscript, clever.outputvcf, svcallingDir + sampleId + ".clever.vcf", relative = true)
+      add(clever_vcf)
 
-    val breakdancerDir = svcallingDir + sampleID + ".breakdancer/"
-    val breakdancer = Breakdancer(this, bamMarkDup.output, this.reference, breakdancerDir)
-    sampleOutput.vcf += ("breakdancer" -> List(breakdancer.outputvcf))
-    addAll(breakdancer.functions)
+      val breakdancerDir = svcallingDir + sampleId + ".breakdancer/"
+      val breakdancer = Breakdancer(qscript, bamMarkDup.output, qscript.reference, breakdancerDir)
+      addAll(breakdancer.functions)
 
-    val bd_vcf = Ln(this, breakdancer.outputvcf, svcallingDir + sampleID + ".breakdancer.vcf", relative = true)
-    add(bd_vcf)
+      val bd_vcf = Ln(qscript, breakdancer.outputvcf, svcallingDir + sampleId + ".breakdancer.vcf", relative = true)
+      add(bd_vcf)
 
-    val dellyDir = svcallingDir + sampleID + ".delly/"
-    val delly = Delly(this, bamMarkDup.output, dellyDir)
-    sampleOutput.vcf += ("delly" -> List(delly.outputvcf))
-    addAll(delly.functions)
+      val dellyDir = svcallingDir + sampleId + ".delly/"
+      val delly = Delly(qscript, bamMarkDup.output, dellyDir)
+      addAll(delly.functions)
+
+      val delly_vcf = Ln(qscript, delly.outputvcf, svcallingDir + sampleId + ".delly.vcf", relative = true)
+      add(delly_vcf)
+
+      // for pindel we should use per library config collected into one config file
+      // val pindelDir = svcallingDir + sampleID + ".pindel/"
+      // val pindel = Pindel(qscript, analysisBam, this.reference, pindelDir)
+      // sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
+      // addAll(pindel.functions)
+      //
+      // val pindel_vcf = Ln(qscript, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true)
+      // add(pindel_vcf)
+      //
+    }
+  }
 
-    val delly_vcf = Ln(this, delly.outputvcf, svcallingDir + sampleID + ".delly.vcf", relative = true)
-    add(delly_vcf)
+  def init() {
+  }
 
-    // for pindel we should use per library config collected into one config file
-    // val pindelDir = svcallingDir + sampleID + ".pindel/"
-    // val pindel = Pindel(this, analysisBam, this.reference, pindelDir)
-    // sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
-    // addAll(pindel.functions)
-    //
-    // val pindel_vcf = Ln(this, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true)
-    // add(pindel_vcf)
-    //
-    return sampleOutput
+  def biopetScript() {
+    logger.info("Starting YAM SV Pipeline")
+    addSamplesJobs
  }
 
-  // Called for each run from a sample
-
-  def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = {
-    val libraryOutput = new LibraryOutput
-
-    val alignmentDir: String = outputDir + sampleID + "/alignment/"
-    val runDir: String = alignmentDir + "run_" + libraryId + "/"
-
-    if (config.contains("R1")) {
-      val mapping = new Mapping(this)
-
-      // TODO: check and test config[aligner] in json
-      // yamsvp/aligner -> value
-      // this setting causes error if not defined?
-      mapping.aligner = config("aligner", default = "bwa")
-      mapping.skipFlexiprep = false
-      mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
-
-      mapping.input_R1 = config("R1")
-      mapping.input_R2 = config("R2")
-      mapping.paired = (mapping.input_R2 != null)
-      mapping.RGLB = libraryId
-      mapping.RGSM = sampleID
-      mapping.RGPL = config("PL")
-      mapping.RGPU = config("PU")
-      mapping.RGCN = config("CN")
-      mapping.outputDir = runDir
-
-      mapping.init
-      mapping.biopetScript
-      addAll(mapping.functions)
-
-      // start sambamba dedup
-
-      libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
-    } else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryId)
-    return libraryOutput
-    // logger.debug(outputFiles)
-    // return outputFiles
+  override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean) {
+    logger.info("YAM SV Pipeline has run .......................")
  }
-  */
 }
 
 object Yamsvp extends PipelineCommand
\ No newline at end of file