Skip to content
Snippets Groups Projects
Commit 0f475cfa authored by Wai Yi Leung's avatar Wai Yi Leung
Browse files

Refactor Yamsvp to new sample/lib model

parent a283a8f3
No related branches found
No related tags found
No related merge requests found
......@@ -19,13 +19,15 @@
package nl.lumc.sasc.biopet.pipelines.yamsvp
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.{ BiopetQScript, MultiSampleQScript, PipelineCommand }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaIndex, SambambaMerge, SambambaMarkdup }
import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMerge, SambambaMarkdup }
//import nl.lumc.sasc.biopet.extensions.svcallers.pindel.Pindel
import nl.lumc.sasc.biopet.extensions.svcallers.{ Breakdancer, Delly, CleverCaller }
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
......@@ -35,163 +37,112 @@ import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.function._
import org.broadinstitute.gatk.queue.engine.JobRunInfo
class Yamsvp(val root: Configurable) extends QScript with BiopetQScript { //with MultiSampleQScript {
class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
qscript =>
def this() = this(null)
var reference: File = config("reference", required = true)
var finalBamFiles: List[File] = Nil
/*
class LibraryOutput extends AbstractLibraryOutput {
var mappedBamFile: File = _
}
class SampleOutput extends AbstractSampleOutput {
var vcf: Map[String, List[File]] = Map()
var mappedBamFile: File = _
}
*/
/**
 * Checks the configured output directory and normalises it.
 *
 * Throws an IllegalStateException when `outputDir` is null; otherwise appends
 * a trailing "/" when `outputDir` does not already end with one.
 */
override def init(): Unit = {
  // Guard clause: without an output directory nothing below can proceed.
  if (outputDir == null) {
    throw new IllegalStateException("Output directory is not specified in the config / argument")
  }
  // Make sure the directory string ends with a slash.
  if (!outputDir.endsWith("/")) {
    outputDir += "/"
  }
}
/** Builds the [[Sample]] instance for the sample with the given id. */
def makeSample(id: String): Sample = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def biopetScript() {
// write the pipeline here
// start with QC, alignment, call sambamba, call sv callers, reporting
val alignmentDir: String = sampleDir + "alignment/"
val svcallingDir: String = sampleDir + "svcalls/"
// read config and set all parameters for the pipeline
logger.info("Starting YAM SV Pipeline")
//runSamplesJobs
//
/** Builds the [[Library]] instance for the library with the given id. */
def makeLibrary(id: String): Library = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
}
// val runDir: String = alignmentDir + "run_" + libraryId + "/"
/**
 * Hook that logs a fixed message once execution has finished.
 *
 * @param jobs    map from each function to its run information (not inspected here)
 * @param success overall success flag (not inspected here)
 */
override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean): Unit =
  logger.info("YAM SV Pipeline has run .......................")
/*
def runSingleSampleJobs(sampleID: String): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
var outputFiles: Map[String, List[File]] = Map()
var libraryFastqFiles: List[File] = List()
val sampleDir: String = outputDir + sampleID + "/"
val alignmentDir: String = sampleDir + "alignment/"
val mapping = new Mapping(qscript)
mapping.libraryId = libraryId
mapping.sampleId = sampleId
val svcallingDir: String = sampleDir + "svcalls/"
protected def addJobs(): Unit = {
mapping.input_R1 = config("R1", required = true)
mapping.input_R2 = config("R2", required = true)
mapping.outputDir = libDir
sampleOutput.libraries = runLibraryJobs(sampleID)
for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
// Append this library's mapped BAM file to libraryBamfiles (like '~=' in D, .append in Python or .push_back in C++)
libraryBamfiles ++= List(libraryOutput.mappedBamFile)
mapping.init
mapping.biopetScript
qscript.addAll(mapping.functions)
}
}
val bamFile: File =
if (libraryBamfiles.size == 1) {
// When the sample has only 1 run, make a link in the main alignment directory
val alignmentlink = Ln(this, libraryBamfiles.head,
alignmentDir + sampleID + ".merged.bam", true)
add(alignmentlink, isIntermediate=true)
protected def addJobs(): Unit = {
addLibsJobs()
val libraryBamfiles = libraries.map(_._2.mapping.finalBamFile).toList
val bamFile: File = if (libraryBamfiles.size == 1) {
val alignmentlink = Ln(qscript, libraryBamfiles.head,
alignmentDir + sampleId + ".merged.bam", true)
alignmentlink.isIntermediate = true
add(alignmentlink)
alignmentlink.out
} else if (libraryBamfiles.size > 1) {
val mergeSamFiles = new SambambaMerge(this)
val mergeSamFiles = new SambambaMerge(qscript)
mergeSamFiles.input = libraryBamfiles
mergeSamFiles.output = alignmentDir + sampleID + ".merged.bam"
add(mergeSamFiles, isIntermediate=true)
mergeSamFiles.output = sampleDir + sampleId + ".merged.bam"
mergeSamFiles.isIntermediate = true
add(mergeSamFiles)
mergeSamFiles.output
} else null
val bamMarkDup = SambambaMarkdup(this, bamFile)
add(bamMarkDup)
val bamMarkDup = SambambaMarkdup(qscript, bamFile)
add(bamMarkDup)
addAll(BamMetrics(this, bamMarkDup.output, alignmentDir + "metrics/").functions)
addAll(BamMetrics(qscript, bamMarkDup.output, alignmentDir + "metrics" + File.separator).functions)
// create an IGV TDF file
val tdfCount = IGVToolsCount(this, bamMarkDup.output, config("genomename", default = "hg19"))
add(tdfCount)
// create an IGV TDF file
val tdfCount = IGVToolsCount(qscript, bamMarkDup.output, config("genome_name", default = "hg19"))
add(tdfCount)
/// bamfile will be used as input for the SV callers. First run Clever
// val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
/// bamfile will be used as input for the SV callers. First run Clever
// val cleverVCF : File = sampleDir + "/" + sampleID + ".clever.vcf"
val cleverDir = svcallingDir + sampleID + ".clever/"
val clever = CleverCaller(this, bamMarkDup.output, this.reference, svcallingDir, cleverDir)
sampleOutput.vcf += ("clever" -> List(clever.outputvcf))
add(clever)
val cleverDir = svcallingDir + sampleId + ".clever/"
val clever = CleverCaller(qscript, bamMarkDup.output, qscript.reference, svcallingDir, cleverDir)
add(clever)
val clever_vcf = Ln(this, clever.outputvcf, svcallingDir + sampleID + ".clever.vcf", relative = true)
add(clever_vcf)
val clever_vcf = Ln(qscript, clever.outputvcf, svcallingDir + sampleId + ".clever.vcf", relative = true)
add(clever_vcf)
val breakdancerDir = svcallingDir + sampleID + ".breakdancer/"
val breakdancer = Breakdancer(this, bamMarkDup.output, this.reference, breakdancerDir)
sampleOutput.vcf += ("breakdancer" -> List(breakdancer.outputvcf))
addAll(breakdancer.functions)
val breakdancerDir = svcallingDir + sampleId + ".breakdancer/"
val breakdancer = Breakdancer(qscript, bamMarkDup.output, qscript.reference, breakdancerDir)
addAll(breakdancer.functions)
val bd_vcf = Ln(this, breakdancer.outputvcf, svcallingDir + sampleID + ".breakdancer.vcf", relative = true)
add(bd_vcf)
val bd_vcf = Ln(qscript, breakdancer.outputvcf, svcallingDir + sampleId + ".breakdancer.vcf", relative = true)
add(bd_vcf)
val dellyDir = svcallingDir + sampleID + ".delly/"
val delly = Delly(this, bamMarkDup.output, dellyDir)
sampleOutput.vcf += ("delly" -> List(delly.outputvcf))
addAll(delly.functions)
val dellyDir = svcallingDir + sampleId + ".delly/"
val delly = Delly(qscript, bamMarkDup.output, dellyDir)
addAll(delly.functions)
val delly_vcf = Ln(qscript, delly.outputvcf, svcallingDir + sampleId + ".delly.vcf", relative = true)
add(delly_vcf)
// for pindel we should use per library config collected into one config file
// val pindelDir = svcallingDir + sampleID + ".pindel/"
// val pindel = Pindel(qscript, analysisBam, this.reference, pindelDir)
// sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
// addAll(pindel.functions)
//
// val pindel_vcf = Ln(qscript, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true)
// add(pindel_vcf)
//
}
}
val delly_vcf = Ln(this, delly.outputvcf, svcallingDir + sampleID + ".delly.vcf", relative = true)
add(delly_vcf)
// No initialisation is performed here: the body is empty.
def init(): Unit = {}
// for pindel we should use per library config collected into one config file
// val pindelDir = svcallingDir + sampleID + ".pindel/"
// val pindel = Pindel(this, analysisBam, this.reference, pindelDir)
// sampleOutput.vcf += ("pindel" -> List(pindel.outputvcf))
// addAll(pindel.functions)
//
// val pindel_vcf = Ln(this, pindel.outputvcf, svcallingDir + sampleID + ".pindel.vcf", relative = true)
// add(pindel_vcf)
//
return sampleOutput
// Pipeline body: logs the start message, then delegates to addSamplesJobs.
def biopetScript(): Unit = {
  logger.info("Starting YAM SV Pipeline")
  addSamplesJobs
}
// Called for each run from a sample
def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = {
val libraryOutput = new LibraryOutput
val alignmentDir: String = outputDir + sampleID + "/alignment/"
val runDir: String = alignmentDir + "run_" + libraryId + "/"
if (config.contains("R1")) {
val mapping = new Mapping(this)
// TODO: check and test config[aligner] in json
// yamsvp/aligner -> value
// this setting causes error if not defined?
mapping.aligner = config("aligner", default = "bwa")
mapping.skipFlexiprep = false
mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
mapping.input_R1 = config("R1")
mapping.input_R2 = config("R2")
mapping.paired = (mapping.input_R2 != null)
mapping.RGLB = libraryId
mapping.RGSM = sampleID
mapping.RGPL = config("PL")
mapping.RGPU = config("PU")
mapping.RGCN = config("CN")
mapping.outputDir = runDir
mapping.init
mapping.biopetScript
addAll(mapping.functions)
// start sambamba dedup
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryId)
return libraryOutput
// logger.debug(outputFiles)
// return outputFiles
// Completion hook: neither `jobs` nor `success` is examined; only a fixed
// informational message is written to the log.
override def onExecutionDone(jobs: Map[QFunction, JobRunInfo], success: Boolean): Unit =
  logger.info("YAM SV Pipeline has run .......................")
*/
}
// Companion object of the Yamsvp class; it extends PipelineCommand and adds no members of its own.
// NOTE(review): presumably PipelineCommand supplies the command-line entry point — confirm.
object Yamsvp extends PipelineCommand
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment