Commit 348a42c4 authored by Peter van 't Hof
Browse files

Changed multisample trait

parent 6b28d218
package nl.lumc.sasc.biopet.core
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
trait MultiSampleQScript extends BiopetQScript {
var samples: Map[String, Any] = Map()
/** Per-library output type; bounded by AbstractLibraryOutput and supplied by the concrete pipeline. */
type LibraryOutput <: AbstractLibraryOutput
/**
 * Per-sample output type; bounded by AbstractSampleOutput and supplied by the concrete pipeline.
 *
 * NOTE(review): the type parameter here is also named `LibraryOutput`, so inside this
 * declaration it shadows the `LibraryOutput` type member declared just above.
 */
type SampleOutput[LibraryOutput] <: AbstractSampleOutput[LibraryOutput]
/** Base class for the output of a single library; it declares no members of its own. */
abstract class AbstractLibraryOutput
/**
 * Base class for the output of a single sample.
 *
 * Keeps a map of per-library outputs keyed by string (presumably the library ID;
 * confirm against the code that fills `libraries`).
 *
 * @tparam LibraryOutput type of the per-library output stored in this sample output
 */
abstract class AbstractSampleOutput[LibraryOutput] {
  /** Library outputs by key; the access qualifier restricts direct access to MultiSampleQScript. */
  protected[MultiSampleQScript] var libraries: Map[String, LibraryOutput] = Map()

  /** Returns the current map of all library outputs of this sample. */
  def getAllLibraries: Map[String, LibraryOutput] = libraries

  /**
   * Returns the library output stored under `key`.
   *
   * The lookup is a plain `Map.apply`, so a missing key is not handled here.
   *
   * @param key key of the library to look up
   */
  def getLibrary(key: String): LibraryOutput = libraries(key)
}
var samplesConfig: Map[String, Any] = Map()
var samplesOutput: Map[String, SampleOutput[LibraryOutput]] = Map()
/** Directory for per-sample output: `outputDir` directly followed by "samples/" (no separator is inserted). */
def globalSampleDir: String = s"${outputDir}samples/"
final def runSamplesJobs: Map[String, Map[String, File]] = {
var output: Map[String, Map[String, File]] = Map()
samples = config("samples")
if (samples == null) samples = Map()
if (globalConfig.contains("samples")) for ((key, value) <- samples) {
final def runSamplesJobs() {
samplesConfig = config("samples")
if (samplesConfig == null) samplesConfig = Map()
if (globalConfig.contains("samples")) for ((key, value) <- samplesConfig) {
var sample = Configurable.any2map(value)
if (!sample.contains("ID")) sample += ("ID" -> key)
if (sample("ID") == key) {
var files: Map[String, List[File]] = runSingleSampleJobs(sample)
} else logger.warn("Key is not the same as ID on value for sample")
val output = runSingleSampleJobs(sample)
if (samplesOutput.contains(key)) output.libraries = samplesOutput(key).libraries
samplesOutput += key -> output
}
else logger.warn("Key is not the same as ID on value for sample")
}
else logger.warn("No Samples found in config")
return output
}
def runSingleSampleJobs(sampleConfig: Map[String, Any]): Map[String, List[File]]
def runSingleSampleJobs(sample: String): Map[String, List[File]] = {
return runSingleSampleJobs(Configurable.any2map(samples(sample)))
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput[LibraryOutput]
def runSingleSampleJobs(sample: String): SampleOutput[LibraryOutput] = {
return runSingleSampleJobs(Configurable.any2map(samplesConfig(sample)))
}
final def runLibraryJobs(sampleConfig: Map[String, Any]): Map[String, Map[String, File]] = {
var output: Map[String, Map[String, File]] = Map()
final def runLibraryJobs(sampleConfig: Map[String, Any]): Map[String, LibraryOutput] = {
var output: Map[String, LibraryOutput] = Map()
val sampleID = sampleConfig("ID")
if (sampleConfig.contains("libraries")) {
val runs = Configurable.any2map(sampleConfig("libraries"))
......@@ -37,10 +48,11 @@ trait MultiSampleQScript extends BiopetQScript {
if (!library.contains("ID")) library += ("ID" -> key)
if (library("ID") == key) {
output += key -> runSingleLibraryJobs(library, sampleConfig)
} else logger.warn("Key is not the same as ID on value for run of sample: " + sampleID)
}
else logger.warn("Key is not the same as ID on value for run of sample: " + sampleID)
}
} else logger.warn("No runs found in config for sample: " + sampleID)
return output
}
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): Map[String, File]
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput
}
......@@ -29,6 +29,15 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
var gvcfFiles: List[File] = Nil
var finalBamFiles: List[File] = Nil
/** Output files of one library in the GATK pipeline; the fields are assigned in runSingleLibraryJobs. */
class LibraryOutput extends AbstractLibraryOutput {
// BAM passed as input to the library-level variant calling: either the mapping
// pipeline's "finalBamFile" or the BAM taken from the run config's "bam" entry.
// Starts out as null (`= _`) until assigned.
var mappedBamFile: File = _
// The "final_bam" output of the library-level GatkVariantcalling run.
// Starts out as null (`= _`) until assigned.
var preProcesBamFile: File = _
}
/**
 * Output of one sample in the GATK pipeline; adds no members beyond AbstractSampleOutput.
 *
 * NOTE(review): the type parameter is named `LibraryOutput` and therefore shadows the
 * `LibraryOutput` class of this pipeline within this declaration.
 */
class SampleOutput[LibraryOutput] extends AbstractSampleOutput[LibraryOutput] {
}
def init() {
reference = config("reference", required = true)
dbsnp = config("dbsnp")
......@@ -45,7 +54,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
def biopetScript() {
if (onlySample.isEmpty) {
runSamplesJobs
//SampleWide jobs
if (mergeGvcfs && gvcfFiles.size > 0) {
val newFile = outputDir + "merged.gvcf.vcf"
......@@ -76,14 +85,14 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): Map[String, List[File]] = {
var outputFiles: Map[String, List[File]] = Map()
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput[LibraryOutput] = {
val sampleOutput = new SampleOutput[LibraryOutput]
var libraryBamfiles: List[File] = List()
var sampleID: String = sampleConfig("ID").toString
for ((library, libraryFiles) <- runLibraryJobs(sampleConfig)) {
libraryBamfiles +:= libraryFiles("FinalBam")
libraryBamfiles +:= libraryFiles.preProcesBamFile
}
outputFiles += ("final_bam" -> libraryBamfiles)
//outputFiles += ("final_bam" -> libraryBamfiles)
if (libraryBamfiles.size > 0) {
finalBamFiles ++= libraryBamfiles
......@@ -98,12 +107,12 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
addAll(gatkVariantcalling.functions)
gvcfFiles :+= gatkVariantcalling.outputFile
} else logger.warn("No bamfiles for variant calling for sample: " + sampleID)
return outputFiles
return sampleOutput
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): Map[String, File] = {
var outputFiles: Map[String, File] = Map()
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
......@@ -113,8 +122,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
if (runConfig.contains("R1")) {
val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
addAll(mapping.functions) // Add functions of mapping to curent function pool
outputFiles += ("mapped_bam" -> mapping.outputFiles("finalBamFile"))
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else if (runConfig.contains("bam")) {
var bamFile = new File(runConfig("bam").toString)
if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile)
......@@ -147,20 +155,20 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
"\nPossible to set 'correct_readgroups' to true on config to automatic fix this")
}
outputFiles += ("mapped_bam" -> bamFile)
libraryOutput.mappedBamFile = bamFile
} else logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
val gatkVariantcalling = new GatkVariantcalling(this)
gatkVariantcalling.inputBams = List(outputFiles("mapped_bam"))
gatkVariantcalling.inputBams = List(libraryOutput.mappedBamFile)
gatkVariantcalling.outputDir = runDir
gatkVariantcalling.variantcalling = config("library_variantcalling", default = false)
gatkVariantcalling.preProcesBams = true
gatkVariantcalling.init
gatkVariantcalling.biopetScript
addAll(gatkVariantcalling.functions)
outputFiles += "final_bam" -> gatkVariantcalling.outputFiles("final_bam")
libraryOutput.preProcesBamFile = gatkVariantcalling.outputFiles("final_bam")
return outputFiles
return libraryOutput
}
}
......
......@@ -35,6 +35,15 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
)
)
/** Output files of one library in the Sage pipeline; the fields are assigned in runSingleLibraryJobs. */
class LibraryOutput extends AbstractLibraryOutput {
// The mapping pipeline's "finalBamFile" for this library. Starts out as null (`= _`) until assigned.
var mappedBamFile: File = _
// Output file of the prefixFastq job added for this library. Starts out as null (`= _`) until assigned.
var prefixFastq: File = _
}
/**
 * Output of one sample in the Sage pipeline; adds no members beyond AbstractSampleOutput.
 *
 * NOTE(review): the type parameter is named `LibraryOutput` and therefore shadows the
 * `LibraryOutput` class of this pipeline within this declaration.
 */
class SampleOutput[LibraryOutput] extends AbstractSampleOutput[LibraryOutput] {
}
def init() {
if (!outputDir.endsWith("/")) outputDir += "/"
if (countBed == null) countBed = config("count_bed")
......@@ -69,15 +78,15 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): Map[String, List[File]] = {
var outputFiles: Map[String, List[File]] = Map()
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput[LibraryOutput] = {
val sampleOutput = new SampleOutput[LibraryOutput]
var libraryBamfiles: List[File] = List()
var libraryFastqFiles: List[File] = List()
val sampleID: String = sampleConfig("ID").toString
val sampleDir: String = globalSampleDir + sampleID + "/"
for ((library, libraryFiles) <- runLibraryJobs(sampleConfig)) {
libraryFastqFiles +:= libraryFiles("prefix_fastq")
libraryBamfiles +:= libraryFiles("FinalBam")
libraryFastqFiles +:= libraryFiles.prefixFastq
libraryBamfiles +:= libraryFiles.mappedBamFile
}
val bamFile: File = if (libraryBamfiles.size == 1) libraryBamfiles.head
......@@ -96,12 +105,12 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
addBedtoolsCounts(bamFile, sampleID, sampleDir)
addTablibCounts(fastqFile, sampleID, sampleDir)
return outputFiles
return sampleOutput
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): Map[String, File] = {
var outputFiles: Map[String, File] = Map()
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
......@@ -122,7 +131,7 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
prefixFastq.prefix = config("sage_tag", default = "CATG")
prefixFastq.deps +:= flexiprep.outputFiles("fastq_input_R1")
add(prefixFastq)
outputFiles += ("prefix_fastq" -> prefixFastq.output)
libraryOutput.prefixFastq = prefixFastq.output
val mapping = new Mapping(this)
mapping.skipFlexiprep = true
......@@ -144,9 +153,9 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
addTablibCounts(prefixFastq.output, sampleID + "-" + runID, runDir)
}
outputFiles += ("FinalBam" -> mapping.outputFiles("finalBamFile"))
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
return outputFiles
return libraryOutput
}
def addBedtoolsCounts(bamFile:File, outputPrefix: String, outputDir: String) {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment