Commit 33e6ef14 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Changing pipeline to improved multisampleQScript

parent a5fb226c
......@@ -116,12 +116,11 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
def runSingleSampleJobs(sampleID: String): SampleOutput = {
val sampleOutput = new SampleOutput
val sampleID: String = sampleConfig("ID").toString
val sampleDir = globalSampleDir + sampleID + "/"
sampleOutput.libraries = runLibraryJobs(sampleConfig)
sampleOutput.libraries = runLibraryJobs(sampleID)
sampleOutput.output = addGenerateFasta(sampleID, sampleDir)
sampleOutput.outputSnps = addGenerateFasta(sampleID, sampleDir, snpsOnly = true)
......@@ -130,12 +129,10 @@ class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
def runSingleLibraryJobs(libraryID: String, sampleID: String): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
val runDir: String = globalSampleDir + sampleID + "/run_" + libraryID + "/"
return libraryOutput
}
......
......@@ -23,9 +23,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Argument }
class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript {
def this() = this(null)
@Argument(doc = "Only Sample", shortName = "sample", required = false)
val onlySample: List[String] = Nil
@Argument(doc = "Skip Genotyping step", shortName = "skipgenotyping", required = false)
var skipGenotyping: Boolean = false
......@@ -68,73 +65,70 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
def biopetScript() {
if (onlySample.isEmpty) {
runSamplesJobs
//SampleWide jobs
if (mergeGvcfs && gvcfFiles.size > 0) {
val newFile = outputDir + "merged.gvcf.vcf.gz"
add(CombineGVCFs(this, gvcfFiles, newFile))
gvcfFiles = List(newFile)
}
runSamplesJobs
if (!skipGenotyping && gvcfFiles.size > 0) {
if (jointGenotyping) {
val gatkGenotyping = new GatkGenotyping(this)
gatkGenotyping.inputGvcfs = gvcfFiles
gatkGenotyping.outputDir = outputDir + "genotyping/"
gatkGenotyping.init
gatkGenotyping.biopetScript
addAll(gatkGenotyping.functions)
var vcfFile = gatkGenotyping.outputFile
}
} else logger.warn("No gVCFs to genotype")
if (jointVariantcalling) {
val allBamfiles = for (
(sampleID, sampleOutput) <- samplesOutput;
file <- sampleOutput.variantcalling.bamFiles
) yield file
val allRawVcfFiles = for ((sampleID, sampleOutput) <- samplesOutput) yield sampleOutput.variantcalling.rawFilterVcfFile
val gatkVariantcalling = new GatkVariantcalling(this) {
override def configName = "gatkvariantcalling"
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
}
//SampleWide jobs
if (mergeGvcfs && gvcfFiles.size > 0) {
val newFile = outputDir + "merged.gvcf.vcf.gz"
add(CombineGVCFs(this, gvcfFiles, newFile))
gvcfFiles = List(newFile)
}
if (gatkVariantcalling.useMpileup) {
val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz")
add(cvRaw)
gatkVariantcalling.rawVcfInput = cvRaw.out
}
if (!skipGenotyping && gvcfFiles.size > 0) {
if (jointGenotyping) {
val gatkGenotyping = new GatkGenotyping(this)
gatkGenotyping.inputGvcfs = gvcfFiles
gatkGenotyping.outputDir = outputDir + "genotyping/"
gatkGenotyping.init
gatkGenotyping.biopetScript
addAll(gatkGenotyping.functions)
var vcfFile = gatkGenotyping.outputFile
}
} else logger.warn("No gVCFs to genotype")
if (jointVariantcalling) {
val allBamfiles = for (
(sampleID, sampleOutput) <- samplesOutput;
file <- sampleOutput.variantcalling.bamFiles
) yield file
val allRawVcfFiles = for ((sampleID, sampleOutput) <- samplesOutput) yield sampleOutput.variantcalling.rawFilterVcfFile
val gatkVariantcalling = new GatkVariantcalling(this) {
override def configName = "gatkvariantcalling"
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
}
multisampleVariantcalling.preProcesBams = false
multisampleVariantcalling.doublePreProces = false
multisampleVariantcalling.inputBams = allBamfiles.toList
multisampleVariantcalling.outputDir = outputDir + "variantcalling"
multisampleVariantcalling.outputName = "multisample"
multisampleVariantcalling.init
multisampleVariantcalling.biopetScript
addAll(multisampleVariantcalling.functions)
if (config("inputtype", default = "dna").asString != "rna" && config("recalibration", default = false).asBoolean) {
val recalibration = new GatkVariantRecalibration(this)
recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile
recalibration.bamFiles = finalBamFiles
recalibration.outputDir = outputDir + "recalibration/"
recalibration.init
recalibration.biopetScript
}
if (gatkVariantcalling.useMpileup) {
val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz")
add(cvRaw)
gatkVariantcalling.rawVcfInput = cvRaw.out
}
} else for (sample <- onlySample) runSingleSampleJobs(sample)
multisampleVariantcalling.preProcesBams = false
multisampleVariantcalling.doublePreProces = false
multisampleVariantcalling.inputBams = allBamfiles.toList
multisampleVariantcalling.outputDir = outputDir + "variantcalling"
multisampleVariantcalling.outputName = "multisample"
multisampleVariantcalling.init
multisampleVariantcalling.biopetScript
addAll(multisampleVariantcalling.functions)
if (config("inputtype", default = "dna").asString != "rna" && config("recalibration", default = false).asBoolean) {
val recalibration = new GatkVariantRecalibration(this)
recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile
recalibration.bamFiles = finalBamFiles
recalibration.outputDir = outputDir + "recalibration/"
recalibration.init
recalibration.biopetScript
}
}
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
def runSingleSampleJobs(sampleID: String): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
val sampleID: String = getCurrentSample
sampleOutput.libraries = runLibraryJobs(sampleConfig)
sampleOutput.libraries = runLibraryJobs(sampleID)
val sampleDir = globalSampleDir + sampleID
for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
libraryBamfiles ++= libraryOutput.variantcalling.bamFiles
......@@ -161,11 +155,9 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = getCurrentLibrary
val sampleID: String = getCurrentSample
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
val runDir: String = globalSampleDir + sampleID + "/run_" + libraryId + "/"
var inputType: String = config("inputtype", default = "dna")
def loadFromLibraryConfig(startJobs: Boolean = true): Mapping = {
......@@ -173,7 +165,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
mapping.input_R1 = config("R1")
mapping.input_R2 = config("R2")
mapping.RGLB = runID
mapping.RGLB = libraryId
mapping.RGSM = sampleID
mapping.RGPL = config("PL")
mapping.RGPU = config("PU")
......@@ -196,8 +188,8 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile)
if (config("bam_to_fastq", default = false).asBoolean) {
val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + runID + ".R1.fastq",
runDir + sampleID + "-" + runID + ".R2.fastq")
val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + libraryId + ".R1.fastq",
runDir + sampleID + "-" + libraryId + ".R2.fastq")
add(samToFastq, isIntermediate = true)
val mapping = loadFromLibraryConfig(startJobs = false)
mapping.input_R1 = samToFastq.fastqR1
......@@ -212,17 +204,17 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
val header = inputSam.getFileHeader.getReadGroups
for (readGroup <- inputSam.getFileHeader.getReadGroups) {
if (readGroup.getSample != sampleID) logger.warn("Sample ID readgroup in bam file is not the same")
if (readGroup.getLibrary != runID) logger.warn("Library ID readgroup in bam file is not the same")
if (readGroup.getSample != sampleID || readGroup.getLibrary != runID) readGroupOke = false
if (readGroup.getLibrary != libraryId) logger.warn("Library ID readgroup in bam file is not the same")
if (readGroup.getSample != sampleID || readGroup.getLibrary != libraryId) readGroupOke = false
}
inputSam.close
if (!readGroupOke) {
if (config("correct_readgroups", default = false)) {
logger.info("Correcting readgroups, file:" + bamFile)
val aorrg = AddOrReplaceReadGroups(this, bamFile, new File(runDir + sampleID + "-" + runID + ".bam"))
aorrg.RGID = sampleID + "-" + runID
aorrg.RGLB = runID
val aorrg = AddOrReplaceReadGroups(this, bamFile, new File(runDir + sampleID + "-" + libraryId + ".bam"))
aorrg.RGID = sampleID + "-" + libraryId
aorrg.RGLB = libraryId
aorrg.RGSM = sampleID
aorrg.RGPL = config("PL", default = "illumina")
aorrg.RGPU = config("PU", default = "na")
......@@ -237,7 +229,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri
libraryOutput.mappedBamFile = bamFile
}
} else {
logger.error("Sample: " + sampleID + ": No R1 found for run: " + runID)
logger.error("Sample: " + sampleID + ": No R1 found for run: " + libraryId)
return libraryOutput
}
......
......@@ -92,13 +92,12 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
def runSingleSampleJobs(sampleID: String): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
var libraryFastqFiles: List[File] = List()
val sampleID: String = sampleConfig("ID").toString
val sampleDir: String = globalSampleDir + sampleID + "/"
for ((library, libraryFiles) <- runLibraryJobs(sampleConfig)) {
for ((library, libraryFiles) <- runLibraryJobs(sampleID)) {
libraryFastqFiles +:= libraryFiles.prefixFastq
libraryBamfiles +:= libraryFiles.mappedBamFile
}
......@@ -123,19 +122,17 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
def runSingleLibraryJobs(libraryID: String, sampleID: String): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
if (runConfig.contains("R1")) {
val runDir: String = globalSampleDir + sampleID + "/run_" + libraryID + "/"
if (config.contains("R1")) {
val flexiprep = new Flexiprep(this)
flexiprep.outputDir = runDir + "flexiprep/"
flexiprep.input_R1 = new File(runConfig("R1").toString)
flexiprep.input_R1 = config("R1")
flexiprep.skipClip = true
flexiprep.skipTrim = true
flexiprep.sampleName = sampleID
flexiprep.libraryName = runID
flexiprep.libraryName = libraryID
flexiprep.init
flexiprep.biopetScript
addAll(flexiprep.functions)
......@@ -152,23 +149,23 @@ class Sage(val root: Configurable) extends QScript with MultiSampleQScript {
mapping.skipMarkduplicates = true
mapping.aligner = config("aligner", default = "bowtie")
mapping.input_R1 = prefixFastq.outputFastq
mapping.RGLB = runConfig("ID").toString
mapping.RGSM = sampleConfig("ID").toString
if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
mapping.RGLB = libraryID
mapping.RGSM = sampleID
mapping.RGPL = config("PL")
mapping.RGPU = config("PU")
mapping.RGCN = config("CN")
mapping.outputDir = runDir
mapping.init
mapping.biopetScript
addAll(mapping.functions)
if (config("library_counts", default = false).asBoolean) {
addBedtoolsCounts(mapping.outputFiles("finalBamFile"), sampleID + "-" + runID, runDir)
addTablibCounts(prefixFastq.outputFastq, sampleID + "-" + runID, runDir)
addBedtoolsCounts(mapping.outputFiles("finalBamFile"), sampleID + "-" + libraryID, runDir)
addTablibCounts(prefixFastq.outputFastq, sampleID + "-" + libraryID, runDir)
}
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
} else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryID)
return libraryOutput
}
......
......@@ -70,18 +70,17 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
logger.info("YAM SV Pipeline has run .......................")
}
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
def runSingleSampleJobs(sampleID: String): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
var outputFiles: Map[String, List[File]] = Map()
var libraryFastqFiles: List[File] = List()
val sampleID: String = sampleConfig("ID").toString
val sampleDir: String = outputDir + sampleID + "/"
val alignmentDir: String = sampleDir + "alignment/"
val svcallingDir: String = sampleDir + "svcalls/"
sampleOutput.libraries = runLibraryJobs(sampleConfig)
sampleOutput.libraries = runLibraryJobs(sampleID)
for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
// this is extending the libraryBamfiles list like '~=' in D or .append in Python or .push_back in C++
libraryBamfiles ++= List(libraryOutput.mappedBamFile)
......@@ -126,29 +125,27 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
def runSingleLibraryJobs(libraryId: String, sampleID: String): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val alignmentDir: String = outputDir + sampleID + "/alignment/"
val runDir: String = alignmentDir + "run_" + runID + "/"
val runDir: String = alignmentDir + "run_" + libraryId + "/"
if (runConfig.contains("R1")) {
if (config.contains("R1")) {
val mapping = new Mapping(this)
mapping.aligner = config("aligner", default = "stampy")
mapping.skipFlexiprep = false
mapping.skipMarkduplicates = true // we do the dedup marking using Sambamba
if (runConfig.contains("R1")) mapping.input_R1 = new File(runConfig("R1").toString)
if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
mapping.input_R1 = config("R1")
mapping.input_R2 = config("R2")
mapping.paired = (mapping.input_R2 != null)
mapping.RGLB = runConfig("ID").toString
mapping.RGSM = sampleConfig("ID").toString
if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
mapping.RGLB = libraryId
mapping.RGSM = sampleID
mapping.RGPL = config("PL")
mapping.RGPU = config("PU")
mapping.RGCN = config("CN")
mapping.outputDir = runDir
mapping.init
......@@ -158,7 +155,7 @@ class Yamsvp(val root: Configurable) extends QScript with MultiSampleQScript {
// start sambamba dedup
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
} else this.logger.error("Sample: " + sampleID + ": No R1 found for library: " + libraryId)
return libraryOutput
// logger.debug(outputFiles)
// return outputFiles
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment