Commit 764fcb73 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into feature-singlefile

parents 8c52cdc2 725ed53a
......@@ -6,7 +6,7 @@ import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.core.{ Reference, ToolCommandFunction }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
* Created by pjvanthof on 18/11/2016.
......@@ -36,6 +36,9 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
}
}
@Output
private var outputFiles: List[File] = Nil
def bamstatsSummary: File = new File(outputDir, "bamstats.summary.json")
def flagstatSummaryFile(contig: Option[String] = None): File = getOutputFile("flagstats.summary.json", contig)
def mappingQualityFile(contig: Option[String] = None): File = getOutputFile("mapping_quality.tsv", contig)
......@@ -44,6 +47,10 @@ class BamStats(val root: Configurable) extends ToolCommandFunction with Referenc
/**
 * Prepares this job before the job graph is built: registers the bam index as a dependency,
 * registers the summary/stat files as outputs and sets the job log file and the reference.
 */
override def beforeGraph(): Unit = {
  super.beforeGraph()
  // The dot is escaped so only a literal ".bam" suffix is swapped for ".bai";
  // an unescaped "." would match any character in front of "bam".
  deps :+= new File(bamFile.getAbsolutePath.replaceAll("\\.bam$", ".bai"))
  outputFiles ++= List(bamstatsSummary, flagstatSummaryFile(), mappingQualityFile(), clipingFile())
  jobOutputFile = new File(outputDir, ".bamstats.out")
  // Only fall back to the reference fasta when no reference was set explicitly
  if (reference == null) reference = referenceFasta()
}
......
......@@ -58,6 +58,7 @@ All other values should be provided in the config. Specific config values toward
| readgroup_sequencing_center | String (optional) | Read group sequencing center |
| readgroup_description | String (optional) | Read group description |
| predicted_insertsize | Integer (optional) | Read group predicted insert size |
| keep_final_bam_file | Boolean (default true) | When set to false, the pipeline removes the final bam file once no other job requires it anymore |
It is possible to provide any config value as a command line argument as well, using the `-cv` flag.
E.g. `-cv reference=<path/to/reference>` would set value `reference`.
......
......@@ -93,19 +93,23 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
/** Readgroup predicted insert size */
protected var predictedInsertsize: Option[Int] = config("predicted_insertsize")
val keepFinalBamFile: Boolean = config("keep_final_bam_file", default = true)
protected var paired: Boolean = false
val flexiprep = new Flexiprep(this)
def finalBamFile: File = new File(outputDir, outputName + ".final.bam")
/** Final bam file of this mapping; carries the ".dedup.bam" suffix unless mark duplicates is skipped */
def finalBamFile: File = {
  val suffix = if (skipMarkduplicates) ".bam" else ".dedup.bam"
  new File(outputDir, outputName + suffix)
}
/** location of summary file */
def summaryFile = new File(outputDir, sampleId.getOrElse("x") + "-" + libId.getOrElse("x") + ".summary.json")
override def defaults = Map(
override def defaults: Map[String, Any] = Map(
"gsnap" -> Map("batch" -> 4),
"star" -> Map("outsamunmapped" -> "Within")
)
override def fixedValues = Map(
override def fixedValues: Map[String, Any] = Map(
"gsnap" -> Map("format" -> "sam"),
"bowtie" -> Map("sam" -> true)
)
......@@ -255,11 +259,13 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
var bamFile = bamFiles.head
if (!skipMarkduplicates) {
bamFile = new File(outputDir, outputName + ".dedup.bam")
val md = MarkDuplicates(this, bamFiles, bamFile)
val md = MarkDuplicates(this, bamFiles, finalBamFile)
md.isIntermediate = !keepFinalBamFile
add(md)
addSummarizable(md, "mark_duplicates")
} else if (skipMarkduplicates && chunking) {
val mergeSamFile = MergeSamFiles(this, bamFiles, new File(outputDir, outputName + ".merge.bam"))
val mergeSamFile = MergeSamFiles(this, bamFiles, finalBamFile)
mergeSamFile.isIntermediate = !keepFinalBamFile
add(mergeSamFile)
bamFile = mergeSamFile.output
}
......@@ -270,9 +276,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addSummaryQScript(bamMetrics)
}
add(Ln(this, swapExt(outputDir, bamFile, ".bam", ".bai"), swapExt(outputDir, finalBamFile, ".bam", ".bai")))
add(Ln(this, bamFile, finalBamFile))
outputFiles += ("finalBamFile" -> finalBamFile.getAbsoluteFile)
outputFiles += ("finalBamFile" -> finalBamFile)
if (config("unmapped_to_gears", default = false).asBoolean) {
val gears = new GearsSingle(this)
......@@ -331,7 +335,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
}
val sortSam = SortSam(this, samFile, output)
if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(sortSam)
sortSam.output
}
......@@ -345,7 +349,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val sortSam = new SortSam(this)
sortSam.output = output
val pipe = bwaCommand | sortSam
pipe.isIntermediate = chunking || !skipMarkduplicates
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
pipe.threadsCorrection = -1
add(pipe)
output
......@@ -363,6 +367,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
val pipe = new BiopetFifoPipe(this, gsnapCommand :: ar._1 :: reorderSam :: Nil)
pipe.threadsCorrection = -2
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(pipe)
ar._2
}
......@@ -386,7 +391,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val sortSam = new SortSam(this)
sortSam.output = output
val pipe = hisat2 | sortSam
pipe.isIntermediate = chunking || !skipMarkduplicates
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
pipe.threadsCorrection = 1
add(pipe)
......@@ -430,9 +435,11 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val reorderSam = new ReorderSam(this)
reorderSam.input = mergeSamFile.output
reorderSam.output = swapExt(output.getParent, output, ".merge.bam", ".reordered.bam")
reorderSam.isIntermediate = true
add(reorderSam)
val ar = addAddOrReplaceReadGroups(reorderSam.output, output)
ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(ar._1)
ar._2
}
......@@ -459,7 +466,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
stampyCmd.isIntermediate = true
add(stampyCmd)
val sortSam = SortSam(this, stampyCmd.output, output)
if (chunking || !skipMarkduplicates) sortSam.isIntermediate = true
sortSam.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(sortSam)
sortSam.output
}
......@@ -478,6 +485,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val ar = addAddOrReplaceReadGroups(bowtie.output, output)
val pipe = new BiopetFifoPipe(this, (Some(bowtie) :: Some(ar._1) :: Nil).flatten)
pipe.threadsCorrection = -1
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(pipe)
ar._2
}
......@@ -495,7 +503,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val sortSam = new SortSam(this)
sortSam.output = output
val pipe = bowtie2 | sortSam
pipe.isIntermediate = chunking || !skipMarkduplicates
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
pipe.threadsCorrection = -1
add(pipe)
output
......@@ -517,6 +525,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
pipe.threadsCorrection = -3
zcatR1._1.foreach(x => pipe.threadsCorrection -= 1)
zcatR2.foreach(_._1.foreach(x => pipe.threadsCorrection -= 1))
pipe.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(pipe)
reorderSam.output
}
......@@ -531,6 +540,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
val starCommand = Star._2pass(this, zcatR1._2, zcatR2.map(_._2), outputDir, isIntermediate = true)
addAll(starCommand._2)
val ar = addAddOrReplaceReadGroups(starCommand._1, output)
ar._1.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
add(ar._1)
ar._2
}
......@@ -547,7 +557,7 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
addOrReplaceReadGroups.RGSM = sampleId.get
if (readgroupSequencingCenter.isDefined) addOrReplaceReadGroups.RGCN = readgroupSequencingCenter.get
if (readgroupDescription.isDefined) addOrReplaceReadGroups.RGDS = readgroupDescription.get
if (!skipMarkduplicates) addOrReplaceReadGroups.isIntermediate = true
addOrReplaceReadGroups.isIntermediate = chunking || !skipMarkduplicates || !keepFinalBamFile
(addOrReplaceReadGroups, addOrReplaceReadGroups.output)
}
......
......@@ -83,10 +83,10 @@ trait MultisampleMappingTrait extends MultiSampleQScript
"merge_strategy" -> mergeStrategy.toString)
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
class Sample(sampleId: String) extends AbstractSample(sampleId) { sample =>
def makeLibrary(id: String) = new Library(id)
class Library(libId: String) extends AbstractLibrary(libId) {
class Library(libId: String) extends AbstractLibrary(libId) { lib =>
/** By default the bams files are put in the summary, more files can be added here */
def summaryFiles: Map[String, File] = (inputR1.map("input_R1" -> _) :: inputR2.map("input_R2" -> _) ::
......@@ -101,22 +101,28 @@ trait MultisampleMappingTrait extends MultiSampleQScript
lazy val bamToFastq: Boolean = config("bam_to_fastq", default = false)
lazy val correctReadgroups: Boolean = config("correct_readgroups", default = false)
lazy val mapping = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
val m = new Mapping(qscript)
/** True only when the sample this library belongs to has exactly one library */
def keepFinalBamfile = samples(sampleId).libraries.size == 1
/**
 * Mapping pipeline for this library. Only defined when an R1 input is given, or when a bam
 * input is given and `bam_to_fastq` is enabled.
 */
lazy val mapping: Option[Mapping] = if (inputR1.isDefined || (inputBam.isDefined && bamToFastq)) {
  val m: Mapping = new Mapping(qscript) {
    override def configNamespace = "mapping"
    // The key has to match the "keep_final_bam_file" config value read by Mapping,
    // otherwise this default is never picked up.
    override def defaults: Map[String, Any] = super.defaults ++
      Map("keep_final_bam_file" -> keepFinalBamfile)
  }
  m.sampleId = Some(sampleId)
  m.libId = Some(libId)
  m.outputDir = libDir
  Some(m)
} else None
def bamFile = mapping match {
/**
 * Bam file of this library: the final bam file of the mapping when a mapping is defined,
 * otherwise a bam file inside libDir when an input bam is given, otherwise None.
 */
def bamFile: Option[File] =
  mapping.map(_.finalBamFile).orElse {
    if (inputBam.isDefined) Some(new File(libDir, s"$sampleId-$libId.bam")) else None
  }
/** By default the preProcessBam is the same as the normal bamFile. A pipeline can extend this is there are preprocess steps */
def preProcessBam = bamFile
def preProcessBam: Option[File] = bamFile
/** This method can be extended to add jobs to the pipeline, to do this the super call of this function must be called by the pipelines */
def addJobs(): Unit = {
......
......@@ -86,6 +86,8 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait
c && br.knownSites.nonEmpty
}
/** The final bam file of the mapping is not kept when indel realignment or base recalibration is used */
override def keepFinalBamfile = super.keepFinalBamfile && !(useIndelRealigner || useBaseRecalibration)
override def preProcessBam = if (useIndelRealigner && useBaseRecalibration)
bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
......@@ -179,7 +181,7 @@ class Shiva(val root: Configurable) extends QScript with MultisampleMappingTrait
override def addMultiSampleJobs() = {
super.addMultiSampleJobs()
addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta())).getOrElse(Nil))
addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
multisampleVariantCalling.foreach(vc => {
vc.outputDir = new File(outputDir, "variantcalling")
......@@ -262,16 +264,18 @@ object Shiva extends PipelineCommand {
// This is used to only execute 1 validation per vcf file
private var validateVcfSeen: Set[(File, File)] = Set()
def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File): List[QFunction] = {
def makeValidateVcfJobs(root: Configurable, vcfFile: File, referenceFile: File, outputDir: File): List[QFunction] = {
if (validateVcfSeen.contains((vcfFile, referenceFile))) Nil
else {
validateVcfSeen ++= Set((vcfFile, referenceFile))
val validateVcf = new ValidateVcf(root)
validateVcf.inputVcf = vcfFile
validateVcf.reference = referenceFile
validateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".validateVcf.out")
val checkValidateVcf = new CheckValidateVcf
checkValidateVcf.inputLogFile = validateVcf.jobOutputFile
checkValidateVcf.jobOutputFile = new File(outputDir, vcfFile.getAbsolutePath + ".checkValidateVcf.out")
List(validateVcf, checkValidateVcf)
}
......
......@@ -85,7 +85,7 @@ class ShivaVariantcalling(val root: Configurable) extends QScript
require(inputBams.nonEmpty, "No input bams found")
require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", "))
addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta())).getOrElse(Nil))
addAll(dbsnpVcfFile.map(Shiva.makeValidateVcfJobs(this, _, referenceFasta(), new File(outputDir, ".validate"))).getOrElse(Nil))
val cv = new CombineVariants(qscript)
cv.out = finalFile
......
......@@ -37,11 +37,11 @@ class HaploTypeCallerGvcfTest extends TestNGSuite with Matchers {
}
/**
 * Creates an empty temporary ".bam" file for each sample name.
 *
 * @param samples sample names, each used as the prefix of its temp file
 * @return map from sample name to its temp file; files are deleted on JVM exit
 */
def createInputMap(samples: List[String]): Map[String, File] = {
  samples.map { sample =>
    val file = File.createTempFile(sample, ".bam")
    file.deleteOnExit()
    sample -> file
  }.toMap
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment