Commit 2dca0473 authored by rhpvorderman's avatar rhpvorderman

Merge remote-tracking branch 'origin' into fix-BIOPET-604 21-4-2017

parents f7855ebb 42e2ffe3
......@@ -98,7 +98,7 @@ class BastyTest extends TestNGSuite with Matchers {
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
......
......@@ -14,6 +14,7 @@ rownames(heat) <- heat[,1]
heat<- heat[,-1]
heat<- as.matrix(heat)
textMargin <- max(sapply(rownames(heat), nchar)) + 4
colNumber <- 50
col <- rev(colorRampPalette(brewer.pal(11, "Spectral"))(colNumber))
for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[heat < 1]))) / dist(range(heat[heat < 1])) * colNumber))) {
......@@ -22,7 +23,7 @@ for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[he
col[length(col)] <- "#00FF00"
png(file = outputArg, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
hc <- hclust(d = dist(heat))
......@@ -31,5 +32,5 @@ plot(as.dendrogram(hc), horiz=TRUE, asp=0.02)
dev.off()
png(file = outputArgClustering, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
......@@ -54,7 +54,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
}
writer.println("set -eubf")
writer.println("set -o pipefail")
writer.println(this.commandLine)
writer.println(lines.mkString("\n"))
jobDelayTime.foreach(x => writer.println(s"sleep $x"))
writer.close()
}
......@@ -99,7 +99,13 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
beforeGraph()
internalBeforeGraph()
if (jobOutputFile != null) this.commandDirectory = this.jobOutputFile.getAbsoluteFile.getParentFile
if (jobOutputFile != null) {
this.commandDirectory = this.jobOutputFile.getAbsoluteFile.getParentFile
this match {
case s: ScatterGatherableFunction => s.scatterGatherDirectory = new File(this.commandDirectory, ".scatter")
case _ =>
}
}
super.freezeFieldValues()
}
......
......@@ -56,6 +56,8 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca
@Argument(fullName = "maxGGAARExtension", shortName = "maxGGAARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode", required = false, exclusiveOf = "", validation = "")
var maxGGAARExtension: Option[Int] = config("maxGGAARExtension")
var useNewAFCalculator: Boolean = config("useNewAFCalculator", default = false)
/** Include at least this many bases around an event for calling indels */
@Argument(fullName = "paddingAroundIndels", shortName = "paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required = false, exclusiveOf = "", validation = "")
var paddingAroundIndels: Option[Int] = config("paddingAroundIndels")
......@@ -441,6 +443,7 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca
optional("-paddingAroundSNPs", paddingAroundSNPs, spaceSeparated = true, escape = true, format = "%s") +
repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") +
conditional(useNewAFCalculator, "--useNewAFCalculator") +
repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") +
repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") +
conditional(debug, "-debug", escape = true, format = "%s") +
......
......@@ -25,6 +25,6 @@ abstract class Sambamba extends BiopetCommandLineFunction with Version {
executable = config("exe", default = "sambamba", namespace = "sambamba", freeVar = false)
def versionCommand = executable
def versionRegex = """sambamba v(.*)""".r
def versionRegex = """sambamba v?(.*)""".r
override def versionExitcode = List(0, 1)
}
\ No newline at end of file
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambamba markdup */
class SambambaMarkdup(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File")
var input: File = _
......@@ -32,17 +31,29 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
var removeDuplicates: Boolean = config("remove_duplicates", default = false)
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val hashTableSize: Option[Int] = config("hash-table-size", default = 262144)
val overflowListSize: Option[Int] = config("overflow-list-size", default = 200000)
val ioBufferSize: Option[Int] = config("io-buffer-size", default = 128)
val compressionLevel: Option[Int] = config("compression_level")
val hashTableSize: Option[Int] = config("hash-table-size")
val overflowListSize: Option[Int] = config("overflow-list-size")
val ioBufferSize: Option[Int] = config("io-buffer-size")
val showProgress: Boolean = config("show-progress", default = true)
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
override def beforeGraph(): Unit = {
indexOutput = new File(output + ".bai")
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("markdup") +
conditional(removeDuplicates, "--remove-duplicates") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
conditional(showProgress, "--show-progress") +
optional("--hash-table-size=", hashTableSize, spaceSeparated = false) +
optional("--overflow-list-size=", overflowListSize, spaceSeparated = false) +
optional("--io-buffer-size=", ioBufferSize, spaceSeparated = false) +
......@@ -51,10 +62,11 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
}
object SambambaMarkdup {
def apply(root: Configurable, input: File, output: File): SambambaMarkdup = {
def apply(root: Configurable, input: File, output: File, isIntermediate: Boolean = false): SambambaMarkdup = {
val markdup = new SambambaMarkdup(root)
markdup.input = input
markdup.output = output
markdup.isIntermediate = isIntermediate
markdup
}
......
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambamba merge */
class SambambaMerge(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File[s]")
var input: List[File] = Nil
......@@ -30,13 +29,29 @@ class SambambaMerge(val parent: Configurable) extends Sambamba {
var output: File = _
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val compressionLevel: Option[Int] = config("compression_level")
val header: Boolean = config("header", default = false)
val showProgress: Boolean = config("show-progress", default = true)
val filter: Option[String] = config("filter")
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
override def beforeGraph(): Unit = {
indexOutput = new File(output + ".bai")
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("merge") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
optional("-F", filter) +
conditional(header, "--header") +
conditional(showProgress, "--show-progress") +
required(output) +
repeat("", input)
repeat(input)
}
......@@ -14,6 +14,7 @@ rownames(heat) <- heat[,1]
heat<- heat[,-1]
heat<- as.matrix(heat)
textMargin <- max(sapply(rownames(heat), nchar)) + 4
colNumber <- 50
col <- rev(colorRampPalette(brewer.pal(11, "Spectral"))(colNumber))
for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[heat < 1]))) / dist(range(heat[heat < 1])) * colNumber))) {
......@@ -22,7 +23,7 @@ for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[he
col[length(col)] <- "#00FF00"
png(file = outputArg, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
hc <- hclust(d = dist(heat))
......@@ -31,5 +32,5 @@ plot(as.dendrogram(hc), horiz=TRUE, asp=0.02)
dev.off()
png(file = outputArgClustering, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
......@@ -17,7 +17,6 @@ package nl.lumc.sasc.biopet.utils.intervals
import java.io.{ File, PrintWriter }
import htsjdk.samtools.SAMSequenceDictionary
import htsjdk.samtools.reference.FastaSequenceFile
import scala.collection.JavaConversions._
import scala.collection.mutable
......@@ -112,6 +111,15 @@ case class BedRecordList(val chrRecords: Map[String, List[BedRecord]], val heade
allRecords.foreach(writer.println)
writer.close()
}
/** Returns the fraction of the total region length relative to the given length */
def fractionOf(length: Long): Double = length.toDouble / length
/** Returns the fraction of the regions relative to the reference described by the given dictionary */
def fractionOfReference(dict: SAMSequenceDictionary): Double = fractionOf(dict.getReferenceLength)
/** Returns the fraction of the regions relative to the reference in the given FASTA file */
def fractionOfReference(file: File): Double = fractionOfReference(FastaUtils.getCachedDict(file))
}
object BedRecordList {
......@@ -131,6 +139,32 @@ object BedRecordList {
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil)
/**
* This creates a [[BedRecordList]] based on multiple files. This method combines overlapping regions
*
* @param bedFiles Input bed files
* @return
*/
def fromFilesCombine(bedFiles: File*) = {
fromFiles(bedFiles, combine = true)
}
/**
* This creates a [[BedRecordList]] based on multiple files
*
* @param bedFiles Input bed files
* @param combine When true overlaping regions are merged
* @return
*/
def fromFiles(bedFiles: Seq[File], combine: Boolean = false) = {
val list = bedFiles.foldLeft(empty)((a, b) => fromList(fromFile(b).allRecords ++ a.allRecords))
if (combine) list.combineOverlap
else list
}
/** This created a empty [[BedRecordList]] */
def empty = fromList(Nil)
def fromFile(bedFile: File) = {
val reader = Source.fromFile(bedFile)
val all = reader.getLines().toList
......
......@@ -118,6 +118,11 @@ At this moment the following variant callers can be used
| shiva | correct_readgroups | Boolean | false | Attempt to correct read groups | Only used when input is a bam file |
| shiva | amplicon_bed | Path | | Path to target bed file | all |
| shiva | regions_of_interest | Array of paths | | Array of paths to region of interest (e.g. gene panels) bed files | all |
| shivavariantcalling | gender_aware_calling | Boolean | false | Enables gender-aware variant calling | haplotypecaller_gvcf |
| shivavariantcalling | haploid_regions | Bed file | | Haploid regions for all genders | haplotypecaller_gvcf |
| shivavariantcalling | haploid_regions_male | Bed file | | Haploid regions for males | haplotypecaller_gvcf |
| shivavariantcalling | haploid_regions_female | Bed file | | Haploid regions for females | haplotypecaller_gvcf |
| shiva | amplicon_bed | Path | | Path to target bed file | all |
| vcffilter | min_sample_depth | Integer | 8 | Filter variants with at least x coverage | raw |
| vcffilter | min_alternate_depth | Integer | 2 | Filter variants with at least x depth on the alternate allele | raw |
| vcffilter | min_samples_pass | Integer | 1 | Minimum amount of samples which pass custom filter (requires additional flags) | raw |
......@@ -130,6 +135,14 @@ For all the options, please see the corresponding documentation for the mapping
## Advanced usage
### Gender aware variantcalling
In Shiva and ShivaVariantcalling, gender-aware variant calling is possible when using haplotypecaller_gvcf. In this mode it is required to supply bed files that define the haploid regions (see config values).
- For males, `haploid_regions` and `haploid_regions_male` are used.
- For females, `haploid_regions` and `haploid_regions_female` are used.
The pipeline will use a union of those files. At least 1 file is required while using this mode.
### Reporting modes
Shiva furthermore supports three modes. The default and recommended option is `multisample_variantcalling`.
......
Subproject commit 86ee6287b66562544e701f9ffa7cbbf0af9f3eba
Subproject commit 677d757419f515779766b10149b0e92eff71e5b2
......@@ -19,7 +19,7 @@ import java.io.File
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript }
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
......@@ -28,8 +28,8 @@ import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import MultisampleMapping.MergeStrategy
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge }
import scala.collection.JavaConversions._
......@@ -125,7 +125,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript
} else None
def bamFile: Option[File] = mapping match {
case Some(m) => Some(m.finalBamFile)
case Some(m) => Some(m.mergedBamFile)
case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
case _ => None
}
......@@ -247,9 +247,9 @@ trait MultisampleMappingTrait extends MultiSampleQScript
mergeStrategy match {
case MergeStrategy.None =>
case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 =>
case (MergeStrategy.MergeSam) if libraries.flatMap(_._2.bamFile).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*)
case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
case (MergeStrategy.PreProcessMergeSam) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*)
case MergeStrategy.MergeSam =>
add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
......@@ -259,6 +259,20 @@ trait MultisampleMappingTrait extends MultiSampleQScript
add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessMarkDuplicates =>
add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessSambambaMarkdup =>
val mergedBam = if (libraries.flatMap(_._2.bamFile).size == 1) {
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, new File(sampleDir, "merged.bam")): _*)
libraries.flatMap(_._2.preProcessBam).head
} else {
val merge = new SambambaMerge(qscript)
merge.input = libraries.flatMap(_._2.preProcessBam).toList
merge.output = new File(sampleDir, "merged.bam")
merge.isIntermediate = true
add(merge)
merge.output
}
add(SambambaMarkdup(qscript, mergedBam, bamFile.get, isIntermediate = !keepMergedFiles))
add(Ln(qscript, bamFile.get + ".bai", bamFile.get.getAbsolutePath.stripSuffix(".bam") + ".bai"))
case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?")
}
......@@ -301,7 +315,7 @@ class MultisampleMapping(val parent: Configurable) extends QScript with Multisam
object MultisampleMapping extends PipelineCommand {
object MergeStrategy extends Enumeration {
val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value
val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates, PreProcessSambambaMarkdup = Value
}
/** When a file is not absolute, an error is raised at the end of the pipeline script */
......
......@@ -20,6 +20,7 @@ import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.extensions.centrifuge.Centrifuge
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles }
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaMarkdup
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils
......@@ -91,16 +92,22 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
pipeline.script()
val numberFastqLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0) + (if (sample3 && bamToFastq) 1 else 0) + (if (sample4 && bamToFastq) 1 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0) + (if (sample4) 1 else 0)
val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetCommandLineFunction])
.flatMap(_.asInstanceOf[BiopetCommandLineFunction].pipesJobs)
if (merge == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates) {
""
}
import MultisampleMapping.MergeStrategy
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberFastqLibs +
(if (sample2 && (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates)) 1 else 0))
(if (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates) numberSamples else 0))
pipeline.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (
(if (sample2 && (merge == MergeStrategy.MergeSam || merge == MergeStrategy.PreProcessMergeSam)) 1 else 0))
pipeline.functions.count(_.isInstanceOf[SambambaMarkdup]) shouldBe
(if (merge == MergeStrategy.PreProcessSambambaMarkdup) numberSamples else 0)
pipeline.samples.foreach {
case (sampleName, sample) =>
if (merge == MergeStrategy.None) sample.bamFile shouldBe None
......@@ -211,6 +218,7 @@ object MultisampleMappingTestTrait {
"sickle" -> Map("exe" -> "test"),
"cutadapt" -> Map("exe" -> "test"),
"bwa" -> Map("exe" -> "test"),
"sambamba" -> Map("exe" -> "test"),
"samtools" -> Map("exe" -> "test"),
"igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
"wigtobigwig" -> Map("exe" -> "test"),
......@@ -232,7 +240,7 @@ object MultisampleMappingTestTrait {
)))
val sample2 = Map(
"samples" -> Map("sample3" -> Map("libraries" -> Map(
"samples" -> Map("sample2" -> Map("libraries" -> Map(
"lib1" -> Map(
"R1" -> inputTouch("2_1_R1.fq"),
"R2" -> inputTouch("2_1_R2.fq")
......
......@@ -60,11 +60,13 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
override def namePrefix = "multisample"
override def configNamespace: String = "shivavariantcalling"
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
genders = samples.map { case (sampleName, sample) => sampleName -> sample.gender }
}
else new ShivaVariantcalling(qscript) {
override def configNamespace = "shivavariantcalling"
sampleId = sample
libId = library
genders = sample.map(x => x -> samples(x).gender).toMap
}
}
......@@ -103,8 +105,6 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
override def keepFinalBamfile: Boolean = super.keepFinalBamfile && !useIndelRealigner && !useBaseRecalibration
override def bamFile: Option[Mapping#File] = mapping.map(_.mergedBamFile)
override def preProcessBam: Option[Mapping#File] = if (useIndelRealigner && usePrintReads && useBaseRecalibration)
bamFile.map(swapExt(libDir, _, ".bam", ".realign.baserecal.bam"))
else if (useIndelRealigner) bamFile.map(swapExt(libDir, _, ".bam", ".realign.bam"))
......@@ -187,7 +187,7 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
Some(makeVariantcalling(multisample = false, sample = Some(sampleId)))
} else None
override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner)
override def keepMergedFiles: Boolean = config("keep_merged_files", default = !useIndelRealigner || (libraries.size == 1))
lazy val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)
......
......@@ -14,7 +14,8 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference, SampleLibraryTag }
import MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.Tabix
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance }
......@@ -48,9 +49,22 @@ class ShivaVariantcalling(val parent: Configurable) extends QScript
var inputBqsrFiles: Map[String, File] = Map()
var genders: Map[String, Gender.Value] = _
/** Executed before script */
def init(): Unit = {
if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg)
if (Option(genders).isEmpty) genders = {
val samples: Map[String, Any] = config("genders", default = Map())
samples.map {
case (sampleName, gender) =>
sampleName -> (gender.toString.toLowerCase match {
case "male" => Gender.Male
case "female" => Gender.Female
case _ => Gender.Unknown
})
}
}
}
var referenceVcf: Option[File] = config("reference_vcf")
......@@ -100,6 +114,7 @@ class ShivaVariantcalling(val parent: Configurable) extends QScript
caller.inputBqsrFiles = inputBqsrFiles
caller.namePrefix = namePrefix
caller.outputDir = new File(outputDir, caller.name)
caller.genders = genders
add(caller)
addStats(caller.outputFile, caller.name)
val normalize: Boolean = config("execute_vt_normalize", default = false, namespace = caller.configNamespace)
......
......@@ -19,8 +19,12 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.extensions.gatk.CombineGVCFs
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedRecordList
/** Gvcf mode for haplotypecaller */
class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
......@@ -34,14 +38,76 @@ class HaplotypeCallerGvcf(val parent: Configurable) extends Variantcaller {
def getGvcfs = gVcfFiles
val genderAwareCalling: Boolean = config("gender_aware_calling", default = false)
val haploidRegions: Option[File] = config("hap̦loid_regions")
val haploidRegionsMale: Option[File] = config("haploid_regions_male")
val haploidRegionsFemale: Option[File] = config("haploid_regions_female")
lazy val fractionMale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsMale).flatten, true).fractionOfReference(referenceDict)
lazy val fractionFemale: Double = BedRecordList.fromFiles(Seq(haploidRegions, haploidRegionsFemale).flatten, true).fractionOfReference(referenceDict)
lazy val fractionUnknown: Double = BedRecordList.fromFiles(Seq(haploidRegions).flatten, true).fractionOfReference(referenceDict)
override def fixedValues = Map("haplotypecaller" -> Map("emitRefConfidence" -> "GVCF"))
override def defaults = Map("haplotypecaller" -> Map(
"variant_index_type" -> "LINEAR",
"variant_index_parameter" -> 128000)
)
override def init(): Unit = {
super.init()
if (genderAwareCalling && haploidRegions.isEmpty && haploidRegionsMale.isEmpty && haploidRegionsFemale.isEmpty)
Logging.addError("Gender aware variantcalling is enabled but no haploid bed files are given")
}
def biopetScript() {
gVcfFiles = for ((sample, inputBam) <- inputBams) yield {
val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz"))
hc.BQSR = inputBqsrFiles.get(sample)
add(hc)
sample -> hc.out
if (genderAwareCalling) {
val finalFile = new File(outputDir, sample + ".gvcf.vcf.gz")
val gender = genders.getOrElse(sample, Gender.Unknown)
val haploidBedFiles: List[File] = gender match {
case Gender.Female => haploidRegions.toList ::: haploidRegionsFemale.toList ::: Nil
case Gender.Male => haploidRegions.toList ::: haploidRegionsMale.toList ::: Nil
case _ => haploidRegions.toList
}
val fraction: Double = gender match {
case Gender.Female => fractionFemale
case Gender.Male => fractionMale
case _ => fractionUnknown
}
val haploidGvcf = if (haploidBedFiles.nonEmpty) {
val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".haploid.gvcf.vcf.gz"))
hc.BQSR = inputBqsrFiles.get(sample)
hc.intervals = haploidBedFiles
hc.scatterCount = (hc.scatterCount * fraction).toInt
hc.sample_ploidy = Some(1)
add(hc)
Some(hc.out)
} else None
val hcDiploid = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".diploid.gvcf.vcf.gz"))
hcDiploid.BQSR = inputBqsrFiles.get(sample)
hcDiploid.excludeIntervals = haploidBedFiles
hcDiploid.scatterCount = (hcDiploid.scatterCount * (1 - fraction)).toInt
add(hcDiploid)
haploidGvcf match {
case Some(file) =>
val combine = new CombineGVCFs(this)
combine.variant = Seq(hcDiploid.out, file)
combine.out = new File(outputDir, sample + ".gvcf.vcf.gz")
add(combine)
sample -> combine.out
case _ => sample -> hcDiploid.out
}
} else {
val hc = gatk.HaplotypeCaller(this, List(inputBam), new File(outputDir, sample + ".gvcf.vcf.gz"))
hc.BQSR = inputBqsrFiles.get(sample)
add(hc)
sample -> hc.out
}
}
val genotypeGVCFs = gatk.GenotypeGVCFs(this, gVcfFiles.values.toList, outputFile)
......
......@@ -14,6 +14,7 @@
*/
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import nl.lumc.sasc.biopet.core.MultiSampleQScript.Gender
import nl.lumc.sasc.biopet.core.{ BiopetQScript, Reference }
import org.broadinstitute.gatk.queue.QScript
......@@ -27,6 +28,8 @@ trait Variantcaller extends QScript with BiopetQScript with Reference {
var namePrefix: String = _
var genders: Map[String, Gender.Value] = _
val mergeVcfResults: Boolean = config("merge_vcf_results", default = true)
/**
......
......@@ -62,6 +62,7 @@ trait ShivaTestTrait extends TestNGSuite with Matchers {
def svCalling = false
def cnvCalling = false
def annotation = false
def usePrintReads = true
private var dirs: List[File] = Nil
......@@ -83,7 +84,8 @@ trait ShivaTestTrait extends TestNGSuite with Matchers {
"use_base_recalibration" -> baseRecalibration,
"sv_calling" -> svCalling,
"cnv_calling" -> cnvCalling,
"annotation" -> annotation), m)
"annotation" -> annotation,
"use_printreads" -> usePrintReads), m)
}
......@@ -99,13 +101,13 @@ trait ShivaTestTrait extends TestNGSuite with Matchers {
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe (if (dbsnp && baseRecalibration) (numberLibs * 2) else 0)
pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration) numberLibs else 0)
pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe (if (dbsnp && baseRecalibration && usePrintReads) numberLibs else 0)
pipeline.summarySettings.get("annotation") shouldBe Some(annotation)
pipeline.summarySettings.get("sv_calling") shouldBe Some(svCalling)
......@@ -143,6 +145,10 @@ class ShivaNoDbsnpTest extends ShivaTestTrait {
override def realignProvider = Array(true)
override def dbsnp = false
}
class ShivaNoPrintReadsTest extends ShivaTestTrait {
override def realignProvider = Array(false)
override def usePrintReads = false
}
class ShivaLibraryCallingTest extends ShivaTestTrait {
override def sample1 = Array(true, false)
override def sample2 = Array(false, true)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment