Commit dc1777d2 authored by Peter van 't Hof's avatar Peter van 't Hof Committed by GitHub

Merge branch 'develop' into fix-BIOPET-655

parents 490fb781 b031a6e7
......@@ -98,7 +98,7 @@ class BastyTest extends TestNGSuite with Matchers {
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
......
......@@ -56,6 +56,8 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca
@Argument(fullName = "maxGGAARExtension", shortName = "maxGGAARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode", required = false, exclusiveOf = "", validation = "")
var maxGGAARExtension: Option[Int] = config("maxGGAARExtension")
var useNewAFCalculator: Boolean = config("useNewAFCalculator", default = false)
/** Include at least this many bases around an event for calling indels */
@Argument(fullName = "paddingAroundIndels", shortName = "paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required = false, exclusiveOf = "", validation = "")
var paddingAroundIndels: Option[Int] = config("paddingAroundIndels")
......@@ -441,6 +443,7 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca
optional("-paddingAroundSNPs", paddingAroundSNPs, spaceSeparated = true, escape = true, format = "%s") +
repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") +
conditional(useNewAFCalculator, "--useNewAFCalculator") +
repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") +
repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") +
conditional(debug, "-debug", escape = true, format = "%s") +
......
......@@ -25,6 +25,6 @@ abstract class Sambamba extends BiopetCommandLineFunction with Version {
executable = config("exe", default = "sambamba", namespace = "sambamba", freeVar = false)
def versionCommand = executable
def versionRegex = """sambamba v(.*)""".r
def versionRegex = """sambamba v?(.*)""".r
override def versionExitcode = List(0, 1)
}
\ No newline at end of file
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambamba markdup */
class SambambaMarkdup(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File")
var input: File = _
......@@ -32,17 +31,29 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
var removeDuplicates: Boolean = config("remove_duplicates", default = false)
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val hashTableSize: Option[Int] = config("hash-table-size", default = 262144)
val overflowListSize: Option[Int] = config("overflow-list-size", default = 200000)
val ioBufferSize: Option[Int] = config("io-buffer-size", default = 128)
val compressionLevel: Option[Int] = config("compression_level")
val hashTableSize: Option[Int] = config("hash-table-size")
val overflowListSize: Option[Int] = config("overflow-list-size")
val ioBufferSize: Option[Int] = config("io-buffer-size")
val showProgress: Boolean = config("show-progress", default = true)
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
override def beforeGraph(): Unit = {
indexOutput = new File(output + ".bai")
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("markdup") +
conditional(removeDuplicates, "--remove-duplicates") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
conditional(showProgress, "--show-progress") +
optional("--hash-table-size=", hashTableSize, spaceSeparated = false) +
optional("--overflow-list-size=", overflowListSize, spaceSeparated = false) +
optional("--io-buffer-size=", ioBufferSize, spaceSeparated = false) +
......@@ -51,10 +62,11 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
}
object SambambaMarkdup {
def apply(root: Configurable, input: File, output: File): SambambaMarkdup = {
def apply(root: Configurable, input: File, output: File, isIntermediate: Boolean = false): SambambaMarkdup = {
val markdup = new SambambaMarkdup(root)
markdup.input = input
markdup.output = output
markdup.isIntermediate = isIntermediate
markdup
}
......
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambamba merge */
class SambambaMerge(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File[s]")
var input: List[File] = Nil
......@@ -30,13 +29,29 @@ class SambambaMerge(val parent: Configurable) extends Sambamba {
var output: File = _
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val compressionLevel: Option[Int] = config("compression_level")
val header: Boolean = config("header", default = false)
val showProgress: Boolean = config("show-progress", default = true)
val filter: Option[String] = config("filter")
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
override def beforeGraph(): Unit = {
indexOutput = new File(output + ".bai")
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("merge") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
optional("-F", filter) +
conditional(header, "--header") +
conditional(showProgress, "--show-progress") +
required(output) +
repeat("", input)
repeat(input)
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/** Queue wrapper around the `nl.lumc.sasc.biopet.tools.vcfstats.VcfStatsForSv` tool:
  * counts structural variants in a VCF file by type and size class and writes the
  * counts to a YAML file that is fed back into the summary database. */
class VcfStatsForSv(val parent: Configurable) extends ToolCommandFunction with Summarizable {
def toolObject = nl.lumc.sasc.biopet.tools.vcfstats.VcfStatsForSv
// This job is a summary/statistics step, not a main pipeline output
mainFunction = false
// Input VCF file to gather SV statistics from
@Input(required = true)
var inputFile: File = _
// Size-class boundaries used to bin SV records by length; passed through to the tool
@Argument(required = true)
var histogramBinBoundaries: Array[Int] = _
// YAML file the tool writes its counts to
@Output(required = true)
var outputFile: File = _
override def defaultCoreMemory = 1.0
/** Command line: tool invocation plus input, output and one `--histBinBoundaries` per boundary. */
override def cmdLine = super.cmdLine +
required("-i", inputFile) +
required("-o", outputFile) +
repeat("--histBinBoundaries", histogramBinBoundaries)
/** Reads the tool's YAML output back in as a map for the summary database. */
def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(outputFile)
// No extra files to record in the summary besides the stats themselves
def summaryFiles: Map[String, File] = Map.empty
}
package nl.lumc.sasc.biopet.tools.vcfstats
import java.io.File
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, ToolCommand }
import scala.collection.JavaConversions._
/** Command-line tool that reads a VCF file of structural variants and counts
  * records per SV type (DEL/INS/DUP/INV/translocations), binned by size class. */
object VcfStatsForSv extends ToolCommand {
/** Commandline arguments */
case class Args(inputFile: File = null, outputFile: File = null, histBinBoundaries: Array[Int] = Array()) extends AbstractArgs
/** Parsing commandline arguments */
class OptParser extends AbstractOptParser {
opt[File]('i', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(inputFile = x)
} validate {
x => if (x.exists) success else failure("Input VCF required")
} text "Input VCF file (required)"
opt[File]('o', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(outputFile = x)
} text "Output file (required)"
// May be given multiple times; each occurrence appends one boundary value
opt[Int]("histBinBoundaries") required () unbounded () action { (x, c) =>
c.copy(histBinBoundaries = c.histBinBoundaries :+ x)
} text "When counting the records, sv-s are divided to different size classes, this parameter should give the boundaries between these classes."
}
protected var cmdArgs: Args = _
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
cmdArgs = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
logger.info(s"Parsing file: ${cmdArgs.inputFile}")
val stats: Map[String, Any] = getVariantCounts(cmdArgs.inputFile, cmdArgs.histBinBoundaries)
// Counts are persisted as YAML for downstream report/summary steps
ConfigUtils.mapToYamlFile(stats, cmdArgs.outputFile)
}
/** Parses a vcf-file and counts sv-s by type and size. Sv-s are divided to different size classes, the parameter histogramBinBoundaries gives the boundaries between these classes. */
def getVariantCounts(vcfFile: File, histogramBinBoundaries: Array[Int]): Map[String, Any] = {
// One bucket per size class: n boundaries define n + 1 classes
val delCounts, insCounts, dupCounts, invCounts = Array.fill(histogramBinBoundaries.size + 1) { 0 }
var traCount = 0
// Second argument `false`: no index required for a straight sequential scan
val reader = new VCFFileReader(vcfFile, false)
for (record <- reader) {
record.getAttributeAsString("SVTYPE", "") match {
// Translocations have no meaningful length, so they get a single counter
case "TRA" | "CTX" | "ITX" => traCount += 1
case svType => {
val size = record.getEnd - record.getStart
// Find the first size-class bucket whose upper boundary is >= size
var i = 0
while (i < histogramBinBoundaries.size && size > histogramBinBoundaries(i)) i += 1
svType match {
case "DEL" => delCounts(i) += 1
case "INS" => insCounts(i) += 1
case "DUP" => dupCounts(i) += 1
case "INV" => invCounts(i) += 1
case _ => logger.warn(s"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType")
}
}
}
}
reader.close()
Map("DEL" -> delCounts, "INS" -> insCounts, "DUP" -> dupCounts, "INV" -> invCounts, "TRA" -> traCount)
}
}
......@@ -13,6 +13,11 @@ parser$add_argument('--llabel', dest='llabel', type='character')
parser$add_argument('--title', dest='title', type='character')
parser$add_argument('--hideLegend', dest='hideLegend', type='character', default="false")
parser$add_argument('--removeZero', dest='removeZero', type='character', default="false")
parser$add_argument('--xLog10', dest='xLog10', type='character', default="false")
parser$add_argument('--yLog10', dest='yLog10', type='character', default="false")
parser$add_argument('--xLog10Breaks', dest='xLog10Breaks', nargs='+', type='integer')
parser$add_argument('--xLog10Labels', dest='xLog10Labels', nargs='+', type='character')
arguments <- parser$parse_args()
......@@ -31,14 +36,29 @@ DF1 <- melt(DF, id.var="Rank")
if (arguments$removeZero == "true") DF1 <- DF1[DF1$value > 0, ]
if (arguments$removeZero == "true") print("Removed 0 values")
ggplot(DF1, aes(x = Rank, y = value, group = variable, color = variable)) +
plot = ggplot(DF1, aes(x = Rank, y = value, group = variable, color = variable)) +
xlab(xlab) +
ylab(arguments$ylabel) +
guides(fill=guide_legend(title=arguments$llabel)) +
guides(color=guide_legend(title=arguments$llabel)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
ggtitle(arguments$title) +
theme_bw() +
theme(legend.position = legendPosition) +
geom_line()
if (arguments$xLog10 == "true") {
if (!is.null(arguments$xLog10Labels)) {
scale_x <- scale_x_log10(breaks = arguments$xLog10Breaks, labels=arguments$xLog10Labels)
} else {
scale_x <- scale_x_log10()
}
plot <- plot + scale_x
}
if (arguments$yLog10 == "true") {
plot <- plot + scale_y_log10()
}
plot
dev.off()
......@@ -17,7 +17,6 @@ package nl.lumc.sasc.biopet.utils.intervals
import java.io.{ File, PrintWriter }
import htsjdk.samtools.SAMSequenceDictionary
import htsjdk.samtools.reference.FastaSequenceFile
import scala.collection.JavaConversions._
import scala.collection.mutable
......@@ -112,6 +111,15 @@ case class BedRecordList(val chrRecords: Map[String, List[BedRecord]], val heade
allRecords.foreach(writer.println)
writer.close()
}
/** Returns the fraction of the total length covered by these regions relative to
  * the given length (e.g. a reference length).
  *
  * NOTE(review): the original body was `length.toDouble / length`, which always
  * evaluates to 1.0 because the parameter shadowed the intended total region
  * length. Assumes `BedRecord` exposes a numeric `length` (end - start) member —
  * TODO confirm against BedRecord's definition. */
def fractionOf(length: Long): Double = allRecords.map(_.length.toLong).sum.toDouble / length
/** Returns the fraction of these regions' total length relative to the length of the given reference dictionary. */
def fractionOfReference(dict: SAMSequenceDictionary): Double = fractionOf(dict.getReferenceLength)
/** Returns the fraction of these regions' total length relative to the reference in the given FASTA file. */
def fractionOfReference(file: File): Double = fractionOfReference(FastaUtils.getCachedDict(file))
}
object BedRecordList {
......@@ -131,6 +139,32 @@ object BedRecordList {
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil)
/**
* This creates a [[BedRecordList]] based on multiple files. This method combines overlapping regions
*
* @param bedFiles Input bed files
* @return
*/
def fromFilesCombine(bedFiles: File*) = {
fromFiles(bedFiles, combine = true)
}
/**
 * This creates a [[BedRecordList]] based on multiple files
 *
 * @param bedFiles Input bed files
 * @param combine When true overlapping regions are merged
 * @return Combined [[BedRecordList]] over all input files
 */
def fromFiles(bedFiles: Seq[File], combine: Boolean = false) = {
// Fold all files into a single record list; ordering within the list is not significant here
val list = bedFiles.foldLeft(empty)((a, b) => fromList(fromFile(b).allRecords ++ a.allRecords))
if (combine) list.combineOverlap
else list
}
/** This created a empty [[BedRecordList]] */
def empty = fromList(Nil)
def fromFile(bedFile: File) = {
val reader = Source.fromFile(bedFile)
val all = reader.getLines().toList
......
......@@ -39,6 +39,13 @@ class LinePlot(val parent: Configurable) extends Rscript {
var hideLegend: Boolean = config("hide_legend", default = false)
var removeZero: Boolean = config("removeZero", default = false)
// whether to use log scale for x and y axis
var xLog10: Boolean = false
var yLog10: Boolean = false
var xLog10AxisTicks: Seq[String] = Seq.empty
var xLog10AxisLabels: Seq[String] = Seq.empty
override def cmd = super.cmd ++
Seq("--input", input.getAbsolutePath) ++
Seq("--output", output.getAbsolutePath) ++
......@@ -49,7 +56,11 @@ class LinePlot(val parent: Configurable) extends Rscript {
llabel.map(Seq("--llabel", _)).getOrElse(Seq()) ++
title.map(Seq("--title", _)).getOrElse(Seq()) ++
(if (hideLegend) Seq("--hideLegend", "true") else Seq()) ++
(if (removeZero) Seq("--removeZero", "true") else Seq())
(if (removeZero) Seq("--removeZero", "true") else Seq()) ++
(if (xLog10) Seq("--xLog10", "true") else Seq()) ++
(if (yLog10) Seq("--yLog10", "true") else Seq()) ++
(if (xLog10AxisTicks.nonEmpty) xLog10AxisTicks.+:("--xLog10Breaks") else Seq()) ++
(if (xLog10AxisLabels.nonEmpty) xLog10AxisLabels.+:("--xLog10Labels") else Seq())
}
object LinePlot {
......
......@@ -118,6 +118,11 @@ At this moment the following variant callers can be used
| shiva | correct_readgroups | Boolean | false | Attempt to correct read groups | Only used when input is a bam file |
| shiva | amplicon_bed | Path | | Path to target bed file | all |
| shiva | regions_of_interest | Array of paths | | Array of paths to region of interest (e.g. gene panels) bed files | all |
| shivavariantcalling | gender_aware_calling | Boolean | false | Enables gender aware variantcalling | haplotypecaller_gvcf |
| shivavariantcalling | haploid_regions | Bed file | | Haploid regions for all genders | haplotypecaller_gvcf |
| shivavariantcalling | haploid_regions_male | Bed file | | Haploid regions for males | haplotypecaller_gvcf |
| shivavariantcalling | haploid_regions_female | Bed file | | Haploid regions for females | haplotypecaller_gvcf |
| shiva | amplicon_bed | Path | | Path to target bed file | all |
| vcffilter | min_sample_depth | Integer | 8 | Filter variants with at least x coverage | raw |
| vcffilter | min_alternate_depth | Integer | 2 | Filter variants with at least x depth on the alternate allele | raw |
| vcffilter | min_samples_pass | Integer | 1 | Minimum amount of samples which pass custom filter (requires additional flags) | raw |
......@@ -130,6 +135,14 @@ For all the options, please see the corresponding documentation for the mapping
## Advanced usage
### Gender aware variantcalling
In Shiva and ShivaVariantcalling while using haplotypecaller_gvcf it is possible to do gender aware variantcalling. In this mode it is required to supply bed files to define haploid regions (see config values).
- For males, `haploid_regions` and `haploid_regions_male` are used.
- For females, `haploid_regions` and `haploid_regions_female` are used.
The pipeline will use a union of those files. At least 1 file is required while using this mode.
### Reporting modes
Shiva furthermore supports three modes. The default and recommended option is `multisample_variantcalling`.
......
......@@ -19,7 +19,7 @@ import java.io.File
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript }
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
......@@ -28,8 +28,8 @@ import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import MultisampleMapping.MergeStrategy
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge }
import scala.collection.JavaConversions._
......@@ -125,7 +125,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript
} else None
def bamFile: Option[File] = mapping match {
case Some(m) => Some(m.finalBamFile)
case Some(m) => Some(m.mergedBamFile)
case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
case _ => None
}
......@@ -247,9 +247,9 @@ trait MultisampleMappingTrait extends MultiSampleQScript
mergeStrategy match {
case MergeStrategy.None =>
case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 =>
case (MergeStrategy.MergeSam) if libraries.flatMap(_._2.bamFile).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*)
case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
case (MergeStrategy.PreProcessMergeSam) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*)
case MergeStrategy.MergeSam =>
add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
......@@ -259,6 +259,20 @@ trait MultisampleMappingTrait extends MultiSampleQScript
add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessMarkDuplicates =>
add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessSambambaMarkdup =>
val mergedBam = if (libraries.flatMap(_._2.bamFile).size == 1) {
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, new File(sampleDir, "merged.bam")): _*)
libraries.flatMap(_._2.preProcessBam).head
} else {
val merge = new SambambaMerge(qscript)
merge.input = libraries.flatMap(_._2.preProcessBam).toList
merge.output = new File(sampleDir, "merged.bam")
merge.isIntermediate = true
add(merge)
merge.output
}
add(SambambaMarkdup(qscript, mergedBam, bamFile.get, isIntermediate = !keepMergedFiles))
add(Ln(qscript, bamFile.get + ".bai", bamFile.get.getAbsolutePath.stripSuffix(".bam") + ".bai"))
case _ => throw new IllegalStateException("This should not be possible, unimplemented MergeStrategy?")
}
......@@ -301,7 +315,7 @@ class MultisampleMapping(val parent: Configurable) extends QScript with Multisam
object MultisampleMapping extends PipelineCommand {
object MergeStrategy extends Enumeration {
val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates = Value
val None, MergeSam, MarkDuplicates, PreProcessMergeSam, PreProcessMarkDuplicates, PreProcessSambambaMarkdup = Value
}
/** When a file is not absolute, an error is raised at the end of the script of a pipeline */
......
......@@ -20,6 +20,7 @@ import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.extensions.centrifuge.Centrifuge
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, MergeSamFiles }
import nl.lumc.sasc.biopet.extensions.sambamba.SambambaMarkdup
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, Logging }
import nl.lumc.sasc.biopet.utils.config.Config
import org.apache.commons.io.FileUtils
......@@ -91,16 +92,22 @@ trait MultisampleMappingTestTrait extends TestNGSuite with Matchers {
pipeline.script()
val numberFastqLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0) + (if (sample3 && bamToFastq) 1 else 0) + (if (sample4 && bamToFastq) 1 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0) + (if (sample4) 1 else 0)
val pipesJobs = pipeline.functions.filter(_.isInstanceOf[BiopetCommandLineFunction])
.flatMap(_.asInstanceOf[BiopetCommandLineFunction].pipesJobs)
if (merge == MultisampleMapping.MergeStrategy.PreProcessMarkDuplicates) {
""
}
import MultisampleMapping.MergeStrategy
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberFastqLibs +
(if (sample2 && (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates)) 1 else 0))
(if (merge == MergeStrategy.MarkDuplicates || merge == MergeStrategy.PreProcessMarkDuplicates) numberSamples else 0))
pipeline.functions.count(_.isInstanceOf[MergeSamFiles]) shouldBe (
(if (sample2 && (merge == MergeStrategy.MergeSam || merge == MergeStrategy.PreProcessMergeSam)) 1 else 0))
pipeline.functions.count(_.isInstanceOf[SambambaMarkdup]) shouldBe
(if (merge == MergeStrategy.PreProcessSambambaMarkdup) numberSamples else 0)
pipeline.samples.foreach {
case (sampleName, sample) =>
if (merge == MergeStrategy.None) sample.bamFile shouldBe None
......@@ -211,6 +218,7 @@ object MultisampleMappingTestTrait {
"sickle" -> Map("exe" -> "test"),
"cutadapt" -> Map("exe" -> "test"),
"bwa" -> Map("exe" -> "test"),
"sambamba" -> Map("exe" -> "test"),
"samtools" -> Map("exe" -> "test"),
"igvtools" -> Map("exe" -> "test", "igvtools_jar" -> "test"),
"wigtobigwig" -> Map("exe" -> "test"),
......@@ -232,7 +240,7 @@ object MultisampleMappingTestTrait {
)))
val sample2 = Map(
"samples" -> Map("sample3" -> Map("libraries" -> Map(
"samples" -> Map("sample2" -> Map("libraries" -> Map(
"lib1" -> Map(
"R1" -> inputTouch("2_1_R1.fq"),
"R2" -> inputTouch("2_1_R2.fq")
......
#import(nl.lumc.sasc.biopet.utils.summary.db.SummaryDb)
#import(nl.lumc.sasc.biopet.utils.summary.db.Schema.Sample)
#import(java.io.File)
<%@ var summary: SummaryDb %>
<%@ var allSamples: Seq[Sample]%>
<%@ var rootPath: String %>
<%@ var outputDir: File %>
<%@ var runId: Int %>
<%@ var sampleId: Option[Int] = None %>
<%@ var showPlot: Boolean = false %>
<%@ var showTable: Boolean = true %>
<%@ var showIntro: Boolean = true %>
#{
val sampleNames: Seq[String] = sampleId match {
case Some(sampleId) => Seq(allSamples.filter(s => s.id == sampleId).head.name)
case _ => allSamples.collect({case s: Sample => s.name}).sorted
}
val counts: Map[String, Map[String, Array[Long]]] = ShivaSvCallingReport.parseSummaryForSvCounts(summary, runId, sampleNames)
val traCounts: Map[String, Long] = ShivaSvCallingReport.parseSummaryForTranslocations(summary, runId, sampleNames)
var svTypes = List(
SvTypeForReport("DEL", "Deletions", "svSummaryDeletions.tsv", "svSummaryDeletions.png"),
SvTypeForReport("DUP", "Duplications", "svSummaryDuplications.tsv", "svSummaryDuplications.png"),
SvTypeForReport("INS", "Insertions", "svSummaryInsertions.tsv", "svSummaryInsertions.png"),
SvTypeForReport("INV", "Inversions", "svSummaryInversions.tsv", "svSummaryInversions.png"))
svTypes = svTypes.filter(sv => counts.contains(sv.svType))
val tsvAllTypes = "svSummary.tsv"
ShivaSvCallingReport.writeTsvFiles(sampleNames, counts, svTypes, tsvAllTypes, outputDir)
ShivaSvCallingReport.createPlots(svTypes, outputDir)
}#
#if (showPlot)
<div class="panel-body">
#for (sv <- svTypes)
<img src="${sv.pngFileName}" />
#end
</div>
#if (traCounts.nonEmpty)
<div class="panel-body">
<h5>Number of translocation events detected:</h5>
<table class="table table-condensed" style="width:auto">
<thead><tr>
#for (sampleName <- sampleNames)
<th>${sampleName}</th>
#end
</tr></thead>
<tbody><tr>
#for (sampleName <- sampleNames)
#{
val sampleCount: String = traCounts.get(sampleName) match {
case Some(c) => c.toString()
case None => "-"
}
}#
<td>${sampleCount}</td>
#end
</tr></tbody>
</table>
</div>
#end
<div class="panel-footer">
<button type="button" class="btn btn-info" data-toggle="collapse" data-target="#svSummaryTable">
#if (showTable)
<i class="glyphicon glyphicon-eye-close"></i> Hide tables
#else
<i class="glyphicon glyphicon-eye-open"></i> Show tables
#end
</button>
<a href="${tsvAllTypes}"><button type="button" class="btn btn-info"><i class="glyphicon glyphicon-cloud-download"></i> TSV file</button></a>
</div>
#end
<div class="panel-body collapse #if (showTable)in#end" id="svSummaryTable">
#for (sv <- svTypes)
#{
val countsForSvType: Map[String, Array[Long]] = counts(sv.svType)
val missingCounts: Array[String] = Array.fill(ShivaSvCallingReport.histogramText.size) { "-" }
}#
<h3>${sv.displayText}</h3>
<table class="table sortable-theme-bootstrap" data-sortable>
<thead><tr><th data-sorted="true" data-sorted-direction="ascending">Sample</th>
#for (text <- ShivaSvCallingReport.histogramText)
<th>${text}</th>
#end
</tr></thead>
<tbody>
#for (sampleName <- sampleNames)
<tr>
<td><a href="${rootPath}Samples/${sampleName}/index.html">${sampleName}</a></td>
#{
val sampleCounts: Array[String] = countsForSvType.get(sampleName) match {
case Some(c: Array[_]) => c.collect({case x => x.toString()})
case None => missingCounts
}
}#
#for (countForSize <- sampleCounts)
<td>${countForSize}</td>
#end
</tr>
#end
</tbody>
</table>
#end
</div>
......@@ -60,11 +60,13 @@ class Shiva(val parent: Configurable) extends QScript with MultisampleMappingTra
override def namePrefix = "multisample"
override def configNamespace: String = "shivavariantcalling"
override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
genders = samples.map { case (sampleName, sample) => sampleName -> sample.gender }
}
else new ShivaVariantcalling(qscript) {
override def configNamespace = "shivavariantcalling"
sampleId = sample
libId = library