Commit 1825f7be authored by rhpvorderman's avatar rhpvorderman

Merge branch 'Fix-BIOPET-479' of github.com:biopet/biopet into Fix-BIOPET-479

parents e1e6552e 52b9c669
......@@ -98,7 +98,7 @@ class BastyTest extends TestNGSuite with Matchers {
val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 2 else 0)
val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0)
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + (if (sample2) 1 else 0))
pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe (numberLibs + numberSamples)
// Gatk preprocess
pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe (numberLibs * (if (realign) 1 else 0) + (if (sample2 && realign) 1 else 0))
......
......@@ -14,6 +14,7 @@ rownames(heat) <- heat[,1]
heat<- heat[,-1]
heat<- as.matrix(heat)
textMargin <- max(sapply(rownames(heat), nchar)) + 4
colNumber <- 50
col <- rev(colorRampPalette(brewer.pal(11, "Spectral"))(colNumber))
for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[heat < 1]))) / dist(range(heat[heat < 1])) * colNumber))) {
......@@ -22,7 +23,7 @@ for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[he
col[length(col)] <- "#00FF00"
png(file = outputArg, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
hc <- hclust(d = dist(heat))
......@@ -31,5 +32,5 @@ plot(as.dendrogram(hc), horiz=TRUE, asp=0.02)
dev.off()
png(file = outputArgClustering, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
......@@ -54,7 +54,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
}
writer.println("set -eubf")
writer.println("set -o pipefail")
writer.println(this.commandLine)
writer.println(lines.mkString("\n"))
jobDelayTime.foreach(x => writer.println(s"sleep $x"))
writer.close()
}
......@@ -99,7 +99,13 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
beforeGraph()
internalBeforeGraph()
if (jobOutputFile != null) this.commandDirectory = this.jobOutputFile.getAbsoluteFile.getParentFile
if (jobOutputFile != null) {
this.commandDirectory = this.jobOutputFile.getAbsoluteFile.getParentFile
this match {
case s: ScatterGatherableFunction => s.scatterGatherDirectory = new File(this.commandDirectory, ".scatter")
case _ =>
}
}
super.freezeFieldValues()
}
......
......@@ -126,17 +126,21 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
}
})
logger.info("Adding report")
this match {
case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
logger.info("Write report is skipped because sample flag is used")
case _ => reportClass.foreach { report =>
for (f <- functions) f match {
case w: WriteSummary => report.deps :+= w.jobOutputFile
case _ =>
val writeHtmlReport: Boolean = config("write_html_report", default = true)
if (writeHtmlReport) {
logger.info("Adding report")
this match {
case q: MultiSampleQScript if q.onlySamples.nonEmpty && !q.samples.forall(x => q.onlySamples.contains(x._1)) =>
logger.info("Write report is skipped because sample flag is used")
case _ => reportClass.foreach { report =>
for (f <- functions) f match {
case w: WriteSummary => report.deps :+= w.jobOutputFile
case _ =>
}
report.jobOutputFile = new File(report.outputDir, ".report.out")
add(report)
}
report.jobOutputFile = new File(report.outputDir, ".report.out")
add(report)
}
}
......
......@@ -33,7 +33,7 @@ class BwaMem(val parent: Configurable) extends Bwa with Reference {
var R2: File = _
@Input(doc = "The reference file for the bam files.", shortName = "R")
var reference: File = null
var reference: File = _
@Output(doc = "Output file SAM", shortName = "output")
var output: File = _
......@@ -42,7 +42,7 @@ class BwaMem(val parent: Configurable) extends Bwa with Reference {
var k: Option[Int] = config("k")
var r: Option[Float] = config("r")
var S: Boolean = config("S", default = false)
var M: Boolean = config("M", default = true)
var M: Boolean = config("M", default = false)
var w: Option[Int] = config("w")
var d: Option[Int] = config("d")
var c: Option[Int] = config("c")
......@@ -75,7 +75,7 @@ class BwaMem(val parent: Configurable) extends Bwa with Reference {
if (reference == null) reference = referenceFasta()
}
def cmdLine = {
def cmdLine: String = {
required(executable) +
required("mem") +
optional("-k", k) +
......
......@@ -38,6 +38,8 @@ class CleverFixVCF(val parent: Configurable) extends BiopetJavaCommandLineFuncti
@Argument(doc = "Samplename")
var sampleName: String = _
override def defaultCoreMemory = 4.0
override def cmdLine = super.cmdLine +
required("-i", input) +
required("-o", output) +
......
......@@ -56,6 +56,8 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca
@Argument(fullName = "maxGGAARExtension", shortName = "maxGGAARExtension", doc = "the maximum extent into the full active region extension that we're willing to go in genotyping our events for GGA mode", required = false, exclusiveOf = "", validation = "")
var maxGGAARExtension: Option[Int] = config("maxGGAARExtension")
var useNewAFCalculator: Boolean = config("useNewAFCalculator", default = false)
/** Include at least this many bases around an event for calling indels */
@Argument(fullName = "paddingAroundIndels", shortName = "paddingAroundIndels", doc = "Include at least this many bases around an event for calling indels", required = false, exclusiveOf = "", validation = "")
var paddingAroundIndels: Option[Int] = config("paddingAroundIndels")
......@@ -441,6 +443,7 @@ class HaplotypeCaller(val parent: Configurable) extends CommandLineGATK with Sca
optional("-paddingAroundSNPs", paddingAroundSNPs, spaceSeparated = true, escape = true, format = "%s") +
repeat("-comp", comp, formatPrefix = TaggedFile.formatCommandLineParameter, spaceSeparated = true, escape = true, format = "%s") +
repeat("-A", annotation, spaceSeparated = true, escape = true, format = "%s") +
conditional(useNewAFCalculator, "--useNewAFCalculator") +
repeat("-XA", excludeAnnotation, spaceSeparated = true, escape = true, format = "%s") +
repeat("-G", group, spaceSeparated = true, escape = true, format = "%s") +
conditional(debug, "-debug", escape = true, format = "%s") +
......
......@@ -25,6 +25,6 @@ abstract class Sambamba extends BiopetCommandLineFunction with Version {
executable = config("exe", default = "sambamba", namespace = "sambamba", freeVar = false)
def versionCommand = executable
def versionRegex = """sambamba v(.*)""".r
def versionRegex = """sambamba v?(.*)""".r
override def versionExitcode = List(0, 1)
}
\ No newline at end of file
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambemba markdup */
class SambambaMarkdup(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File")
var input: File = _
......@@ -32,17 +31,29 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
var removeDuplicates: Boolean = config("remove_duplicates", default = false)
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val hashTableSize: Option[Int] = config("hash-table-size", default = 262144)
val overflowListSize: Option[Int] = config("overflow-list-size", default = 200000)
val ioBufferSize: Option[Int] = config("io-buffer-size", default = 128)
val compressionLevel: Option[Int] = config("compression_level")
val hashTableSize: Option[Int] = config("hash-table-size")
val overflowListSize: Option[Int] = config("overflow-list-size")
val ioBufferSize: Option[Int] = config("io-buffer-size")
val showProgress: Boolean = config("show-progress", default = true)
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
override def beforeGraph(): Unit = {
indexOutput = new File(output + ".bai")
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("markdup") +
conditional(removeDuplicates, "--remove-duplicates") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
conditional(showProgress, "--show-progress") +
optional("--hash-table-size=", hashTableSize, spaceSeparated = false) +
optional("--overflow-list-size=", overflowListSize, spaceSeparated = false) +
optional("--io-buffer-size=", ioBufferSize, spaceSeparated = false) +
......@@ -51,10 +62,11 @@ class SambambaMarkdup(val parent: Configurable) extends Sambamba {
}
object SambambaMarkdup {
def apply(root: Configurable, input: File, output: File): SambambaMarkdup = {
def apply(root: Configurable, input: File, output: File, isIntermediate: Boolean = false): SambambaMarkdup = {
val markdup = new SambambaMarkdup(root)
markdup.input = input
markdup.output = output
markdup.isIntermediate = isIntermediate
markdup
}
......
......@@ -21,7 +21,6 @@ import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/** Extension for sambemba merge */
class SambambaMerge(val parent: Configurable) extends Sambamba {
override def defaultThreads = 4
@Input(doc = "Bam File[s]")
var input: List[File] = Nil
......@@ -30,13 +29,29 @@ class SambambaMerge(val parent: Configurable) extends Sambamba {
var output: File = _
// @doc: compression_level 6 is average, 0 = no compression, 9 = best
val compressionLevel: Option[Int] = config("compression_level", default = 6)
val compressionLevel: Option[Int] = config("compression_level")
val header: Boolean = config("header", default = false)
val showProgress: Boolean = config("show-progress", default = true)
val filter: Option[String] = config("filter")
override def defaultThreads = 4
override def defaultCoreMemory = 4.0
@Output
private var indexOutput: File = _
override def beforeGraph(): Unit = {
indexOutput = new File(output + ".bai")
}
/** Returns command to execute */
def cmdLine = required(executable) +
def cmdLine: String = required(executable) +
required("merge") +
optional("-t", nCoresRequest) +
optional("-l", compressionLevel) +
optional("-F", filter) +
conditional(header, "--header") +
conditional(showProgress, "--show-progress") +
required(output) +
repeat("", input)
repeat(input)
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
class VcfStatsForSv(val parent: Configurable) extends ToolCommandFunction with Summarizable {
def toolObject = nl.lumc.sasc.biopet.tools.vcfstats.VcfStatsForSv
mainFunction = false
@Input(required = true)
var inputFile: File = _
@Argument(required = true)
var histogramBinBoundaries: Array[Int] = _
@Output(required = true)
var outputFile: File = _
override def defaultCoreMemory = 1.0
override def cmdLine = super.cmdLine +
required("-i", inputFile) +
required("-o", outputFile) +
repeat("--histBinBoundaries", histogramBinBoundaries)
def summaryStats: Map[String, Any] = ConfigUtils.fileToConfigMap(outputFile)
def summaryFiles: Map[String, File] = Map.empty
}
......@@ -14,6 +14,7 @@ rownames(heat) <- heat[,1]
heat<- heat[,-1]
heat<- as.matrix(heat)
textMargin <- max(sapply(rownames(heat), nchar)) + 4
colNumber <- 50
col <- rev(colorRampPalette(brewer.pal(11, "Spectral"))(colNumber))
for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[heat < 1]))) / dist(range(heat[heat < 1])) * colNumber))) {
......@@ -22,7 +23,7 @@ for (i in (colNumber+1):(colNumber+round((dist(range(heat)) - dist(range(heat[he
col[length(col)] <- "#00FF00"
png(file = outputArg, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv=NA, Rowv=NA, dendrogram="none", margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
hc <- hclust(d = dist(heat))
......@@ -31,5 +32,5 @@ plot(as.dendrogram(hc), horiz=TRUE, asp=0.02)
dev.off()
png(file = outputArgClustering, width = 1200, height = 1200)
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(12, 12), na.color="#00FF00")
heatmap.2(heat, trace = 'none', col = col, Colv="Rowv", dendrogram="row",margins = c(textMargin, textMargin), na.color="#00FF00")
dev.off()
package nl.lumc.sasc.biopet.tools.vcfstats
import java.io.File
import htsjdk.variant.vcf.VCFFileReader
import nl.lumc.sasc.biopet.utils.{ ConfigUtils, ToolCommand }
import scala.collection.JavaConversions._
object VcfStatsForSv extends ToolCommand {
/** Commandline arguments */
case class Args(inputFile: File = null, outputFile: File = null, histBinBoundaries: Array[Int] = Array()) extends AbstractArgs
/** Parsing commandline arguments */
class OptParser extends AbstractOptParser {
opt[File]('i', "inputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(inputFile = x)
} validate {
x => if (x.exists) success else failure("Input VCF required")
} text "Input VCF file (required)"
opt[File]('o', "outputFile") required () maxOccurs 1 valueName "<file>" action { (x, c) =>
c.copy(outputFile = x)
} text "Output file (required)"
opt[Int]("histBinBoundaries") required () unbounded () action { (x, c) =>
c.copy(histBinBoundaries = c.histBinBoundaries :+ x)
} text "When counting the records, sv-s are divided to different size classes, this parameter should give the boundaries between these classes."
}
protected var cmdArgs: Args = _
def main(args: Array[String]): Unit = {
val argsParser = new OptParser
cmdArgs = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
logger.info(s"Parsing file: ${cmdArgs.inputFile}")
val stats: Map[String, Any] = getVariantCounts(cmdArgs.inputFile, cmdArgs.histBinBoundaries)
ConfigUtils.mapToYamlFile(stats, cmdArgs.outputFile)
}
/** Parses a vcf-file and counts sv-s by type and size. Sv-s are divided to different size classes, the parameter histogramBinBoundaries gives the boundaries between these classes. */
def getVariantCounts(vcfFile: File, histogramBinBoundaries: Array[Int]): Map[String, Any] = {
val delCounts, insCounts, dupCounts, invCounts = Array.fill(histogramBinBoundaries.size + 1) { 0 }
var traCount = 0
val reader = new VCFFileReader(vcfFile, false)
for (record <- reader) {
record.getAttributeAsString("SVTYPE", "") match {
case "TRA" | "CTX" | "ITX" => traCount += 1
case svType => {
val size = record.getEnd - record.getStart
var i = 0
while (i < histogramBinBoundaries.size && size > histogramBinBoundaries(i)) i += 1
svType match {
case "DEL" => delCounts(i) += 1
case "INS" => insCounts(i) += 1
case "DUP" => dupCounts(i) += 1
case "INV" => invCounts(i) += 1
case _ => logger.warn(s"Vcf file contains a record of unknown type: file-$vcfFile, type-$svType")
}
}
}
}
reader.close()
Map("DEL" -> delCounts, "INS" -> insCounts, "DUP" -> dupCounts, "INV" -> invCounts, "TRA" -> traCount)
}
}
......@@ -13,6 +13,11 @@ parser$add_argument('--llabel', dest='llabel', type='character')
parser$add_argument('--title', dest='title', type='character')
parser$add_argument('--hideLegend', dest='hideLegend', type='character', default="false")
parser$add_argument('--removeZero', dest='removeZero', type='character', default="false")
parser$add_argument('--xLog10', dest='xLog10', type='character', default="false")
parser$add_argument('--yLog10', dest='yLog10', type='character', default="false")
parser$add_argument('--xLog10Breaks', dest='xLog10Breaks', nargs='+', type='integer')
parser$add_argument('--xLog10Labels', dest='xLog10Labels', nargs='+', type='character')
arguments <- parser$parse_args()
......@@ -31,14 +36,29 @@ DF1 <- melt(DF, id.var="Rank")
if (arguments$removeZero == "true") DF1 <- DF1[DF1$value > 0, ]
if (arguments$removeZero == "true") print("Removed 0 values")
ggplot(DF1, aes(x = Rank, y = value, group = variable, color = variable)) +
plot = ggplot(DF1, aes(x = Rank, y = value, group = variable, color = variable)) +
xlab(xlab) +
ylab(arguments$ylabel) +
guides(fill=guide_legend(title=arguments$llabel)) +
guides(color=guide_legend(title=arguments$llabel)) +
theme(axis.text.x = element_text(angle = 90, hjust = 1, size = 8)) +
ggtitle(arguments$title) +
theme_bw() +
theme(legend.position = legendPosition) +
geom_line()
if (arguments$xLog10 == "true") {
if (!is.null(arguments$xLog10Labels)) {
scale_x <- scale_x_log10(breaks = arguments$xLog10Breaks, labels=arguments$xLog10Labels)
} else {
scale_x <- scale_x_log10()
}
plot <- plot + scale_x
}
if (arguments$yLog10 == "true") {
plot <- plot + scale_y_log10()
}
plot
dev.off()
......@@ -17,7 +17,6 @@ package nl.lumc.sasc.biopet.utils.intervals
import java.io.{ File, PrintWriter }
import htsjdk.samtools.SAMSequenceDictionary
import htsjdk.samtools.reference.FastaSequenceFile
import scala.collection.JavaConversions._
import scala.collection.mutable
......@@ -112,6 +111,15 @@ case class BedRecordList(val chrRecords: Map[String, List[BedRecord]], val heade
allRecords.foreach(writer.println)
writer.close()
}
/** This return the fraction of the regions comparing to a length */
def fractionOf(length: Long): Double = length.toDouble / length
/** This return the fraction of the regions comparing to a reference */
def fractionOfReference(dict: SAMSequenceDictionary): Double = fractionOf(dict.getReferenceLength)
/** This return the fraction of the regions comparing to a reference */
def fractionOfReference(file: File): Double = fractionOfReference(FastaUtils.getCachedDict(file))
}
object BedRecordList {
......@@ -131,6 +139,32 @@ object BedRecordList {
def fromList(records: TraversableOnce[BedRecord]): BedRecordList = fromListWithHeader(records, Nil)
/**
* This creates a [[BedRecordList]] based on multiple files. This method combines overlapping regions
*
* @param bedFiles Input bed files
* @return
*/
def fromFilesCombine(bedFiles: File*) = {
fromFiles(bedFiles, combine = true)
}
/**
* This creates a [[BedRecordList]] based on multiple files
*
* @param bedFiles Input bed files
* @param combine When true overlaping regions are merged
* @return
*/
def fromFiles(bedFiles: Seq[File], combine: Boolean = false) = {
val list = bedFiles.foldLeft(empty)((a, b) => fromList(fromFile(b).allRecords ++ a.allRecords))
if (combine) list.combineOverlap
else list
}
/** This created a empty [[BedRecordList]] */
def empty = fromList(Nil)
def fromFile(bedFile: File) = {
val reader = Source.fromFile(bedFile)
val all = reader.getLines().toList
......
......@@ -39,6 +39,13 @@ class LinePlot(val parent: Configurable) extends Rscript {
var hideLegend: Boolean = config("hide_legend", default = false)
var removeZero: Boolean = config("removeZero", default = false)
// whether to use log scale for x and y axis
var xLog10: Boolean = false
var yLog10: Boolean = false
var xLog10AxisTicks: Seq[String] = Seq.empty
var xLog10AxisLabels: Seq[String] = Seq.empty
override def cmd = super.cmd ++
Seq("--input", input.getAbsolutePath) ++
Seq("--output", output.getAbsolutePath) ++
......@@ -49,7 +56,11 @@ class LinePlot(val parent: Configurable) extends Rscript {
llabel.map(Seq("--llabel", _)).getOrElse(Seq()) ++
title.map(Seq("--title", _)).getOrElse(Seq()) ++
(if (hideLegend) Seq("--hideLegend", "true") else Seq()) ++
(if (removeZero) Seq("--removeZero", "true") else Seq())
(if (removeZero) Seq("--removeZero", "true") else Seq()) ++
(if (xLog10) Seq("--xLog10", "true") else Seq()) ++
(if (yLog10) Seq("--yLog10", "true") else Seq()) ++
(if (xLog10AxisTicks.nonEmpty) xLog10AxisTicks.+:("--xLog10Breaks") else Seq()) ++
(if (xLog10AxisLabels.nonEmpty) xLog10AxisLabels.+:("--xLog10Labels") else Seq())
}
object LinePlot {
......
......@@ -118,6 +118,11 @@ At this moment the following variant callers can be used
| shiva | correct_readgroups | Boolean | false | Attempt to correct read groups | Only used when input is a bam file |
| shiva | amplicon_bed | Path | | Path to target bed file | all |
| shiva | regions_of_interest | Array of paths | | Array of paths to region of interest (e.g. gene panels) bed files | all |
| shivavariantcalling | gender_aware_calling | Boolean | false | Enables gander aware variantcalling | haplotypecaller_gvcf |
| shivavariantcalling | hap̦loid_regions | Bed file | | Haploid regions for all genders | haplotypecaller_gvcf |
| shivavariantcalling | hap̦loid_regions_male | Bed file | | Haploid regions for males | haplotypecaller_gvcf |
| shivavariantcalling | hap̦loid_regions_female | Bed file | | Haploid regions for females | haplotypecaller_gvcf |
| shiva | amplicon_bed | Path | | Path to target bed file | all |
| vcffilter | min_sample_depth | Integer | 8 | Filter variants with at least x coverage | raw |
| vcffilter | min_alternate_depth | Integer | 2 | Filter variants with at least x depth on the alternate allele | raw |
| vcffilter | min_samples_pass | Integer | 1 | Minimum amount of samples which pass custom filter (requires additional flags) | raw |
......@@ -130,6 +135,14 @@ For all the options, please see the corresponding documentation for the mapping
## Advanced usage
### Gender aware variantcalling
In Shiva and ShivaVariantcalling while using haplotypecaller_gvcf it is possible to do gender aware variantcalling. In this mode it required to supply bed files to define haploid regions (see config values).
- For males, `hap̦loid_regions` and `hap̦loid_regions_male` is used.
- For females, `hap̦loid_regions` and `hap̦loid_regions_female` is used.
The pipeline will use a union of those files. At least 1 file is required while using this mode.
### Reporting modes
Shiva furthermore supports three modes. The default and recommended option is `multisample_variantcalling`.
......
Subproject commit 86ee6287b66562544e701f9ffa7cbbf0af9f3eba
Subproject commit 677d757419f515779766b10149b0e92eff71e5b2
......@@ -56,12 +56,7 @@ class GearsSingle(val parent: Configurable) extends QScript with SummaryQScript
if (sampleId == null || sampleId == None) Logging.addError("Missing sample ID on GearsSingle module")
if (outputName == null) {
if (fastqR1.nonEmpty) outputName = fastqR1.headOption.map(_.getName
.stripSuffix(".gz")
.stripSuffix(".fastq")
.stripSuffix(".fq"))
.getOrElse("noName")
else outputName = bamFile.map(_.getName.stripSuffix(".bam")).getOrElse("noName")
outputName = sampleId.getOrElse("noName") + libId.map("-" + _).getOrElse("")
}
if (fastqR1.nonEmpty) {
......
......@@ -108,7 +108,7 @@ abstract class TestGearsSingle extends TestNGSuite with Matchers {
gears.outputName shouldBe "test"
} else {
// in the following cases the filename should have been determined by the filename
gears.outputName shouldBe (if (inputMode == Some("bam")) "bamfile" else "R1")
gears.outputName shouldBe "sampleName-libName"
}
val pipesJobs = gears.functions.filter(_.isInstanceOf[BiopetCommandLineFunction])
......
......@@ -19,7 +19,7 @@ import java.io.File
import htsjdk.samtools.SamReaderFactory
import htsjdk.samtools.reference.FastaSequenceFile
import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension
import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, MultiSampleQScript }
import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand, Reference }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
......@@ -28,8 +28,8 @@ import nl.lumc.sasc.biopet.pipelines.gears.GearsSingle
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import MultisampleMapping.MergeStrategy
import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge }
import scala.collection.JavaConversions._
......@@ -125,7 +125,7 @@ trait MultisampleMappingTrait extends MultiSampleQScript
} else None
def bamFile: Option[File] = mapping match {
case Some(m) => Some(m.finalBamFile)
case Some(m) => Some(m.mergedBamFile)
case _ if inputBam.isDefined => Some(new File(libDir, s"$sampleId-$libId.bam"))
case _ => None
}
......@@ -247,9 +247,9 @@ trait MultisampleMappingTrait extends MultiSampleQScript
mergeStrategy match {
case MergeStrategy.None =>
case (MergeStrategy.MergeSam | MergeStrategy.MarkDuplicates) if libraries.flatMap(_._2.bamFile).size == 1 =>
case (MergeStrategy.MergeSam) if libraries.flatMap(_._2.bamFile).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.bamFile).head, bamFile.get): _*)
case (MergeStrategy.PreProcessMergeSam | MergeStrategy.PreProcessMarkDuplicates) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
case (MergeStrategy.PreProcessMergeSam) if libraries.flatMap(_._2.preProcessBam).size == 1 =>
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, bamFile.get): _*)
case MergeStrategy.MergeSam =>
add(MergeSamFiles(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
......@@ -259,6 +259,20 @@ trait MultisampleMappingTrait extends MultiSampleQScript
add(MarkDuplicates(qscript, libraries.flatMap(_._2.bamFile).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessMarkDuplicates =>
add(MarkDuplicates(qscript, libraries.flatMap(_._2.preProcessBam).toList, bamFile.get, isIntermediate = !keepMergedFiles))
case MergeStrategy.PreProcessSambambaMarkdup =>
val mergedBam = if (libraries.flatMap(_._2.bamFile).size == 1) {
add(Ln.linkBamFile(qscript, libraries.flatMap(_._2.preProcessBam).head, new File(sampleDir, "merged.bam")): _*)
libraries.flatMap(_._2.preProcessBam).head
} else {
val merge = new SambambaMerge(qscript)
merge.input = libraries.flatMap(_._2.preProcessBam).toList
merge.output = new File(sampleDir, "merged.bam")
merge.isIntermediate = true
add(merge)
merge.output
}
add(SambambaMarkdup(qscript, mergedBam, bamFile.get, isIntermediate = !keepMergedFiles))
add(Ln(qscript, bamFile.get + ".bai", bamFile.get.getAbsolutePath.stripSuffix(".ba