Commit 613c89ec authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Fixed scattering

parent 23fb609a
......@@ -8,7 +8,6 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ GATKScatterFunction, LocusScatterFunction }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.Argument
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ GATKScatterFunction, ReadScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
......@@ -17,7 +17,7 @@ import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
class BaseRecalibrator(val root: Configurable) extends CommandLineGATK /* with ScatterGatherableFunction */ {
def analysis_type = "BaseRecalibrator"
//TODO: check gathering
//scatterClass = classOf[ReadScatterFunction]
//scatterClass = classOf[ContigScatterFunction]
//setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = false }
/** A database of known polymorphic sites */
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ }
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
......
......@@ -5,6 +5,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, Reference, Version }
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, Output }
import org.broadinstitute.gatk.utils.interval.{ IntervalMergingRule, IntervalSetRule }
trait CommandLineGATK extends BiopetJavaCommandLineFunction with Reference with Version {
analysisName = analysis_type
......@@ -68,11 +69,11 @@ trait CommandLineGATK extends BiopetJavaCommandLineFunction with Reference with
/** Set merging approach to use for combining interval inputs */
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Set merging approach to use for combining interval inputs", required = false, exclusiveOf = "", validation = "")
var interval_set_rule: Option[String] = config("interval_set_rule")
var interval_set_rule: Option[IntervalSetRule] = None
/** Interval merging rule for abutting intervals */
@Argument(fullName = "interval_merging", shortName = "im", doc = "Interval merging rule for abutting intervals", required = false, exclusiveOf = "", validation = "")
var interval_merging: Option[String] = config("interval_merging")
var interval_merging: Option[IntervalMergingRule] = None
/** Amount of padding (in bp) to add to each interval */
@Argument(fullName = "interval_padding", shortName = "ip", doc = "Amount of padding (in bp) to add to each interval", required = false, exclusiveOf = "", validation = "")
......@@ -317,6 +318,14 @@ trait CommandLineGATK extends BiopetJavaCommandLineFunction with Reference with
override def beforeGraph() {
super.beforeGraph()
if (interval_set_rule.isEmpty) {
val v: Option[String] = config("interval_set_rule")
interval_set_rule = v.map(IntervalSetRule.valueOf(_))
}
if (interval_merging.isEmpty) {
val v: Option[String] = config("interval_merging")
interval_merging = v.map(IntervalMergingRule.valueOf(_))
}
if (reference_sequence == null) reference_sequence = referenceFasta()
input_fileIndexes ++= input_file.filter(orig => orig != null && orig.getName.endsWith(".bam")).flatMap(orig => Array(new File(orig.getPath.stripSuffix(".bam") + ".bai")))
if (num_threads.isDefined) nCoresRequest = num_threads
......
package nl.lumc.sasc.biopet.extensions.gatk.broad
import collection.JavaConversions._
import org.broadinstitute.gatk.utils.interval.IntervalUtils
import org.broadinstitute.gatk.queue.function.InProcessFunction
/**
 * Scatter function that writes the intervals of the original GATK function
 * to the scatter output files split by contig instead of evenly.
 */
class ContigScatterFunction extends GATKScatterFunction with InProcessFunction {

  /** Number of contigs in the intervals resolved for the original GATK function. */
  protected override def maxIntervals = {
    val resolved = GATKScatterFunction.getGATKIntervals(this.originalGATK)
    resolved.contigs.size
  }

  /**
   * The inherited scatter count, capped at [[maxIntervals]] when all interval
   * files exist; when they do not, the inherited count is returned unchanged.
   */
  override def scatterCount =
    if (!intervalFilesExist) super.scatterCount
    else math.min(super.scatterCount, this.maxIntervals)

  /** Resolves the intervals and hands them to the contig-based interval scatterer. */
  def run(): Unit = {
    val resolved = GATKScatterFunction.getGATKIntervals(this.originalGATK)
    IntervalUtils.scatterContigIntervals(resolved.samFileHeader, resolved.locs, this.scatterOutputFiles)
  }
}
package nl.lumc.sasc.biopet.extensions.gatk.broad
import org.broadinstitute.gatk.utils.interval.IntervalUtils
import java.io.File
import org.broadinstitute.gatk.queue.extensions.gatk.GATKIntervals
import org.broadinstitute.gatk.utils.io.IOUtils
import org.broadinstitute.gatk.queue.function.scattergather.{ CloneFunction, ScatterFunction }
import org.broadinstitute.gatk.utils.commandline.{ Output, _ }
/**
 * Base scatter function for [[CommandLineGATK]] functions.
 *
 * Each clone is pointed at its own `scatter.intervals` file and has the
 * interval-related options of the original function reset.
 */
trait GATKScatterFunction extends ScatterFunction {
  /* The runtime fields to set for specifying intervals. */
  private final val intervalsField = "intervals"
  private final val intervalsStringField = "intervalsString"
  private final val excludeIntervalsField = "excludeIntervals"
  private final val excludeIntervalsStringField = "excludeIntervalsString"
  private final val intervalsSetRuleField = "interval_set_rule"
  private final val intervalMergingField = "interval_merging"
  private final val intervalPaddingField = "interval_padding"

  @Output(doc = "Scatter function outputs")
  var scatterOutputFiles: Seq[File] = Nil

  /** The original GATK function. */
  protected var originalGATK: CommandLineGATK = _

  /** Whether the last scatter job should also include any unmapped reads. */
  var includeUnmapped: Boolean = _

  /**
   * Captures the original function as a [[CommandLineGATK]] and, when intervals
   * were specified, derives `includeUnmapped` from the interval strings.
   * Without any intervals the current value of `includeUnmapped` is left untouched.
   */
  override def init(): Unit = {
    this.originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
    // If intervals have been specified check if unmapped is included
    if (this.originalGATK.intervals.size + this.originalGATK.intervalsString.size > 0)
      this.includeUnmapped = this.originalGATK.intervalsString.exists(interval => IntervalUtils.isUnmapped(interval))
  }

  /** Scattering is only possible when the original function has a reference sequence. */
  override def isScatterGatherable = {
    this.originalGATK.reference_sequence != null
  }

  /**
   * Sets the interval-related fields of a clone.
   *
   * @param cloneFunction the clone to initialise
   * @param index index of the clone; the clone whose index equals `scatterCount`
   *              receives the "unmapped" interval string when `includeUnmapped` is set
   *              (presumably indexes are 1-based so this is the last clone -- confirm
   *              against the caller)
   */
  override def initCloneInputs(cloneFunction: CloneFunction, index: Int): Unit = {
    cloneFunction.setFieldValue(this.intervalsField, Seq(new File("scatter.intervals")))
    if (index == this.scatterCount && this.includeUnmapped)
      cloneFunction.setFieldValue(this.intervalsStringField, Seq("unmapped"))
    else
      cloneFunction.setFieldValue(this.intervalsStringField, Seq.empty[String])
    // interval_set_rule and interval_merging are Option-typed on CommandLineGATK,
    // so they are reset with None (like interval_padding) rather than null:
    // a null stored in an Option field would throw on .isEmpty / .getOrElse.
    cloneFunction.setFieldValue(this.intervalsSetRuleField, None)
    cloneFunction.setFieldValue(this.intervalMergingField, None)
    cloneFunction.setFieldValue(this.intervalPaddingField, None)
    cloneFunction.setFieldValue(this.excludeIntervalsField, Seq.empty[File])
    cloneFunction.setFieldValue(this.excludeIntervalsStringField, Seq.empty[String])
  }

  /**
   * Makes the clone's interval files absolute relative to the clone's command
   * directory and registers them as outputs of this scatter function.
   */
  override def bindCloneInputs(cloneFunction: CloneFunction, index: Int): Unit = {
    val scatterPart = cloneFunction.getFieldValue(this.intervalsField)
      .asInstanceOf[Seq[File]]
      .map(file => IOUtils.absolute(cloneFunction.commandDirectory, file))
    cloneFunction.setFieldValue(this.intervalsField, scatterPart)
    this.scatterOutputFiles ++= scatterPart
  }

  /**
   * @return true if all interval files exist.
   */
  protected def intervalFilesExist = {
    (this.originalGATK.intervals ++ this.originalGATK.excludeIntervals).forall(interval => interval.exists())
  }

  /**
   * @return the maximum number of intervals or this.scatterCount if the maximum can't be determined ahead of time.
   */
  protected def maxIntervals: Int
}
object GATKScatterFunction {
  /** Interval sets built so far; equal requests are answered with the stored instance. */
  var gatkIntervalsCache = Seq.empty[GATKIntervals]

  /**
   * Builds the [[GATKIntervals]] described by the interval settings of the given
   * function and returns the cached instance equal to it, adding the new one to
   * the cache when no equal instance is stored yet.
   *
   * @param originalFunction GATK function whose reference, intervals, interval rules,
   *                         padding and exclude intervals are read
   * @return the cached or newly cached interval set
   */
  def getGATKIntervals(originalFunction: CommandLineGATK) = {
    val requested = new GATKIntervals(
      originalFunction.reference_sequence,
      originalFunction.intervals.toSeq,
      originalFunction.intervalsString.toSeq,
      // Unset rules are passed on as null.
      originalFunction.interval_set_rule.orNull,
      originalFunction.interval_merging.orNull,
      originalFunction.interval_padding,
      originalFunction.excludeIntervals.toSeq,
      originalFunction.excludeIntervalsString.toSeq)

    gatkIntervalsCache.find(_ == requested).getOrElse {
      gatkIntervalsCache :+= requested
      requested
    }
  }
}
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
......
......@@ -12,7 +12,6 @@ import org.broadinstitute.gatk.queue.extensions.gatk._
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, _ }
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class HaplotypeCaller(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
def analysis_type = "HaplotypeCaller"
......
......@@ -8,14 +8,14 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ BamGatherFunction, GATKScatterFunction, ReadScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ BamGatherFunction, TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
class IndelRealigner(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
def analysis_type = "IndelRealigner"
scatterClass = classOf[ReadScatterFunction]
scatterClass = classOf[ContigScatterFunction]
setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = true }
/** Input VCF file(s) with known indels */
......
package nl.lumc.sasc.biopet.extensions.gatk.broad
import collection.JavaConversions._
import org.broadinstitute.gatk.utils.interval.IntervalUtils
import org.broadinstitute.gatk.queue.function.InProcessFunction
/**
 * Scatter function that divides the intervals of the original GATK function
 * down to the locus level, one part per scatter output file.
 */
class LocusScatterFunction extends GATKScatterFunction with InProcessFunction {

  /** No separate bound is computed here; the scatter count itself is used. */
  protected override def maxIntervals = scatterCount

  /** Splits the resolved loci into as many parts as there are output files and writes them. */
  def run(): Unit = {
    val resolved = GATKScatterFunction.getGATKIntervals(this.originalGATK)
    val parts = IntervalUtils.splitLocusIntervals(resolved.locs, this.scatterOutputFiles.size)
    IntervalUtils.scatterFixedIntervals(resolved.samFileHeader, parts, this.scatterOutputFiles)
  }
}
\ No newline at end of file
......@@ -8,13 +8,13 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ BamGatherFunction, GATKScatterFunction, ReadScatterFunction }
import org.broadinstitute.gatk.queue.extensions.gatk.{ BamGatherFunction }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import org.broadinstitute.gatk.utils.commandline._
class PrintReads(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
def analysis_type = "PrintReads"
scatterClass = classOf[ReadScatterFunction]
scatterClass = classOf[ContigScatterFunction]
setupScatterFunction = { case scatter: GATKScatterFunction => scatter.includeUnmapped = true }
/** Write output to this BAM filename instead of STDOUT */
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Input, _ }
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
......
......@@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, TaggedFile }
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import org.broadinstitute.gatk.utils.commandline.{ Gather, Input, Output, _ }
......
......@@ -10,7 +10,7 @@ import java.io.File
import nl.lumc.sasc.biopet.core.ScatterGatherableFunction
import nl.lumc.sasc.biopet.utils.VcfUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, GATKScatterFunction, LocusScatterFunction, TaggedFile }
import org.broadinstitute.gatk.queue.extensions.gatk.{ CatVariantsGatherer, TaggedFile }
import org.broadinstitute.gatk.utils.commandline.{ Argument, Gather, Output, _ }
class VariantAnnotator(val root: Configurable) extends CommandLineGATK with ScatterGatherableFunction {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment