Skip to content
Snippets Groups Projects
Commit 5a474415 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Remove protected source file from public artifacts

parent def28b47
No related branches found
No related tags found
No related merge requests found
Showing
with 0 additions and 910 deletions
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class AnalyzeCovariates(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.AnalyzeCovariates with GatkGeneral {
}
object AnalyzeCovariates {
def apply(root: Configurable, before: File, after: File, plots: File): AnalyzeCovariates = {
val ac = new AnalyzeCovariates(root)
ac.before = before
ac.after = after
ac.plots = plots
return ac
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class ApplyRecalibration(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration with GatkGeneral {
override def afterGraph {
super.afterGraph
if (config.contains("scattercount")) scatterCount = config("scattercount")
nt = Option(getThreads(3))
memoryLimit = Option(nt.getOrElse(1) * 2)
ts_filter_level = config("ts_filter_level")
}
}
object ApplyRecalibration {
def apply(root: Configurable, input: File, output: File, recal_file: File, tranches_file: File, indel: Boolean = false): ApplyRecalibration = {
val ar = if (indel) new ApplyRecalibration(root) {
mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
defaults ++= Map("ts_filter_level" -> 99.0)
}
else new ApplyRecalibration(root) {
mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
defaults ++= Map("ts_filter_level" -> 99.5)
}
ar.input :+= input
ar.recal_file = recal_file
ar.tranches_file = tranches_file
ar.out = output
return ar
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator with GatkGeneral {
memoryLimit = Option(4)
override val defaultVmem = "8G"
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString)
if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString)
}
object BaseRecalibrator {
def apply(root: Configurable, input: File, output: File): BaseRecalibrator = {
val br = new BaseRecalibrator(root)
br.input_file :+= input
br.out = output
br.afterGraph
return br
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class CombineGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineGVCFs with GatkGeneral {
if (config.contains("scattercount")) scatterCount = config("scattercount")
}
object CombineGVCFs {
def apply(root: Configurable, input: List[File], output: File): CombineGVCFs = {
val cg = new CombineGVCFs(root)
cg.variant = input
cg.o = output
return cg
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class CombineVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineVariants with GatkGeneral {
if (config.contains("scattercount")) scatterCount = config("scattercount")
}
object CombineVariants {
def apply(root: Configurable, input: List[File], output: File): CombineVariants = {
val cv = new CombineVariants(root)
cv.variant = input
cv.out = output
return cv
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {
memoryLimit = Option(3)
if (config.contains("gatk_jar")) jarFile = config("gatk_jar")
override val defaultVmem = "7G"
if (config.contains("intervals", submodule = "gatk")) intervals = config("intervals", submodule = "gatk").asFileList
if (config.contains("exclude_intervals", submodule = "gatk")) excludeIntervals = config("exclude_intervals", submodule = "gatk").asFileList
reference_sequence = config("reference", submodule = "gatk")
gatk_key = config("gatk_key", submodule = "gatk")
if (config.contains("pedigree", submodule = "gatk")) pedigree = config("pedigree", submodule = "gatk").asFileList
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral {
annotation ++= config("annotation", default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")).asStringList
if (config.contains("dbsnp")) dbsnp = config("dbsnp")
if (config.contains("scattercount", "genotypegvcfs")) scatterCount = config("scattercount")
if (config("inputtype", default = "dna").asString == "rna") {
stand_call_conf = config("stand_call_conf", default = 20)
stand_emit_conf = config("stand_emit_conf", default = 0)
} else {
stand_call_conf = config("stand_call_conf", default = 30)
stand_emit_conf = config("stand_emit_conf", default = 0)
}
}
object GenotypeGVCFs {
def apply(root: Configurable, gvcfFiles: List[File], output: File): GenotypeGVCFs = {
val gg = new GenotypeGVCFs(root)
gg.variant = gvcfFiles
gg.out = output
return gg
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral {
override def afterGraph {
super.afterGraph
min_mapping_quality_score = config("minMappingQualityScore", default = 20)
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("dbsnp")) this.dbsnp = config("dbsnp")
this.sample_ploidy = config("ploidy")
nct = config("threads", default = 1)
bamOutput = config("bamOutput")
memoryLimit = Option(nct.getOrElse(1) * 2)
if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs")
if (config.contains("output_mode")) {
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
config("output_mode").asString match {
case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES
case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES
case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY
case e => logger.warn("output mode '" + e + "' does not exist")
}
}
if (config("inputtype", default = "dna").asString == "rna") {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = true)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
} else {
dontUseSoftClippedBases = config("dontusesoftclippedbases", default = false)
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
}
if (bamOutput != null && nct.getOrElse(1) > 1) {
nct = Option(1)
logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug")
}
}
def useGvcf() {
emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF
variant_index_type = GATKVCFIndexType.LINEAR
variant_index_parameter = config("variant_index_parameter", default = 128000)
}
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral {
if (config.contains("scattercount")) scatterCount = config("scattercount")
}
object IndelRealigner {
def apply(root: Configurable, input: File, targetIntervals: File, outputDir: String): IndelRealigner = {
val ir = new IndelRealigner(root)
ir.input_file :+= input
ir.targetIntervals = targetIntervals
ir.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bam")
return ir
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class PrintReads(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.PrintReads with GatkGeneral {
memoryLimit = Option(4)
override val defaultVmem = "8G"
if (config.contains("scattercount")) scatterCount = config("scattercount")
}
object PrintReads {
def apply(root: Configurable, input: File, output: File): PrintReads = {
val br = new PrintReads(root)
br.input_file :+= input
br.out = output
return br
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class RealignerTargetCreator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.RealignerTargetCreator with GatkGeneral {
override val defaultVmem = "6G"
memoryLimit = Some(2.5)
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("known")) known ++= config("known").asFileList
}
object RealignerTargetCreator {
def apply(root: Configurable, input: File, outputDir: String): RealignerTargetCreator = {
val re = new RealignerTargetCreator(root)
re.input_file :+= input
re.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.intervals")
return re
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class SelectVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.SelectVariants with GatkGeneral {
if (config.contains("scattercount")) scatterCount = config("scattercount")
}
object SelectVariants {
def apply(root: Configurable, input: File, output: File): SelectVariants = {
val sv = new SelectVariants(root)
sv.variant = input
sv.out = output
return sv
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.config.Configurable
class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral {
override def afterGraph {
super.afterGraph
genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH
if (config.contains("scattercount")) scatterCount = config("scattercount")
if (config.contains("dbsnp")) this.dbsnp = config("dbsnp")
this.sample_ploidy = config("ploidy")
nct = config("threads", default = 1)
memoryLimit = Option(nct.getOrElse(1) * 2)
if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs")
if (config.contains("output_mode")) {
import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
config("output_mode").asString match {
case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES
case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES
case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY
case e => logger.warn("output mode '" + e + "' does not exist")
}
}
if (config("inputtype", default = "dna").asString == "rna") {
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
} else {
stand_call_conf = config("stand_call_conf", default = 5)
stand_emit_conf = config("stand_emit_conf", default = 0)
}
}
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class VariantAnnotator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantAnnotator with GatkGeneral {
if (config.contains("scattercount")) scatterCount = config("scattercount")
dbsnp = config("dbsnp")
}
object VariantAnnotator {
def apply(root: Configurable, input: File, bamFiles: List[File], output: File): VariantAnnotator = {
val va = new VariantAnnotator(root)
va.variant = input
va.input_file = bamFiles
va.out = output
return va
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
class VariantEval(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantEval with GatkGeneral {
override def afterGraph {
super.afterGraph
}
}
object VariantEval {
def apply(root: Configurable, sample: File, compareWith: File,
output: File): VariantEval = {
val vareval = new VariantEval(root)
vareval.eval = Seq(sample)
vareval.comp = Seq(compareWith)
vareval.out = output
vareval.afterGraph
return vareval
}
def apply(root: Configurable, sample: File, compareWith: File,
output: File, ST: Seq[String], EV: Seq[String]): VariantEval = {
val vareval = new VariantEval(root)
vareval.eval = Seq(sample)
vareval.comp = Seq(compareWith)
vareval.out = output
vareval.noST = true
vareval.ST = ST
vareval.noEV = true
vareval.EV = EV
vareval.afterGraph
return vareval
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
class VariantRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantRecalibrator with GatkGeneral {
nt = Option(getThreads(4))
memoryLimit = Option(nt.getOrElse(1) * 2)
if (config.contains("dbsnp")) resource :+= new TaggedFile(config("dbsnp").asString, "known=true,training=false,truth=false,prior=2.0")
an = config("annotation", default = List("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum")).asStringList
minNumBadVariants = config("minnumbadvariants")
maxGaussians = config("maxgaussians")
}
object VariantRecalibrator {
def apply(root: Configurable, input: File, recal_file: File, tranches_file: File, indel: Boolean = false): VariantRecalibrator = {
val vr = new VariantRecalibrator(root) {
override lazy val configName = "variantrecalibrator"
override def configPath: List[String] = (if (indel) "indel" else "snp") :: super.configPath
if (indel) {
mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
defaults ++= Map("ts_filter_level" -> 99.0)
if (config.contains("mills")) resource :+= new TaggedFile(config("mills").asString, "known=false,training=true,truth=true,prior=12.0")
} else {
mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
defaults ++= Map("ts_filter_level" -> 99.5)
if (config.contains("hapmap")) resource +:= new TaggedFile(config("hapmap").asString, "known=false,training=true,truth=true,prior=15.0")
if (config.contains("omni")) resource +:= new TaggedFile(config("omni").asString, "known=false,training=true,truth=true,prior=12.0")
if (config.contains("1000G")) resource +:= new TaggedFile(config("1000G").asString, "known=false,training=true,truth=false,prior=10.0")
}
}
vr.input :+= input
vr.recal_file = recal_file
vr.tranches_file = tranches_file
return vr
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.basty
import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Cat
import nl.lumc.sasc.biopet.extensions.Raxml
import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import nl.lumc.sasc.biopet.tools.BastyGenerateFasta
import org.broadinstitute.gatk.queue.QScript
class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
def this() = this(null)
class LibraryOutput extends AbstractLibraryOutput {
}
case class FastaOutput(variants: File, consensus: File, consensusVariants: File)
class SampleOutput extends AbstractSampleOutput {
var output: FastaOutput = _
var outputSnps: FastaOutput = _
}
defaults ++= Map("ploidy" -> 1, "use_haplotypecaller" -> false, "use_unifiedgenotyper" -> true, "joint_variantcalling" -> true)
var gatkPipeline: GatkPipeline = new GatkPipeline(this)
gatkPipeline.jointVariantcalling = true
def init() {
gatkPipeline.outputDir = outputDir
gatkPipeline.init
}
def biopetScript() {
gatkPipeline.biopetScript
addAll(gatkPipeline.functions)
val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference")
val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true)
runSamplesJobs()
val catVariants = Cat(this, refVariants.variants :: samplesOutput.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta")
add(catVariants)
val catVariantsSnps = Cat(this, refVariantSnps.variants :: samplesOutput.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta")
add(catVariantsSnps)
val catConsensus = Cat(this, refVariants.consensus :: samplesOutput.map(_._2.output.consensus).toList, outputDir + "fastas/consensus.fasta")
add(catConsensus)
val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samplesOutput.map(_._2.outputSnps.consensus).toList, outputDir + "fastas/consensus.snps_only.fasta")
add(catConsensusSnps)
val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samplesOutput.map(_._2.output.consensusVariants).toList, outputDir + "fastas/consensus.variant.fasta")
add(catConsensusVariants)
val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samplesOutput.map(_._2.outputSnps.consensusVariants).toList, outputDir + "fastas/consensus.variant.snps_only.fasta")
add(catConsensusVariantsSnps)
val seed: Int = config("seed", default = 12345)
def addRaxml(input: File, outputDir: String, outputName: String) {
val raxmlMl = new Raxml(this)
raxmlMl.input = input
raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlMl.p = seed
raxmlMl.n = outputName + "_ml"
raxmlMl.w = outputDir
raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml")
add(raxmlMl)
val r = new scala.util.Random(seed)
val numBoot = config("boot_runs", default = 100, submodule = "raxml").asInt
val bootList = for (t <- 0 until numBoot) yield {
val raxmlBoot = new Raxml(this)
raxmlBoot.threads = 1
raxmlBoot.input = input
raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBoot.p = seed
raxmlBoot.b = math.abs(r.nextInt)
raxmlBoot.w = outputDir
raxmlBoot.N = 1
raxmlBoot.n = outputName + "_boot_" + t
add(raxmlBoot)
raxmlBoot.getBootstrapFile
}
val cat = Cat(this, bootList.toList, outputDir + "/boot_list")
add(cat)
val raxmlBi = new Raxml(this)
raxmlBi.input = input
raxmlBi.t = raxmlMl.getBestTreeFile
raxmlBi.z = cat.output
raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBi.p = seed
raxmlBi.f = "b"
raxmlBi.n = outputName + "_bi"
raxmlBi.w = outputDir
add(raxmlBi)
}
addRaxml(catVariantsSnps.output, outputDir + "raxml", "snps")
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
val sampleOutput = new SampleOutput
val sampleID: String = sampleConfig("ID").toString
val sampleDir = globalSampleDir + sampleID + "/"
sampleOutput.libraries = runLibraryJobs(sampleConfig)
sampleOutput.output = addGenerateFasta(sampleID, sampleDir)
sampleOutput.outputSnps = addGenerateFasta(sampleID, sampleDir, snpsOnly = true)
return sampleOutput
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
return libraryOutput
}
def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null,
snpsOnly: Boolean = false): FastaOutput = {
val bastyGenerateFasta = new BastyGenerateFasta(this)
bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName
bastyGenerateFasta.inputVcf = gatkPipeline.multisampleVariantcalling.scriptOutput.finalVcfFile
if (gatkPipeline.samplesOutput.contains(sampleName)) {
bastyGenerateFasta.bamFile = gatkPipeline.samplesOutput(sampleName).variantcalling.bamFiles.head
}
bastyGenerateFasta.outputVariants = outputDir + bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
bastyGenerateFasta.outputConsensus = outputDir + bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
bastyGenerateFasta.outputConsensusVariants = outputDir + bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta"
bastyGenerateFasta.sampleName = sampleName
bastyGenerateFasta.snpsOnly = snpsOnly
add(bastyGenerateFasta)
return FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants)
}
}
object Basty extends PipelineCommand
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
import scala.util.Random
class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null)
@Input(doc = "Sample gvcf file")
var sampleGvcf: File = _
@Argument(doc = "SampleName", required = true)
var sampleName: String = _
@Input(doc = "Gvcf files", shortName = "I", required = false)
var gvcfFiles: List[File] = Nil
var reference: File = config("reference")
@Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
var dbsnp: File = config("dbsnp")
def init() {
if (config.contains("gvcffiles")) for (file <- config("gvcffiles").asList) {
gvcfFiles ::= file.toString
}
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
}
def biopetScript() {
var todoGvcfs = gvcfFiles
var gvcfPool: List[File] = Nil
addGenotypingPipeline(gvcfPool)
while (todoGvcfs.size > 0) {
val index = Random.nextInt(todoGvcfs.size)
gvcfPool ::= todoGvcfs(index)
addGenotypingPipeline(gvcfPool)
todoGvcfs = todoGvcfs.filter(b => b != todoGvcfs(index))
}
}
def addGenotypingPipeline(gvcfPool: List[File]) {
val gatkGenotyping = new GatkGenotyping(this)
gatkGenotyping.inputGvcfs = sampleGvcf :: gvcfPool
gatkGenotyping.samples :+= sampleName
gatkGenotyping.outputDir = outputDir + "samples_" + gvcfPool.size + "/"
gatkGenotyping.init
gatkGenotyping.biopetScript
addAll(gatkGenotyping.functions)
}
}
object GatkBenchmarkGenotyping extends PipelineCommand
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.{ GenotypeGVCFs, SelectVariants }
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument }
class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript {
def this() = this(null)
@Input(doc = "Gvcf files", shortName = "I")
var inputGvcfs: List[File] = Nil
@Argument(doc = "Reference", shortName = "R", required = false)
var reference: File = config("reference")
@Argument(doc = "Dbsnp", shortName = "dbsnp", required = false)
var dbsnp: File = config("dbsnp")
@Argument(doc = "OutputName", required = false)
var outputName: String = "genotype"
@Output(doc = "OutputFile", shortName = "O", required = false)
var outputFile: File = _
@Argument(doc = "Samples", shortName = "sample", required = false)
var samples: List[String] = Nil
def init() {
if (outputFile == null) outputFile = outputDir + outputName + ".vcf.gz"
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
}
def biopetScript() {
addGenotypeGVCFs(inputGvcfs, outputFile)
if (!samples.isEmpty) {
if (samples.size > 1) addSelectVariants(outputFile, samples, outputDir + "samples/", "all")
for (sample <- samples) addSelectVariants(outputFile, List(sample), outputDir + "samples/", sample)
}
}
def addGenotypeGVCFs(gvcfFiles: List[File], outputFile: File): File = {
val genotypeGVCFs = GenotypeGVCFs(this, gvcfFiles, outputFile)
add(genotypeGVCFs)
return genotypeGVCFs.out
}
def addSelectVariants(inputFile: File, samples: List[String], outputDir: String, name: String) {
val selectVariants = SelectVariants(this, inputFile, outputDir + name + ".vcf.gz")
selectVariants.excludeNonVariants = true
for (sample <- samples) selectVariants.sample_name :+= sample
add(selectVariants)
}
}
object GatkGenotyping extends PipelineCommand
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import htsjdk.samtools.SamReaderFactory
import scala.collection.JavaConversions._
import java.io.File
import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, CombineGVCFs }
import nl.lumc.sasc.biopet.extensions.picard.AddOrReplaceReadGroups
import nl.lumc.sasc.biopet.extensions.picard.SamToFastq
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Argument }
class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript {
def this() = this(null)
@Argument(doc = "Only Sample", shortName = "sample", required = false)
val onlySample: List[String] = Nil
@Argument(doc = "Skip Genotyping step", shortName = "skipgenotyping", required = false)
var skipGenotyping: Boolean = false
@Argument(doc = "Merge gvcfs", shortName = "mergegvcfs", required = false)
var mergeGvcfs: Boolean = false
@Argument(doc = "Joint variantcalling", shortName = "jointVariantCalling", required = false)
var jointVariantcalling: Boolean = config("joint_variantcalling", default = false)
@Argument(doc = "Joint genotyping", shortName = "jointGenotyping", required = false)
var jointGenotyping: Boolean = config("joint_genotyping", default = false)
var singleSampleCalling = config("single_sample_calling", default = true)
var reference: File = config("reference", required = true)
var dbsnp: File = config("dbsnp")
var gvcfFiles: List[File] = Nil
var finalBamFiles: List[File] = Nil
var useAllelesOption: Boolean = config("use_alleles_option", default = false)
class LibraryOutput extends AbstractLibraryOutput {
var mappedBamFile: File = _
var variantcalling: GatkVariantcalling.ScriptOutput = _
}
class SampleOutput extends AbstractSampleOutput {
var variantcalling: GatkVariantcalling.ScriptOutput = _
}
def init() {
if (config.contains("target_bed")) {
defaults ++= Map("gatk" -> Map(("intervals" -> config("target_bed").asStringList)))
}
if (config.contains("gvcfFiles"))
for (file <- config("gvcfFiles").asList)
gvcfFiles :+= file.toString
if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
else if (!outputDir.endsWith("/")) outputDir += "/"
}
val multisampleVariantcalling = new GatkVariantcalling(this) {
override protected lazy val configName = "gatkvariantcalling"
override def configPath: List[String] = "multisample" :: super.configPath
}
def biopetScript() {
if (onlySample.isEmpty) {
runSamplesJobs
//SampleWide jobs
if (mergeGvcfs && gvcfFiles.size > 0) {
val newFile = outputDir + "merged.gvcf.vcf.gz"
add(CombineGVCFs(this, gvcfFiles, newFile))
gvcfFiles = List(newFile)
}
if (!skipGenotyping && gvcfFiles.size > 0) {
if (jointGenotyping) {
val gatkGenotyping = new GatkGenotyping(this)
gatkGenotyping.inputGvcfs = gvcfFiles
gatkGenotyping.outputDir = outputDir + "genotyping/"
gatkGenotyping.init
gatkGenotyping.biopetScript
addAll(gatkGenotyping.functions)
var vcfFile = gatkGenotyping.outputFile
}
} else logger.warn("No gVCFs to genotype")
if (jointVariantcalling) {
val allBamfiles = for (
(sampleID, sampleOutput) <- samplesOutput;
file <- sampleOutput.variantcalling.bamFiles
) yield file
val allRawVcfFiles = for ((sampleID, sampleOutput) <- samplesOutput) yield sampleOutput.variantcalling.rawFilterVcfFile
val gatkVariantcalling = new GatkVariantcalling(this) {
override protected lazy val configName = "gatkvariantcalling"
override def configPath: List[String] = "multisample" :: super.configPath
}
if (gatkVariantcalling.useMpileup) {
val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz")
add(cvRaw)
gatkVariantcalling.rawVcfInput = cvRaw.out
}
multisampleVariantcalling.preProcesBams = false
multisampleVariantcalling.doublePreProces = false
multisampleVariantcalling.inputBams = allBamfiles.toList
multisampleVariantcalling.outputDir = outputDir + "variantcalling"
multisampleVariantcalling.outputName = "multisample"
multisampleVariantcalling.init
multisampleVariantcalling.biopetScript
addAll(multisampleVariantcalling.functions)
if (config("inputtype", default = "dna").asString != "rna" && config("recalibration", default = false).asBoolean) {
val recalibration = new GatkVariantRecalibration(this)
recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile
recalibration.bamFiles = finalBamFiles
recalibration.outputDir = outputDir + "recalibration/"
recalibration.init
recalibration.biopetScript
}
}
} else for (sample <- onlySample) runSingleSampleJobs(sample)
}
// Called for each sample
def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
val sampleOutput = new SampleOutput
var libraryBamfiles: List[File] = List()
val sampleID: String = sampleConfig("ID").toString
sampleOutput.libraries = runLibraryJobs(sampleConfig)
val sampleDir = globalSampleDir + sampleID
for ((libraryID, libraryOutput) <- sampleOutput.libraries) {
libraryBamfiles ++= libraryOutput.variantcalling.bamFiles
}
if (libraryBamfiles.size > 0) {
finalBamFiles ++= libraryBamfiles
val gatkVariantcalling = new GatkVariantcalling(this)
gatkVariantcalling.inputBams = libraryBamfiles
gatkVariantcalling.outputDir = sampleDir + "/variantcalling/"
gatkVariantcalling.preProcesBams = false
if (!singleSampleCalling) {
gatkVariantcalling.useHaplotypecaller = false
gatkVariantcalling.useUnifiedGenotyper = false
}
gatkVariantcalling.sampleID = sampleID
gatkVariantcalling.init
gatkVariantcalling.biopetScript
addAll(gatkVariantcalling.functions)
sampleOutput.variantcalling = gatkVariantcalling.scriptOutput
gvcfFiles :+= gatkVariantcalling.scriptOutput.gvcfFile
} else logger.warn("No bamfiles for variant calling for sample: " + sampleID)
return sampleOutput
}
// Called for each run from a sample
def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
val libraryOutput = new LibraryOutput
val runID: String = runConfig("ID").toString
val sampleID: String = sampleConfig("ID").toString
val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
var inputType = ""
if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString
else inputType = config("inputtype", default = "dna").toString
if (runConfig.contains("R1")) {
val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir)
addAll(mapping.functions) // Add functions of mapping to curent function pool
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else if (runConfig.contains("bam")) {
var bamFile = new File(runConfig("bam").toString)
if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile)
if (config("bam_to_fastq", default = false).asBoolean) {
val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + runID + ".R1.fastq",
runDir + sampleID + "-" + runID + ".R2.fastq")
add(samToFastq, isIntermediate = true)
val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir, startJobs = false)
mapping.input_R1 = samToFastq.fastqR1
mapping.input_R2 = samToFastq.fastqR2
mapping.init
mapping.biopetScript
addAll(mapping.functions) // Add functions of mapping to curent function pool
libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
} else {
var readGroupOke = true
val inputSam = SamReaderFactory.makeDefault.open(bamFile)
val header = inputSam.getFileHeader.getReadGroups
for (readGroup <- inputSam.getFileHeader.getReadGroups) {
if (readGroup.getSample != sampleID) logger.warn("Sample ID readgroup in bam file is not the same")
if (readGroup.getLibrary != runID) logger.warn("Library ID readgroup in bam file is not the same")
if (readGroup.getSample != sampleID || readGroup.getLibrary != runID) readGroupOke = false
}
inputSam.close
if (!readGroupOke) {
if (config("correct_readgroups", default = false)) {
logger.info("Correcting readgroups, file:" + bamFile)
val aorrg = AddOrReplaceReadGroups(this, bamFile, new File(runDir + sampleID + "-" + runID + ".bam"))
aorrg.RGID = sampleID + "-" + runID
aorrg.RGLB = runID
aorrg.RGSM = sampleID
if (runConfig.contains("PL")) aorrg.RGPL = runConfig("PL").toString
else aorrg.RGPL = "illumina"
if (runConfig.contains("PU")) aorrg.RGPU = runConfig("PU").toString
else aorrg.RGPU = "na"
if (runConfig.contains("CN")) aorrg.RGCN = runConfig("CN").toString
add(aorrg, isIntermediate = true)
bamFile = aorrg.output
} else throw new IllegalStateException("Readgroup sample and/or library of input bamfile is not correct, file: " + bamFile +
"\nPossible to set 'correct_readgroups' to true on config to automatic fix this")
}
addAll(BamMetrics(this, bamFile, runDir + "metrics/").functions)
libraryOutput.mappedBamFile = bamFile
}
} else logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
val gatkVariantcalling = new GatkVariantcalling(this)
gatkVariantcalling.inputBams = List(libraryOutput.mappedBamFile)
gatkVariantcalling.outputDir = runDir
gatkVariantcalling.variantcalling = config("library_variantcalling", default = false)
gatkVariantcalling.preProcesBams = true
gatkVariantcalling.sampleID = sampleID
gatkVariantcalling.init
gatkVariantcalling.biopetScript
addAll(gatkVariantcalling.functions)
libraryOutput.variantcalling = gatkVariantcalling.scriptOutput
return libraryOutput
}
}
object GatkPipeline extends PipelineCommand
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment