Commit 5a474415 authored by Peter van 't Hof
Browse files

Remove protected source file from public artifacts

parent def28b47
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `AnalyzeCovariates` extension.
 *
 * The class adds nothing of its own: it only combines the Queue extension with
 * the [[GatkGeneral]] mixin.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class AnalyzeCovariates(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.AnalyzeCovariates
  with GatkGeneral
/** Factory for [[AnalyzeCovariates]] jobs. */
object AnalyzeCovariates {
  /**
   * Creates an [[AnalyzeCovariates]] job and fills in its three file fields.
   *
   * @param root   the [[Configurable]] handed to the new job
   * @param before value assigned to the job's `before` field
   * @param after  value assigned to the job's `after` field
   * @param plots  value assigned to the job's `plots` field
   * @return the newly created job
   */
  def apply(root: Configurable, before: File, after: File, plots: File): AnalyzeCovariates = {
    val job = new AnalyzeCovariates(root)
    job.before = before
    job.after = after
    job.plots = plots
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `ApplyRecalibration` extension.
 *
 * All settings are resolved in `afterGraph` rather than in the constructor.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class ApplyRecalibration(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration
  with GatkGeneral {

  /**
   * Runs the inherited hook and then applies the configuration:
   * an optional `scattercount`, the thread count from `getThreads(3)`,
   * a memory limit of twice the thread count and the `ts_filter_level` value.
   */
  override def afterGraph: Unit = {
    super.afterGraph

    if (config.contains("scattercount")) {
      scatterCount = config("scattercount")
    }

    nt = Option(getThreads(3))
    // The memory limit scales with the thread count (1 is assumed when nt is empty).
    memoryLimit = Option(nt.getOrElse(1) * 2)

    ts_filter_level = config("ts_filter_level")
  }
}
/** Factory for [[ApplyRecalibration]] jobs in INDEL or SNP mode. */
object ApplyRecalibration {
  /**
   * Builds an [[ApplyRecalibration]] job for the requested mode.
   *
   * The job is created as an anonymous subclass that selects the mode and adds a
   * mode-specific `ts_filter_level` entry to `defaults` (99.0 for indels, 99.5 for SNPs).
   *
   * @param root          the [[Configurable]] handed to the new job
   * @param input         file appended to the job's `input` list
   * @param output        value assigned to the job's `out` field
   * @param recal_file    value assigned to the job's `recal_file` field
   * @param tranches_file value assigned to the job's `tranches_file` field
   * @param indel         `true` selects INDEL mode, `false` (the default) SNP mode
   * @return the configured job
   */
  def apply(root: Configurable,
            input: File,
            output: File,
            recal_file: File,
            tranches_file: File,
            indel: Boolean = false): ApplyRecalibration = {
    val job: ApplyRecalibration = indel match {
      case true =>
        new ApplyRecalibration(root) {
          mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
          defaults ++= Map("ts_filter_level" -> 99.0)
        }
      case false =>
        new ApplyRecalibration(root) {
          mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
          defaults ++= Map("ts_filter_level" -> 99.5)
        }
    }

    job.input :+= input
    job.recal_file = recal_file
    job.tranches_file = tranches_file
    job.out = output
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `BaseRecalibrator` extension.
 *
 * The constructor sets the memory limit to 4 and `defaultVmem` to "8G", applies an
 * optional `scattercount`, and appends the files configured under `dbsnp` and
 * `known_sites` (when present) to `knownSites`.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class BaseRecalibrator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator
  with GatkGeneral {

  memoryLimit = Option(4)
  override val defaultVmem = "8G"

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }

  // Every configured key contributes one known-sites file; "dbsnp" is handled first.
  for (key <- Seq("dbsnp", "known_sites") if config.contains(key)) {
    knownSites :+= new File(config(key).asString)
  }
}
/** Factory for [[BaseRecalibrator]] jobs. */
object BaseRecalibrator {
  /**
   * Creates a [[BaseRecalibrator]] job for one input file and invokes its
   * `afterGraph` hook before returning it.
   *
   * @param root   the [[Configurable]] handed to the new job
   * @param input  file appended to the job's `input_file` list
   * @param output value assigned to the job's `out` field
   * @return the configured job
   */
  def apply(root: Configurable, input: File, output: File): BaseRecalibrator = {
    val job = new BaseRecalibrator(root)
    job.input_file :+= input
    job.out = output
    job.afterGraph
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `CombineGVCFs` extension.
 *
 * The only local set-up is an optional `scattercount` taken from the configuration.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class CombineGVCFs(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.CombineGVCFs
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
/** Factory for [[CombineGVCFs]] jobs. */
object CombineGVCFs {
  /**
   * Creates a [[CombineGVCFs]] job.
   *
   * @param root   the [[Configurable]] handed to the new job
   * @param input  files assigned to the job's `variant` field
   * @param output value assigned to the job's `o` field
   * @return the configured job
   */
  def apply(root: Configurable, input: List[File], output: File): CombineGVCFs = {
    val job = new CombineGVCFs(root)
    job.variant = input
    job.o = output
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `CombineVariants` extension.
 *
 * The only local set-up is an optional `scattercount` taken from the configuration.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class CombineVariants(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.CombineVariants
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
/** Factory for [[CombineVariants]] jobs. */
object CombineVariants {
  /**
   * Creates a [[CombineVariants]] job.
   *
   * @param root   the [[Configurable]] handed to the new job
   * @param input  files assigned to the job's `variant` field
   * @param output value assigned to the job's `out` field
   * @return the configured job
   */
  def apply(root: Configurable, input: List[File], output: File): CombineVariants = {
    val job = new CombineVariants(root)
    job.variant = input
    job.out = output
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
/**
 * Set-up shared by the GATK wrappers that mix this trait in.
 *
 * On construction it sets the memory limit to 3 and `defaultVmem` to "7G", picks up an
 * optional `gatk_jar`, and reads the reference, key and optional interval / pedigree
 * files from the "gatk" submodule of the configuration.
 */
trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction {
  memoryLimit = Option(3)

  if (config.contains("gatk_jar")) {
    jarFile = config("gatk_jar")
  }

  override val defaultVmem = "7G"

  // Interval restrictions are only applied when they are configured.
  if (config.contains("intervals", submodule = "gatk")) {
    intervals = config("intervals", submodule = "gatk").asFileList
  }
  if (config.contains("exclude_intervals", submodule = "gatk")) {
    excludeIntervals = config("exclude_intervals", submodule = "gatk").asFileList
  }

  // Reference and key are read unconditionally.
  reference_sequence = config("reference", submodule = "gatk")
  gatk_key = config("gatk_key", submodule = "gatk")

  if (config.contains("pedigree", submodule = "gatk")) {
    pedigree = config("pedigree", submodule = "gatk").asFileList
  }
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `GenotypeGVCFs` extension.
 *
 * The constructor appends the configured annotations (defaulting to FisherStrand,
 * QualByDepth and ChromosomeCounts), applies optional `dbsnp` and `scattercount`
 * settings and chooses calling thresholds based on the configured `inputtype`.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class GenotypeGVCFs(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs
  with GatkGeneral {

  annotation ++= config(
    "annotation",
    default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")
  ).asStringList

  if (config.contains("dbsnp")) {
    dbsnp = config("dbsnp")
  }

  // NOTE(review): unlike the sibling wrappers, the presence check passes an extra
  // "genotypegvcfs" argument while the read below does not - confirm this is intended.
  if (config.contains("scattercount", "genotypegvcfs")) {
    scatterCount = config("scattercount")
  }

  // Only the default for stand_call_conf depends on the input type: 20 for "rna", 30 otherwise.
  config("inputtype", default = "dna").asString match {
    case "rna" => stand_call_conf = config("stand_call_conf", default = 20)
    case _     => stand_call_conf = config("stand_call_conf", default = 30)
  }
  stand_emit_conf = config("stand_emit_conf", default = 0)
}
/** Factory for [[GenotypeGVCFs]] jobs. */
object GenotypeGVCFs {
  /**
   * Creates a [[GenotypeGVCFs]] job.
   *
   * @param root      the [[Configurable]] handed to the new job
   * @param gvcfFiles files assigned to the job's `variant` field
   * @param output    value assigned to the job's `out` field
   * @return the configured job
   */
  def apply(root: Configurable, gvcfFiles: List[File], output: File): GenotypeGVCFs = {
    val job = new GenotypeGVCFs(root)
    job.variant = gvcfFiles
    job.out = output
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType
/**
 * Biopet flavour of the GATK Queue `HaplotypeCaller` extension.
 *
 * All settings are resolved in `afterGraph`; `useGvcf()` additionally switches the
 * job to GVCF output.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class HaplotypeCaller(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller
  with GatkGeneral {

  /**
   * Runs the inherited hook and then applies the configuration: mapping quality,
   * optional scatter count / dbsnp / allSitePLs, ploidy, threads, BAM output,
   * memory limit, output mode and the input-type dependent calling defaults.
   */
  override def afterGraph: Unit = {
    super.afterGraph

    min_mapping_quality_score = config("minMappingQualityScore", default = 20)
    if (config.contains("scattercount")) {
      scatterCount = config("scattercount")
    }
    if (config.contains("dbsnp")) {
      this.dbsnp = config("dbsnp")
    }
    this.sample_ploidy = config("ploidy")
    nct = config("threads", default = 1)
    bamOutput = config("bamOutput")
    // NOTE(review): the memory limit is derived from the configured thread count,
    // i.e. before nct may be forced down to 1 at the end of this method.
    memoryLimit = Option(nct.getOrElse(1) * 2)
    if (config.contains("allSitePLs")) {
      this.allSitePLs = config("allSitePLs")
    }

    if (config.contains("output_mode")) {
      import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
      val requested = config("output_mode").asString
      val knownModes = Map(
        "EMIT_ALL_CONFIDENT_SITES" -> EMIT_ALL_CONFIDENT_SITES,
        "EMIT_ALL_SITES" -> EMIT_ALL_SITES,
        "EMIT_VARIANTS_ONLY" -> EMIT_VARIANTS_ONLY
      )
      // An unrecognised name leaves output_mode untouched and only logs a warning.
      knownModes.get(requested) match {
        case Some(selected) => output_mode = selected
        case None           => logger.warn("output mode '" + requested + "' does not exist")
      }
    }

    // Soft-clipped bases are ignored by default for "rna" input only;
    // the confidence defaults are the same for every input type.
    val rnaInput = config("inputtype", default = "dna").asString == "rna"
    dontUseSoftClippedBases = config("dontusesoftclippedbases", default = rnaInput)
    stand_call_conf = config("stand_call_conf", default = 5)
    stand_emit_conf = config("stand_emit_conf", default = 0)

    // With a BAM output configured, more than one thread is reduced to exactly one.
    if (bamOutput != null && nct.exists(_ > 1)) {
      nct = Option(1)
      logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug")
    }
  }

  /**
   * Switches the job to GVCF reference-confidence output with a LINEAR variant index;
   * the index parameter comes from `variant_index_parameter` (default 128000).
   */
  def useGvcf(): Unit = {
    emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF
    variant_index_type = GATKVCFIndexType.LINEAR
    variant_index_parameter = config("variant_index_parameter", default = 128000)
  }
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `IndelRealigner` extension.
 *
 * The only local set-up is an optional `scattercount` taken from the configuration.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class IndelRealigner(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
/** Factory for [[IndelRealigner]] jobs. */
object IndelRealigner {
  /**
   * Creates an [[IndelRealigner]] job whose output is placed in `outputDir` and named
   * after the input file, with a trailing ".bam" replaced by ".realign.bam".
   *
   * @param root            the [[Configurable]] handed to the new job
   * @param input           file appended to the job's `input_file` list
   * @param targetIntervals value assigned to the job's `targetIntervals` field
   * @param outputDir       directory the output file is created in
   * @return the configured job
   */
  def apply(root: Configurable, input: File, targetIntervals: File, outputDir: String): IndelRealigner = {
    val realignedName = input.getName.stripSuffix(".bam") + ".realign.bam"

    val job = new IndelRealigner(root)
    job.input_file :+= input
    job.targetIntervals = targetIntervals
    job.out = new File(outputDir, realignedName)
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `PrintReads` extension.
 *
 * The constructor sets the memory limit to 4 and `defaultVmem` to "8G" and applies
 * an optional `scattercount`.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class PrintReads(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.PrintReads
  with GatkGeneral {

  memoryLimit = Option(4)
  override val defaultVmem = "8G"

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
/** Factory for [[PrintReads]] jobs. */
object PrintReads {
  /**
   * Creates a [[PrintReads]] job for one input file.
   *
   * @param root   the [[Configurable]] handed to the new job
   * @param input  file appended to the job's `input_file` list
   * @param output value assigned to the job's `out` field
   * @return the configured job
   */
  def apply(root: Configurable, input: File, output: File): PrintReads = {
    val job = new PrintReads(root)
    job.input_file :+= input
    job.out = output
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `RealignerTargetCreator` extension.
 *
 * The constructor sets `defaultVmem` to "6G" and the memory limit to 2.5, applies an
 * optional `scattercount` and appends the files configured under `known`.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class RealignerTargetCreator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.RealignerTargetCreator
  with GatkGeneral {

  override val defaultVmem = "6G"
  memoryLimit = Some(2.5)

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
  if (config.contains("known")) {
    known ++= config("known").asFileList
  }
}
/** Factory for [[RealignerTargetCreator]] jobs. */
object RealignerTargetCreator {
  /**
   * Creates a [[RealignerTargetCreator]] job whose output is placed in `outputDir` and
   * named after the input file, with a trailing ".bam" replaced by ".realign.intervals".
   *
   * @param root      the [[Configurable]] handed to the new job
   * @param input     file appended to the job's `input_file` list
   * @param outputDir directory the intervals file is created in
   * @return the configured job
   */
  def apply(root: Configurable, input: File, outputDir: String): RealignerTargetCreator = {
    val intervalsName = input.getName.stripSuffix(".bam") + ".realign.intervals"

    val job = new RealignerTargetCreator(root)
    job.input_file :+= input
    job.out = new File(outputDir, intervalsName)
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `SelectVariants` extension.
 *
 * The only local set-up is an optional `scattercount` taken from the configuration.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class SelectVariants(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.SelectVariants
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }
}
/** Factory for [[SelectVariants]] jobs. */
object SelectVariants {
  /**
   * Creates a [[SelectVariants]] job.
   *
   * @param root   the [[Configurable]] handed to the new job
   * @param input  value assigned to the job's `variant` field
   * @param output value assigned to the job's `out` field
   * @return the configured job
   */
  def apply(root: Configurable, input: File, output: File): SelectVariants = {
    val job = new SelectVariants(root)
    job.variant = input
    job.out = output
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `UnifiedGenotyper` extension.
 *
 * All settings are resolved in `afterGraph`.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class UnifiedGenotyper(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper
  with GatkGeneral {

  /**
   * Runs the inherited hook and then applies the configuration: the BOTH likelihoods
   * model, optional scatter count / dbsnp / allSitePLs, ploidy, threads, memory limit,
   * output mode and the calling confidence defaults.
   */
  override def afterGraph: Unit = {
    super.afterGraph

    genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH
    if (config.contains("scattercount")) {
      scatterCount = config("scattercount")
    }
    if (config.contains("dbsnp")) {
      this.dbsnp = config("dbsnp")
    }
    this.sample_ploidy = config("ploidy")
    nct = config("threads", default = 1)
    // The memory limit scales with the thread count (1 is assumed when nct is empty).
    memoryLimit = Option(nct.getOrElse(1) * 2)
    if (config.contains("allSitePLs")) {
      this.allSitePLs = config("allSitePLs")
    }

    if (config.contains("output_mode")) {
      import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._
      val requested = config("output_mode").asString
      val knownModes = Map(
        "EMIT_ALL_CONFIDENT_SITES" -> EMIT_ALL_CONFIDENT_SITES,
        "EMIT_ALL_SITES" -> EMIT_ALL_SITES,
        "EMIT_VARIANTS_ONLY" -> EMIT_VARIANTS_ONLY
      )
      // An unrecognised name leaves output_mode untouched and only logs a warning.
      knownModes.get(requested) match {
        case Some(selected) => output_mode = selected
        case None           => logger.warn("output mode '" + requested + "' does not exist")
      }
    }

    // NOTE(review): "rna" and every other input type currently get identical defaults;
    // confirm whether the rna branch was meant to differ.
    config("inputtype", default = "dna").asString match {
      case "rna" => stand_call_conf = config("stand_call_conf", default = 5)
      case _     => stand_call_conf = config("stand_call_conf", default = 5)
    }
    stand_emit_conf = config("stand_emit_conf", default = 0)
  }
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `VariantAnnotator` extension.
 *
 * The constructor applies an optional `scattercount` and reads `dbsnp` from the
 * configuration.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class VariantAnnotator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.VariantAnnotator
  with GatkGeneral {

  if (config.contains("scattercount")) {
    scatterCount = config("scattercount")
  }

  // NOTE(review): dbsnp is read without a config.contains guard, unlike the other
  // wrappers in this package - confirm a missing value is acceptable here.
  dbsnp = config("dbsnp")
}
/** Factory for [[VariantAnnotator]] jobs. */
object VariantAnnotator {
  /**
   * Creates a [[VariantAnnotator]] job.
   *
   * @param root     the [[Configurable]] handed to the new job
   * @param input    value assigned to the job's `variant` field
   * @param bamFiles files assigned to the job's `input_file` field
   * @param output   value assigned to the job's `out` field
   * @return the configured job
   */
  def apply(root: Configurable, input: File, bamFiles: List[File], output: File): VariantAnnotator = {
    val job = new VariantAnnotator(root)
    job.variant = input
    job.input_file = bamFiles
    job.out = output
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
/**
 * Biopet flavour of the GATK Queue `VariantEval` extension.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class VariantEval(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.VariantEval
  with GatkGeneral {

  /** Delegates to the inherited hook without adding anything. */
  override def afterGraph: Unit = super.afterGraph
}
/** Factories for [[VariantEval]] jobs. */
object VariantEval {
  /** Creates a job with its `eval`, `comp` and `out` fields assigned; shared by both factories. */
  private def prepare(root: Configurable, sample: File, compareWith: File, output: File): VariantEval = {
    val job = new VariantEval(root)
    job.eval = Seq(sample)
    job.comp = Seq(compareWith)
    job.out = output
    job
  }

  /**
   * Creates a [[VariantEval]] job comparing one file against another and invokes its
   * `afterGraph` hook before returning it.
   *
   * @param root        the [[Configurable]] handed to the new job
   * @param sample      file placed in the job's `eval` list
   * @param compareWith file placed in the job's `comp` list
   * @param output      value assigned to the job's `out` field
   * @return the configured job
   */
  def apply(root: Configurable, sample: File, compareWith: File,
            output: File): VariantEval = {
    val job = prepare(root, sample, compareWith, output)
    job.afterGraph
    job
  }

  /**
   * Same as the four-argument factory, but additionally sets `noST` and `noEV` to
   * `true` and assigns the given `ST` and `EV` lists.
   *
   * @param ST values assigned to the job's `ST` field
   * @param EV values assigned to the job's `EV` field
   * @return the configured job
   */
  def apply(root: Configurable, sample: File, compareWith: File,
            output: File, ST: Seq[String], EV: Seq[String]): VariantEval = {
    val job = prepare(root, sample, compareWith, output)
    job.noST = true
    job.ST = ST
    job.noEV = true
    job.EV = EV
    job.afterGraph
    job
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile
/**
 * Biopet flavour of the GATK Queue `VariantRecalibrator` extension.
 *
 * The constructor takes the thread count from `getThreads(4)`, sets the memory limit
 * to twice that, registers an optional dbsnp resource and reads the annotation list,
 * `minnumbadvariants` and `maxgaussians` from the configuration.
 *
 * @param root the [[Configurable]] exposed as this job's `root`
 */
class VariantRecalibrator(val root: Configurable)
  extends org.broadinstitute.gatk.queue.extensions.gatk.VariantRecalibrator
  with GatkGeneral {

  nt = Option(getThreads(4))
  memoryLimit = Option(nt.getOrElse(1) * 2)

  if (config.contains("dbsnp")) {
    resource :+= new TaggedFile(
      config("dbsnp").asString,
      "known=true,training=false,truth=false,prior=2.0"
    )
  }

  an = config(
    "annotation",
    default = List("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum")
  ).asStringList
  minNumBadVariants = config("minnumbadvariants")
  maxGaussians = config("maxgaussians")
}
/** Factory for [[VariantRecalibrator]] jobs in INDEL or SNP mode. */
object VariantRecalibrator {
  /**
   * Builds a [[VariantRecalibrator]] job for the requested mode.
   *
   * The job is an anonymous subclass that fixes `configName` to "variantrecalibrator",
   * prefixes the config path with "indel" or "snp", selects the mode, adds a
   * mode-specific `ts_filter_level` default and registers the training resources that
   * are present in the configuration ("mills" for indels; "hapmap", "omni" and "1000G"
   * for SNPs).
   *
   * @param root          the [[Configurable]] handed to the new job
   * @param input         file appended to the job's `input` list
   * @param recal_file    value assigned to the job's `recal_file` field
   * @param tranches_file value assigned to the job's `tranches_file` field
   * @param indel         `true` selects INDEL mode, `false` (the default) SNP mode
   * @return the configured job
   */
  def apply(root: Configurable,
            input: File,
            recal_file: File,
            tranches_file: File,
            indel: Boolean = false): VariantRecalibrator = {
    val recalibrator = new VariantRecalibrator(root) {
      override lazy val configName = "variantrecalibrator"

      override def configPath: List[String] = {
        val modeKey = if (indel) "indel" else "snp"
        modeKey :: super.configPath
      }

      if (indel) {
        mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
        defaults ++= Map("ts_filter_level" -> 99.0)
        if (config.contains("mills")) {
          resource :+= new TaggedFile(config("mills").asString, "known=false,training=true,truth=true,prior=12.0")
        }
      } else {
        mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
        defaults ++= Map("ts_filter_level" -> 99.5)
        // Each configured SNP resource is prepended, in this order, with its tag string.
        for (
          (key, tags) <- Seq(
            "hapmap" -> "known=false,training=true,truth=true,prior=15.0",
            "omni" -> "known=false,training=true,truth=true,prior=12.0",
            "1000G" -> "known=false,training=true,truth=false,prior=10.0"
          ) if config.contains(key)
        ) {
          resource +:= new TaggedFile(config(key).asString, tags)
        }
      }
    }

    recalibrator.input :+= input
    recalibrator.recal_file = recal_file
    recalibrator.tranches_file = tranches_file
    recalibrator
  }
}
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.basty
import java.io.File
import nl.lumc.sasc.biopet.core.MultiSampleQScript
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.Cat
import nl.lumc.sasc.biopet.extensions.Raxml
import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline
import nl.lumc.sasc.biopet.tools.BastyGenerateFasta
import org.broadinstitute.gatk.queue.QScript
class Basty(val root: Configurable) extends QScript with MultiSampleQScript {
def this() = this(null)
/** Per-library output holder; Basty adds no fields to the abstract base. */
class LibraryOutput extends AbstractLibraryOutput
/**
 * The three fasta files produced by one fasta-generation step.
 *
 * @param variants          file that ends up in the "fastas/variant*.fasta" concatenations
 * @param consensus         file that ends up in the "fastas/consensus*.fasta" concatenations
 * @param consensusVariants file that ends up in the "fastas/consensus.variant*.fasta" concatenations
 */
case class FastaOutput(
  variants: File,
  consensus: File,
  consensusVariants: File
)
/** Per-sample output holder carrying the two fasta sets read back by `biopetScript`. */
class SampleOutput extends AbstractSampleOutput {
  /** Fasta set used for the concatenations without the "snps_only" suffix. */
  var output: FastaOutput = _

  /** Fasta set used for the "snps_only" concatenations. */
  var outputSnps: FastaOutput = _
}
// Defaults registered by Basty: ploidy 1, UnifiedGenotyper instead of HaplotypeCaller,
// and joint variant calling switched on.
defaults ++= Map(
  "ploidy" -> 1,
  "use_haplotypecaller" -> false,
  "use_unifiedgenotyper" -> true,
  "joint_variantcalling" -> true
)

// Embedded GATK pipeline, constructed with this instance and forced to joint variant calling.
var gatkPipeline: GatkPipeline = new GatkPipeline(this)
gatkPipeline.jointVariantcalling = true
/** Points the embedded GATK pipeline at this pipeline's `outputDir` and then runs its `init`. */
def init(): Unit = {
  gatkPipeline.outputDir = outputDir
  gatkPipeline.init
}
/**
 * Assembles the Basty job graph.
 *
 * In order: runs the embedded GATK pipeline script and adds all of its functions,
 * creates the reference fasta outputs, runs the per-sample jobs, concatenates the
 * reference and per-sample fasta files into six files under "fastas/", and finally
 * schedules the RAxML jobs on the SNP-only variant fasta.
 */
def biopetScript() {
gatkPipeline.biopetScript
addAll(gatkPipeline.functions)
// Reference outputs are generated with a null first argument, once for all
// variants and once with snpsOnly = true.
// NOTE(review): addGenerateFasta is defined outside this view - presumably the
// first argument is the sample name; confirm.
val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference")
val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true)
runSamplesJobs()
// Each concatenation puts the reference file first, followed by the matching file
// of every entry in samplesOutput.
val catVariants = Cat(this, refVariants.variants :: samplesOutput.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta")
add(catVariants)
val catVariantsSnps = Cat(this, refVariantSnps.variants :: samplesOutput.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta")
add(catVariantsSnps)
val catConsensus = Cat(this, refVariants.consensus :: samplesOutput.map(_._2.output.consensus).toList, outputDir + "fastas/consensus.fasta")
add(catConsensus)
val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samplesOutput.map(_._2.outputSnps.consensus).toList, outputDir + "fastas/consensus.snps_only.fasta")
add(catConsensusSnps)
val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samplesOutput.map(_._2.output.consensusVariants).toList, outputDir + "fastas/consensus.variant.fasta")
add(catConsensusVariants)
val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samplesOutput.map(_._2.outputSnps.consensusVariants).toList, outputDir + "fastas/consensus.variant.snps_only.fasta")
add(catConsensusVariantsSnps)
// One seed (default 12345) is used for every RAxML job and for the generator that
// produces the per-bootstrap values below.
val seed: Int = config("seed", default = 12345)
// Schedules three kinds of RAxML jobs for one input: a "_ml" run, boot_runs
// bootstrap runs, and a final "_bi" run that consumes the best tree of the first
// run and the concatenated bootstrap files.
// Note that the outputDir parameter shadows the pipeline-level outputDir in here.
def addRaxml(input: File, outputDir: String, outputName: String) {
val raxmlMl = new Raxml(this)
raxmlMl.input = input
raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlMl.p = seed
raxmlMl.n = outputName + "_ml"
raxmlMl.w = outputDir
raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml")
add(raxmlMl)
// Seeded generator, so the sequence of b values is the same on every run.
val r = new scala.util.Random(seed)
val numBoot = config("boot_runs", default = 100, submodule = "raxml").asInt
// Each bootstrap job is added as it is built; the loop yields its bootstrap file.
val bootList = for (t <- 0 until numBoot) yield {
val raxmlBoot = new Raxml(this)
raxmlBoot.threads = 1
raxmlBoot.input = input
raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBoot.p = seed
// NOTE(review): math.abs(Int.MinValue) is still negative - confirm that value is acceptable for b.
raxmlBoot.b = math.abs(r.nextInt)
raxmlBoot.w = outputDir
raxmlBoot.N = 1
raxmlBoot.n = outputName + "_boot_" + t
add(raxmlBoot)
raxmlBoot.getBootstrapFile
}
// All bootstrap files are concatenated into "<outputDir>/boot_list".
val cat = Cat(this, bootList.toList, outputDir + "/boot_list")
add(cat)
val raxmlBi = new Raxml(this)
raxmlBi.input = input
raxmlBi.t = raxmlMl.getBestTreeFile
raxmlBi.z = cat.output
raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX")
raxmlBi.p = seed
raxmlBi.f = "b"
raxmlBi.n = outputName + "_bi"
raxmlBi.w = outputDir
add(raxmlBi)
}
// RAxML is only scheduled for the SNP-only variant fasta.
addRaxml(catVariantsSnps.output, outputDir + "raxml", "snps")
}
// Called for each sample
def runSingleSampleJobs(<