Skip to content
Snippets Groups Projects
Commit 09190f82 authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added general trait (BiopetQScript) that now have multisample functions

parent 5d0b4fc9
No related branches found
No related tags found
No related merge requests found
package nl.lumc.sasc.biopet.core
//import org.broadinstitute.sting.queue.QScript
import java.io.File
import org.broadinstitute.sting.queue.util.Logging
trait BiopetQScript extends Logging {
var config: Config = _
var outputFiles:Map[String,File] = Map()
def runSamplesJobs : Map[String,Map[String,File]] = {
var output: Map[String,Map[String,File]] = Map()
if (config.contains("samples")) for ((key,value) <- config.getAsMap("samples")) {
var sample:Config = config.getAsConfig("samples").getAsConfig(key)
if (!sample.contains("ID")) sample.map += ("ID" -> key)
if (sample.getAsString("ID") == key) {
var files:Map[String,List[File]] = runSingleSampleJobs(sample)
} else logger.warn("Key is not the same as ID on value for sample")
} else logger.warn("No Samples found in config")
return output
}
def runSingleSampleJobs(sample:String) : Map[String,List[File]] ={
return runSingleSampleJobs(config.getAsConfig("samples").getAsConfig(sample))
}
def runSingleSampleJobs(sampleConfig:Config) : Map[String,List[File]] = {
logger.debug("Default sample function, function 'runSingleSampleJobs' not defined in pipeline")
runRunsJobs(sampleConfig)
return Map()
}
def runRunsJobs(sampleConfig:Config) : Map[String,Map[String,File]] = {
var output: Map[String,Map[String,File]] = Map()
val sampleID = sampleConfig.getAsString("ID")
if (sampleConfig.contains("runs")) for (key <- sampleConfig.getAsMap("runs").keySet) {
var run = sampleConfig.getAsConfig("runs").getAsConfig(key)
if (!run.contains("ID")) run.map += ("ID" -> key)
if (run.getAsString("ID") == key) {
output += key -> runSingleRunJobs(run, sampleConfig)
} else logger.warn("Key is not the same as ID on value for run of sample: " + sampleID)
} else logger.warn("No runs found in config for sample: " + sampleID)
return output
}
def runSingleRunJobs(runConfig:Config, sampleConfig:Config) : Map[String,File] = {
logger.debug("Default run function, function 'runSingleRunJobs' not defined in pipeline")
return Map()
}
}
...@@ -4,7 +4,7 @@ import scala.util.parsing.json._ ...@@ -4,7 +4,7 @@ import scala.util.parsing.json._
import java.io.File import java.io.File
import org.broadinstitute.sting.queue.util.Logging import org.broadinstitute.sting.queue.util.Logging
class Config(private var map: Map[String,Any]) extends Logging { class Config(var map: Map[String,Any]) extends Logging {
def this() = { def this() = {
this(Map()) this(Map())
logger.info("Init phase of config") logger.info("Init phase of config")
......
...@@ -11,18 +11,17 @@ import scala.util.parsing.json._ ...@@ -11,18 +11,17 @@ import scala.util.parsing.json._
import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.commandline._
import nl.lumc.sasc.biopet.pipelines.flexiprep.scripts._ import nl.lumc.sasc.biopet.pipelines.flexiprep.scripts._
class Flexiprep(private var globalConfig: Config) extends QScript { class Flexiprep(private var globalConfig: Config) extends QScript with BiopetQScript {
def this() = this(new Config()) def this() = this(new Config())
@Argument(doc="Config Json file",shortName="config", required=false) var configfiles: List[File] = Nil @Argument(doc="Config Json file",shortName="config", required=false) val configfiles: List[File] = Nil
@Input(doc="R1 fastq file (gzipped allowed)", shortName="R1",required=true) var input_R1: File = _ @Input(doc="R1 fastq file (gzipped allowed)", shortName="R1",required=true) var input_R1: File = _
@Input(doc="R2 fastq file (gzipped allowed)", shortName="R2", required=false) var input_R2: File = _ @Input(doc="R2 fastq file (gzipped allowed)", shortName="R2", required=false) var input_R2: File = _
@Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _ @Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _
@Argument(doc="Skip Trim fastq files", shortName="skiptrim", required=false) var skipTrim: Boolean = false @Argument(doc="Skip Trim fastq files", shortName="skiptrim", required=false) var skipTrim: Boolean = false
@Argument(doc="Skip Clip fastq files", shortName="skipclip", required=false) var skipClip: Boolean = false @Argument(doc="Skip Clip fastq files", shortName="skipclip", required=false) var skipClip: Boolean = false
var config: Config = _ //var outputFiles:Map[String,File] = Map()
var outputFiles:Map[String,File] = Map()
var paired: Boolean = (input_R2 != null) var paired: Boolean = (input_R2 != null)
def init() { def init() {
...@@ -38,7 +37,6 @@ class Flexiprep(private var globalConfig: Config) extends QScript { ...@@ -38,7 +37,6 @@ class Flexiprep(private var globalConfig: Config) extends QScript {
def script() { def script() {
init() init()
runInitialFastqc() runInitialFastqc()
outputFiles += ("output_R1" -> zcatIfNeeded(input_R1,outputDir)) outputFiles += ("output_R1" -> zcatIfNeeded(input_R1,outputDir))
...@@ -53,7 +51,6 @@ class Flexiprep(private var globalConfig: Config) extends QScript { ...@@ -53,7 +51,6 @@ class Flexiprep(private var globalConfig: Config) extends QScript {
results = runTrimClip(outputFiles("output_R1"), outputDir) results = runTrimClip(outputFiles("output_R1"), outputDir)
outputFiles += ("output_R1" -> results("output_R1")) outputFiles += ("output_R1" -> results("output_R1"))
} }
runFinalFastqc() runFinalFastqc()
} }
......
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
"1000G": "/home/pjvan_thof/pipelines/test/test", "1000G": "/home/pjvan_thof/pipelines/test/test",
"mills": "/home/pjvan_thof/pipelines/test/test" "mills": "/home/pjvan_thof/pipelines/test/test"
}, },
"Samples": { "samples": {
"test": { "test": {
"ID": "test", "ID": "test",
"Runs": { "runs": {
"1" : { "1" : {
"ID": "1", "ID": "1",
"R1" : "/home/pjvan_thof/pipelines/test/test.fastq" "R1" : "/home/pjvan_thof/pipelines/test/test.fastq"
......
...@@ -12,23 +12,19 @@ import org.broadinstitute.sting.queue.function._ ...@@ -12,23 +12,19 @@ import org.broadinstitute.sting.queue.function._
import scala.util.parsing.json._ import scala.util.parsing.json._
import org.broadinstitute.sting.utils.variant._ import org.broadinstitute.sting.utils.variant._
class Gatk(private var globalConfig: Config) extends QScript { class Gatk(private var globalConfig: Config) extends QScript with BiopetQScript {
qscript => qscript =>
def this() = this(new Config())
@Argument(doc="Config Json file",shortName="config") var configfiles: List[File] = Nil @Argument(doc="Config Json file",shortName="config") var configfiles: List[File] = Nil
@Argument(doc="Only Sample",shortName="sample", required=false) var onlySample: String = _ @Argument(doc="Only Sample",shortName="sample", required=false) var onlySample: String = ""
@Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _ @Argument(doc="Output directory", shortName="outputDir", required=true) var outputDir: String = _
def this() = this(new Config())
var config: Config = _ //var config: Config = _
var referenceFile: File = _ var referenceFile: File = _
var dbsnp: File = _ var dbsnp: File = _
var gvcfFiles: List[File] = Nil var gvcfFiles: List[File] = Nil
trait gatkArguments extends CommandLineGATK {
this.reference_sequence = referenceFile
this.memoryLimit = 2
this.jobResourceRequests :+= "h_vmem=4G"
}
def init() { def init() {
for (file <- configfiles) globalConfig.loadConfigFile(file) for (file <- configfiles) globalConfig.loadConfigFile(file)
config = Config.mergeConfigs(globalConfig.getAsConfig("gatk"), globalConfig) config = Config.mergeConfigs(globalConfig.getAsConfig("gatk"), globalConfig)
...@@ -40,154 +36,46 @@ class Gatk(private var globalConfig: Config) extends QScript { ...@@ -40,154 +36,46 @@ class Gatk(private var globalConfig: Config) extends QScript {
} }
def script() { def script() {
this.init() init()
if (config.contains("Samples")) for ((key,value) <- config.getAsMap("Samples")) { if (onlySample.isEmpty) {
if (onlySample == null || onlySample == key) { runSamplesJobs
var sample:Config = config.getAsConfig("Samples").getAsConfig(key)
if (sample.getAsString("ID") == key) {
var files:Map[String,List[File]] = sampleJobs(sample)
if (files.contains("gvcf")) for (file <- files("gvcf")) gvcfFiles :+= file
} else logger.warn("Key is not the same as ID on value for sample")
} else logger.info("Skipping Sample: " + key)
} else logger.warn("No Samples found in config")
if (onlySample == null) {
//SampleWide jobs //SampleWide jobs
if (gvcfFiles.size > 0) { if (gvcfFiles.size > 0) {
val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments { var vcfFile = addGenotypeGVCFs(gvcfFiles, outputDir + "recalibration/")
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("genotypegvcfs"), qscript.config) vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir + "recalibration/")
this.variant = gvcfFiles vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir + "recalibration/")
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
this.out = new File(outputDir,"final.vcf")
}
add(genotypeGVCFs)
//Snp recal
val snpVariantRecalibrator = new VariantRecalibrator() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("variantrecalibrator"), qscript.config)
this.input +:= genotypeGVCFs.out
this.nt = 4
this.memoryLimit = 2 * nt
this.recal_file = swapExt(genotypeGVCFs.out,".vcf",".snp.recal")
this.tranches_file = swapExt(genotypeGVCFs.out,".vcf",".snp.tranches")
this.resource = Seq(new TaggedFile(config.getAsString("hapmap"), "known=false,training=true,truth=true,prior=15.0"),
new TaggedFile(config.getAsString("omni"), "known=false,training=true,truth=true,prior=12.0"),
new TaggedFile(config.getAsString("1000G"), "known=false,training=true,truth=false,prior=10.0"),
new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0"))
this.an = Seq("QD","MQ","MQRankSum","ReadPosRankSum","FS","DP","InbreedingCoeff")
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
}
add(snpVariantRecalibrator)
val snpApplyRecalibration = new ApplyRecalibration() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("applyrecalibration"), qscript.config)
this.input +:= genotypeGVCFs.out
this.recal_file = snpVariantRecalibrator.recal_file
this.tranches_file = snpVariantRecalibrator.tranches_file
this.out = swapExt(genotypeGVCFs.out,".vcf",".snp.recal.vcf")
this.ts_filter_level = 99.5
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
this.nt = 3
this.memoryLimit = 2 * nt
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
}
add(snpApplyRecalibration)
//indel recal
val indelVariantRecalibrator = new VariantRecalibrator() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("variantrecalibrator"), qscript.config)
this.input +:= genotypeGVCFs.out
this.nt = 4
this.memoryLimit = 2 * nt
this.recal_file = swapExt(genotypeGVCFs.out,".vcf",".indel.recal")
this.tranches_file = swapExt(genotypeGVCFs.out,".vcf",".indel.tranches")
this.resource :+= new TaggedFile(config.getAsString("mills"), "known=false,training=true,truth=true,prior=12.0")
this.resource :+= new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0")
this.an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum","InbreedingCoeff")
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
}
add(indelVariantRecalibrator)
val indelApplyRecalibration = new ApplyRecalibration() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("applyrecalibration"), qscript.config)
this.input +:= genotypeGVCFs.out
this.recal_file = indelVariantRecalibrator.recal_file
this.tranches_file = indelVariantRecalibrator.tranches_file
this.out = swapExt(genotypeGVCFs.out,".vcf",".indel.recal.vcf")
this.ts_filter_level = 99.0
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
this.nt = 3
this.memoryLimit = 2 * nt
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
}
add(indelApplyRecalibration)
} else logger.warn("No gVCFs to genotype") } else logger.warn("No gVCFs to genotype")
} } else runSingleSampleJobs(onlySample)
} }
// Called for each sample // Called for each sample
def sampleJobs(sampleConfig:Config) : Map[String,List[File]] = { override def runSingleSampleJobs(sampleConfig:Config) : Map[String,List[File]] = {
var outputFiles:Map[String,List[File]] = Map() var outputFiles:Map[String,List[File]] = Map()
outputFiles += ("FinalBams" -> List()) var runBamfiles: List[File] = List()
var runs:List[Map[String,File]] = Nil var sampleID: String = sampleConfig.getAsString("ID")
if (sampleConfig.contains("ID")) { for ((run, runFiles) <- runRunsJobs(sampleConfig)) {
var sampleID: String = sampleConfig.getAsString("ID") runBamfiles +:= runFiles("FinalBam")
this.logger.info("Starting generate jobs for sample: " + sampleID)
for (key <- sampleConfig.getAsMap("Runs").keySet) {
var runConfig = sampleConfig.getAsConfig("Runs").getAsConfig(key)
var run: Map[String,File] = runJobs(runConfig, sampleConfig)
var FinalBams:List[File] = outputFiles("FinalBams")
if (run.contains("FinalBam")) FinalBams :+= run("FinalBam")
else logger.warn("No Final bam for Sample: " + sampleID + " Run: " + runConfig)
outputFiles += ("FinalBams" -> FinalBams)
runs +:= run
}
// Variant calling
val haplotypeCaller = new HaplotypeCaller with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("haplotypecaller"), qscript.config)
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
this.input_file = outputFiles("FinalBams")
this.out = new File(outputDir,sampleID + "/" + sampleID + ".gvcf.vcf")
if (dbsnp != null) this.dbsnp = qscript.dbsnp
this.nct = 3
this.memoryLimit = this.nct * 2
// GVCF options
this.emitRefConfidence = org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCaller.ReferenceConfidenceMode.GVCF
this.variant_index_type = GATKVCFIndexType.LINEAR
this.variant_index_parameter = 128000
}
if (haplotypeCaller.input_file.size > 0) {
add(haplotypeCaller)
outputFiles += ("gvcf" -> List(haplotypeCaller.out))
}
} else {
this.logger.warn("Sample in config missing ID, skipping sample")
} }
outputFiles += ("FinalBams" -> runBamfiles)
if (runBamfiles.size > 0) {
var gvcfFile = addHaplotypeCaller(runBamfiles, new File(outputDir + sampleID + "/" + sampleID + ".gvcf.vcf"))
outputFiles += ("gvcf" -> List(gvcfFile))
gvcfFiles :+= gvcfFile
} else logger.warn("No bamfiles for variant calling for sample: " + sampleID)
return outputFiles return outputFiles
} }
// Called for each run from a sample // Called for each run from a sample
def runJobs(runConfig:Config, sampleConfig:Config) : Map[String,File] = { override def runSingleRunJobs(runConfig:Config, sampleConfig:Config) : Map[String,File] = {
var outputFiles:Map[String,File] = Map() var outputFiles:Map[String,File] = Map()
val runID: String = runConfig.getAsString("ID") val runID: String = runConfig.getAsString("ID")
val fastq_R1: String = runConfig.getAsString("R1", null)
val fastq_R2: String = runConfig.getAsString("R2", null)
val paired: Boolean = (fastq_R2 != null)
val sampleID: String = sampleConfig.get("ID").toString val sampleID: String = sampleConfig.get("ID").toString
if (fastq_R1 != null) { val runDir: String = outputDir + sampleID + "/run_" + runID + "/"
val runDir: String = outputDir + sampleID + "/run_" + runID + "/" if (runConfig.contains("R1")) {
val mapping = new Mapping(config) val mapping = new Mapping(config)
mapping.input_R1 = fastq_R1 mapping.loadRunConfig(runConfig, sampleConfig, runDir)
if (paired) mapping.input_R2 = fastq_R2
mapping.outputDir = runDir + "mapping/"
mapping.RGSM = sampleID
mapping.RGLB = runID
if (runConfig.contains("PL")) mapping.RGPL = runConfig.getAsString("PL")
if (runConfig.contains("PU")) mapping.RGPU = runConfig.getAsString("PU")
if (runConfig.contains("CN")) mapping.RGCN = runConfig.getAsString("CN")
mapping.script mapping.script
addAll(mapping.functions) // Add functions of mapping to curent function pool addAll(mapping.functions) // Add functions of mapping to curent function pool
...@@ -195,24 +83,24 @@ class Gatk(private var globalConfig: Config) extends QScript { ...@@ -195,24 +83,24 @@ class Gatk(private var globalConfig: Config) extends QScript {
bamFile = addBaseRecalibrator(bamFile,runDir) // Base recalibrator bamFile = addBaseRecalibrator(bamFile,runDir) // Base recalibrator
outputFiles += ("FinalBam" -> bamFile) outputFiles += ("FinalBam" -> bamFile)
} else this.logger.error("Sample: " + sampleID + ": No R1 found for runs: " + runConfig) } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
return outputFiles return outputFiles
} }
def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = {
val markDuplicates = new MarkDuplicates {
this.input = inputBams
this.output = outputFile
this.REMOVE_DUPLICATES = false
this.metrics = swapExt(dir,outputFile,".bam",".metrics")
this.outputIndex = swapExt(dir,this.output,".bam",".bai")
this.memoryLimit = 2
this.jobResourceRequests :+= "h_vmem=4G"
}
add(markDuplicates)
// def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = { return markDuplicates.output
// val markDuplicates = new MarkDuplicates { }
// this.input = inputBams
// this.output = outputFile
// this.REMOVE_DUPLICATES = false
// this.metrics = swapExt(dir,outputFile,".bam",".metrics")
// this.outputIndex = swapExt(dir,this.output,".bam",".bai")
// this.memoryLimit = 2
// this.jobResourceRequests :+= "h_vmem=4G"
// }
// add(markDuplicates)
//
// return markDuplicates.output
// }
def addIndelRealign(inputBam:File, dir:String): File = { def addIndelRealign(inputBam:File, dir:String): File = {
val realignerTargetCreator = new RealignerTargetCreator with gatkArguments { val realignerTargetCreator = new RealignerTargetCreator with gatkArguments {
...@@ -257,6 +145,109 @@ class Gatk(private var globalConfig: Config) extends QScript { ...@@ -257,6 +145,109 @@ class Gatk(private var globalConfig: Config) extends QScript {
return printReads.o return printReads.o
} }
def addHaplotypeCaller(bamfiles:List[File], outputfile:File): File = {
val haplotypeCaller = new HaplotypeCaller with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("haplotypecaller"), qscript.config)
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
this.input_file = bamfiles
this.out = outputfile
if (dbsnp != null) this.dbsnp = qscript.dbsnp
this.nct = 3
this.memoryLimit = this.nct * 2
// GVCF options
this.emitRefConfidence = org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCaller.ReferenceConfidenceMode.GVCF
this.variant_index_type = GATKVCFIndexType.LINEAR
this.variant_index_parameter = 128000
}
add(haplotypeCaller)
return haplotypeCaller.out
}
def addSnpVariantRecalibrator(inputVcf:File, dir:String): File = {
val snpVariantRecalibrator = new VariantRecalibrator() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("variantrecalibrator"), qscript.config)
this.input +:= inputVcf
this.nt = 4
this.memoryLimit = 2 * nt
this.recal_file = swapExt(dir, inputVcf,".vcf",".snp.recal")
this.tranches_file = swapExt(dir, inputVcf,".vcf",".snp.tranches")
this.resource = Seq(new TaggedFile(config.getAsString("hapmap"), "known=false,training=true,truth=true,prior=15.0"),
new TaggedFile(config.getAsString("omni"), "known=false,training=true,truth=true,prior=12.0"),
new TaggedFile(config.getAsString("1000G"), "known=false,training=true,truth=false,prior=10.0"),
new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0"))
this.an = Seq("QD","MQ","MQRankSum","ReadPosRankSum","FS","DP","InbreedingCoeff")
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
}
add(snpVariantRecalibrator)
val snpApplyRecalibration = new ApplyRecalibration() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("applyrecalibration"), qscript.config)
this.input +:= inputVcf
this.recal_file = snpVariantRecalibrator.recal_file
this.tranches_file = snpVariantRecalibrator.tranches_file
this.out = swapExt(dir, inputVcf,".vcf",".snp.recal.vcf")
this.ts_filter_level = 99.5
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP
this.nt = 3
this.memoryLimit = 2 * nt
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
}
add(snpApplyRecalibration)
return snpApplyRecalibration.out
}
def addIndelVariantRecalibrator(inputVcf:File, dir:String): File = {
val indelVariantRecalibrator = new VariantRecalibrator() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("variantrecalibrator"), qscript.config)
this.input +:= inputVcf
this.nt = 4
this.memoryLimit = 2 * nt
this.recal_file = swapExt(dir, inputVcf,".vcf",".indel.recal")
this.tranches_file = swapExt(dir, inputVcf,".vcf",".indel.tranches")
this.resource :+= new TaggedFile(config.getAsString("mills"), "known=false,training=true,truth=true,prior=12.0")
this.resource :+= new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0")
this.an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum","InbreedingCoeff")
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
}
add(indelVariantRecalibrator)
val indelApplyRecalibration = new ApplyRecalibration() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("applyrecalibration"), qscript.config)
this.input +:= inputVcf
this.recal_file = indelVariantRecalibrator.recal_file
this.tranches_file = indelVariantRecalibrator.tranches_file
this.out = swapExt(dir, inputVcf,".vcf",".indel.recal.vcf")
this.ts_filter_level = 99.0
this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL
this.nt = 3
this.memoryLimit = 2 * nt
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
}
add(indelApplyRecalibration)
return indelApplyRecalibration.out
}
def addGenotypeGVCFs(gvcfFiles: List[File], dir:String): File = {
val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments {
val config: Config = Config.mergeConfigs(qscript.config.getAsConfig("genotypegvcfs"), qscript.config)
this.variant = gvcfFiles
if (config.contains("scattercount")) this.scatterCount = config.getAsInt("scattercount")
this.out = new File(outputDir,"genotype.vcf")
}
add(genotypeGVCFs)
return genotypeGVCFs.out
}
trait gatkArguments extends CommandLineGATK {
this.reference_sequence = referenceFile
this.memoryLimit = 2
this.jobResourceRequests :+= "h_vmem=4G"
}
} }
object Gatk extends PipelineCommand { object Gatk extends PipelineCommand {
......
...@@ -14,7 +14,7 @@ import org.broadinstitute.sting.queue.function._ ...@@ -14,7 +14,7 @@ import org.broadinstitute.sting.queue.function._
import scala.util.parsing.json._ import scala.util.parsing.json._
import org.broadinstitute.sting.utils.variant._ import org.broadinstitute.sting.utils.variant._
class Mapping(private var globalConfig: Config) extends QScript { class Mapping(private var globalConfig: Config) extends QScript with BiopetQScript {
qscript => qscript =>
@Argument(doc="Config Json file",shortName="config", required=false) var configfiles: List[File] = Nil @Argument(doc="Config Json file",shortName="config", required=false) var configfiles: List[File] = Nil
@Input(doc="R1 fastq file", shortName="R1",required=true) var input_R1: File = _ @Input(doc="R1 fastq file", shortName="R1",required=true) var input_R1: File = _
...@@ -38,15 +38,13 @@ class Mapping(private var globalConfig: Config) extends QScript { ...@@ -38,15 +38,13 @@ class Mapping(private var globalConfig: Config) extends QScript {
def this() = this(new Config()) def this() = this(new Config())
var config: Config = _
var referenceFile: File = _ var referenceFile: File = _
var outputFiles:Map[String,File] = Map()
var paired: Boolean = false var paired: Boolean = false
def init() { def init() {
for (file <- configfiles) globalConfig.loadConfigFile(file) for (file <- configfiles) globalConfig.loadConfigFile(file)
config = Config.mergeConfigs(globalConfig.getAsConfig("mapping"), globalConfig) config = Config.mergeConfigs(globalConfig.getAsConfig("mapping"), globalConfig)
if (aligner == null) aligner = "bwa" if (aligner == null) aligner = config.getAsString("aligner", "bwa")
referenceFile = config.getAsString("referenceFile") referenceFile = config.getAsString("referenceFile")
if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module") if (outputDir == null) throw new IllegalStateException("Missing Output directory on mapping module")
else if (!outputDir.endsWith("/")) outputDir += "/" else if (!outputDir.endsWith("/")) outputDir += "/"
...@@ -157,13 +155,25 @@ class Mapping(private var globalConfig: Config) extends QScript { ...@@ -157,13 +155,25 @@ class Mapping(private var globalConfig: Config) extends QScript {
RG += "PL:" + RGPL + "\\t" RG += "PL:" + RGPL + "\\t"
RG += "PU:" + RGPU + "\\t" RG += "PU:" + RGPU + "\\t"
RG += "SM:" + RGSM + "\\t" RG += "SM:" + RGSM + "\\t"
RG += "CN:" + RGCN + "\\t" if (RGCN != null) RG += "CN:" + RGCN + "\\t"
RG += "DS" + RGDS + "\\t" if (RGDS != null) RG += "DS" + RGDS + "\\t"
RG += "DT" + RGDT + "\\t" if (RGDT != null) RG += "DT" + RGDT + "\\t"
RG += "PI" + RGPI + "\\t" if (RGPI > 0) RG += "PI" + RGPI + "\\t"
return RG.substring(0, RG.lastIndexOf("\\t")) return RG.substring(0, RG.lastIndexOf("\\t"))
} }
def loadRunConfig(runConfig:Config, sampleConfig:Config, runDir: String) {
input_R1 = runConfig.getAsString("R1", null)
input_R2 = runConfig.getAsString("R2", null)
paired = (input_R2 != null)
RGLB = runConfig.getAsString("ID")
RGSM = sampleConfig.get("ID").toString
if (runConfig.contains("PL")) RGPL = runConfig.getAsString("PL")
if (runConfig.contains("PU")) RGPU = runConfig.getAsString("PU")
if (runConfig.contains("CN")) RGCN = runConfig.getAsString("CN")
outputDir = runDir
}
} }
object Mapping extends PipelineCommand { object Mapping extends PipelineCommand {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment