From 205355eb2ac0d533d4eec9a3fc267734ac1b5530 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Tue, 20 May 2014 12:35:09 +0200 Subject: [PATCH] - Fixed scattercount in Variant Recalibration --- .../sasc/biopet/pipelines/gatk/Gatk.scala | 168 ++++++++++-------- 1 file changed, 89 insertions(+), 79 deletions(-) diff --git a/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala b/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala index ca21c3cf6..120a97a86 100644 --- a/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala +++ b/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala @@ -53,69 +53,74 @@ class Gatk(private var globalConfig: Config) extends QScript { if (onlySample == null) { //SampleWide jobs if (gvcfFiles.size > 0) { - val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments - genotypeGVCFs.variant = gvcfFiles - genotypeGVCFs.scatterCount = scatterCount - genotypeGVCFs.out = new File(outputDir,"final.vcf") + val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments { + this.variant = gvcfFiles + this.scatterCount = scatterCount + this.out = new File(outputDir,"final.vcf") + } add(genotypeGVCFs) //Snp recal val snpVariantRecalibrator = new VariantRecalibrator() with gatkArguments { - input +:= genotypeGVCFs.out - nt = 4; memoryLimit = 2 * nt - recal_file = swapExt(genotypeGVCFs.out,".vcf",".snp.recal") - tranches_file = swapExt(genotypeGVCFs.out,".vcf",".snp.tranches") - resource = Seq(new TaggedFile(config.getAsString("hapmap"), "known=false,training=true,truth=true,prior=15.0"), - new TaggedFile(config.getAsString("omni"), "known=false,training=true,truth=true,prior=12.0"), - new TaggedFile(config.getAsString("1000G"), "known=false,training=true,truth=false,prior=10.0"), - new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0")) - an = Seq("QD","MQ","MQRankSum","ReadPosRankSum","FS","DP","InbreedingCoeff") - mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP + this.input +:= genotypeGVCFs.out + this.nt = 4 + this.memoryLimit = 2 * nt + this.recal_file = swapExt(genotypeGVCFs.out,".vcf",".snp.recal") + this.tranches_file = swapExt(genotypeGVCFs.out,".vcf",".snp.tranches") + this.resource = Seq(new TaggedFile(config.getAsString("hapmap"), "known=false,training=true,truth=true,prior=15.0"), + new TaggedFile(config.getAsString("omni"), "known=false,training=true,truth=true,prior=12.0"), + new TaggedFile(config.getAsString("1000G"), "known=false,training=true,truth=false,prior=10.0"), + new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0")) + this.an = Seq("QD","MQ","MQRankSum","ReadPosRankSum","FS","DP","InbreedingCoeff") + this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP } add(snpVariantRecalibrator) val snpApplyRecalibration = new ApplyRecalibration() with gatkArguments { - input +:= genotypeGVCFs.out - recal_file = snpVariantRecalibrator.recal_file - tranches_file = snpVariantRecalibrator.tranches_file - out = swapExt(genotypeGVCFs.out,".vcf",".snp.recal.vcf") - ts_filter_level = 99.5 - mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - nt = 3; memoryLimit = 2 * nt - scatterCount = scatterCount + this.input +:= genotypeGVCFs.out + this.recal_file = snpVariantRecalibrator.recal_file + this.tranches_file = snpVariantRecalibrator.tranches_file + this.out = swapExt(genotypeGVCFs.out,".vcf",".snp.recal.vcf") + this.ts_filter_level = 99.5 + this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP + this.nt = 3 + this.memoryLimit = 2 * nt + if (scatterCount > 1) this.scatterCount = scatterCount } add(snpApplyRecalibration) //indel recal val indelVariantRecalibrator = new VariantRecalibrator() with gatkArguments { - input +:= genotypeGVCFs.out - nt = 4; memoryLimit = 2 * nt - recal_file = swapExt(genotypeGVCFs.out,".vcf",".indel.recal") - tranches_file = swapExt(genotypeGVCFs.out,".vcf",".indel.tranches") - resource :+= new TaggedFile(config.getAsString("mills"), "known=false,training=true,truth=true,prior=12.0") - resource :+= new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0") - an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum","InbreedingCoeff") - mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL + this.input +:= genotypeGVCFs.out + this.nt = 4 + this.memoryLimit = 2 * nt + this.recal_file = swapExt(genotypeGVCFs.out,".vcf",".indel.recal") + this.tranches_file = swapExt(genotypeGVCFs.out,".vcf",".indel.tranches") + this.resource :+= new TaggedFile(config.getAsString("mills"), "known=false,training=true,truth=true,prior=12.0") + this.resource :+= new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0") + this.an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum","InbreedingCoeff") + this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL } add(indelVariantRecalibrator) val indelApplyRecalibration = new ApplyRecalibration() with gatkArguments { - input +:= genotypeGVCFs.out - recal_file = indelVariantRecalibrator.recal_file - tranches_file = indelVariantRecalibrator.tranches_file - out = swapExt(genotypeGVCFs.out,".vcf",".indel.recal.vcf") - ts_filter_level = 99.0 - mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - nt = 3; memoryLimit = 2 * nt - scatterCount = scatterCount + this.input +:= genotypeGVCFs.out + this.recal_file = indelVariantRecalibrator.recal_file + this.tranches_file = indelVariantRecalibrator.tranches_file + this.out = swapExt(genotypeGVCFs.out,".vcf",".indel.recal.vcf") + this.ts_filter_level = 99.0 + this.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL + this.nt = 3 + this.memoryLimit = 2 * nt + if (scatterCount > 1) this.scatterCount = scatterCount } add(indelApplyRecalibration) // merge snp and indels val catVariants = new CatVariants() { - variant = Seq(snpApplyRecalibration.out,indelApplyRecalibration.out) - outputFile = swapExt(genotypeGVCFs.out,".vcf",".recal.vcf") - reference = referenceFile + this.variant = Seq(snpApplyRecalibration.out,indelApplyRecalibration.out) + this.outputFile = swapExt(genotypeGVCFs.out,".vcf",".recal.vcf") + this.reference = referenceFile } add(catVariants) } else logger.warn("No gVCFs to genotype") @@ -189,7 +194,7 @@ class Gatk(private var globalConfig: Config) extends QScript { if (paired) flexiprep.input_R2 = fastq_R2 flexiprep.outputDir = runDir + "flexiprep/" flexiprep.script - addAll(flexiprep.functions) // Add function of flexiprep to curent function pool + addAll(flexiprep.functions) // Add functions of flexiprep to curent function pool val bwaCommand = new Bwa(config) bwaCommand.R1 = flexiprep.outputFiles("output_R1") @@ -218,65 +223,70 @@ class Gatk(private var globalConfig: Config) extends QScript { } def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = { - val sortSam = new SortSam - sortSam.input = inputSam - sortSam.createIndex = true - sortSam.output = outputFile - sortSam.memoryLimit = 2 - sortSam.nCoresRequest = 2 - sortSam.jobResourceRequests :+= "h_vmem=4G" + val sortSam = new SortSam { + this.input = inputSam + this.createIndex = true + this.output = outputFile + this.memoryLimit = 2 + this.nCoresRequest = 2 + this.jobResourceRequests :+= "h_vmem=4G" + } add(sortSam) return sortSam.output } def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = { - val markDuplicates = new MarkDuplicates - markDuplicates.input = inputBams - markDuplicates.output = outputFile - markDuplicates.REMOVE_DUPLICATES = false - markDuplicates.metrics = swapExt(dir,outputFile,".bam",".metrics") - markDuplicates.outputIndex = swapExt(dir,markDuplicates.output,".bam",".bai") - markDuplicates.memoryLimit = 2 - markDuplicates.jobResourceRequests :+= "h_vmem=4G" + val markDuplicates = new MarkDuplicates { + this.input = inputBams + this.output = outputFile + this.REMOVE_DUPLICATES = false + this.metrics = swapExt(dir,outputFile,".bam",".metrics") + this.outputIndex = swapExt(dir,this.output,".bam",".bai") + this.memoryLimit = 2 + this.jobResourceRequests :+= "h_vmem=4G" + } add(markDuplicates) return markDuplicates.output } def addIndelRealign(inputBam:File, dir:String): File = { - val realignerTargetCreator = new RealignerTargetCreator with gatkArguments - realignerTargetCreator.I :+= inputBam - realignerTargetCreator.o = swapExt(dir,inputBam,".bam",".realign.intervals") - //realignerTargetCreator.nt = 1 - realignerTargetCreator.jobResourceRequests :+= "h_vmem=5G" - if (scatterCount > 1) realignerTargetCreator.scatterCount = scatterCount + val realignerTargetCreator = new RealignerTargetCreator with gatkArguments { + this.I :+= inputBam + this.o = swapExt(dir,inputBam,".bam",".realign.intervals") + this.jobResourceRequests :+= "h_vmem=5G" + if (scatterCount > 1) this.scatterCount = scatterCount + } add(realignerTargetCreator) - val indelRealigner = new IndelRealigner with gatkArguments - indelRealigner.I :+= inputBam - indelRealigner.targetIntervals = realignerTargetCreator.o - indelRealigner.o = swapExt(dir,inputBam,".bam",".realign.bam") - if (scatterCount > 1) indelRealigner.scatterCount = scatterCount + val indelRealigner = new IndelRealigner with gatkArguments { + this.I :+= inputBam + this.targetIntervals = realignerTargetCreator.o + this.o = swapExt(dir,inputBam,".bam",".realign.bam") + if (scatterCount > 1) this.scatterCount = scatterCount + } add(indelRealigner) return indelRealigner.o } def addBaseRecalibrator(inputBam:File, dir:String): File = { - val baseRecalibrator = new BaseRecalibrator with gatkArguments - baseRecalibrator.I :+= inputBam - baseRecalibrator.o = swapExt(dir,inputBam,".bam",".baserecal") - baseRecalibrator.knownSites :+= dbsnp - if (scatterCount > 1) baseRecalibrator.scatterCount = scatterCount - baseRecalibrator.nct = 2 + val baseRecalibrator = new BaseRecalibrator with gatkArguments { + this.I :+= inputBam + this.o = swapExt(dir,inputBam,".bam",".baserecal") + this.knownSites :+= dbsnp + if (scatterCount > 1) this.scatterCount = scatterCount + this.nct = 2 + } add(baseRecalibrator) - val printReads = new PrintReads with gatkArguments - printReads.I :+= inputBam - printReads.o = swapExt(dir,inputBam,".bam",".baserecal.bam") - printReads.BQSR = baseRecalibrator.o - if (scatterCount > 1) printReads.scatterCount = scatterCount + val printReads = new PrintReads with gatkArguments { + this.I :+= inputBam + this.o = swapExt(dir,inputBam,".bam",".baserecal.bam") + this.BQSR = baseRecalibrator.o + if (scatterCount > 1) this.scatterCount = scatterCount + } return printReads.o } -- GitLab