diff --git a/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala b/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala index 9beec1e9315da5639ec5b6fa0d2e3c9a67d0c9f2..ca21c3cf64d130bf6e4a66fa1728cbab67f337a7 100644 --- a/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala +++ b/gatk/src/main/java/nl/lumc/sasc/biopet/pipelines/gatk/Gatk.scala @@ -60,58 +60,63 @@ class Gatk(private var globalConfig: Config) extends QScript { add(genotypeGVCFs) //Snp recal - val snpVariantRecalibrator = new VariantRecalibrator() with gatkArguments - snpVariantRecalibrator.input +:= genotypeGVCFs.out - snpVariantRecalibrator.nt = 8 - snpVariantRecalibrator.recal_file = swapExt(genotypeGVCFs.out,".vcf",".snp.recal") - snpVariantRecalibrator.tranches_file = swapExt(genotypeGVCFs.out,".vcf",".snp.tranches") - snpVariantRecalibrator.resource :+= new TaggedFile(config.getAsString("hapmap"), "known=false,training=true,truth=true,prior=15.0") - snpVariantRecalibrator.resource :+= new TaggedFile(config.getAsString("omni"), "known=false,training=true,truth=true,prior=12.0") - snpVariantRecalibrator.resource :+= new TaggedFile(config.getAsString("1000G"), "known=false,training=true,truth=false,prior=10.0") - snpVariantRecalibrator.resource :+= new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0") - snpVariantRecalibrator.an = Seq("QD","MQ","MQRankSum","ReadPosRankSum","FS","DP","InbreedingCoeff") - snpVariantRecalibrator.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP + val snpVariantRecalibrator = new VariantRecalibrator() with gatkArguments { + input +:= genotypeGVCFs.out + nt = 4; memoryLimit = 2 * nt + recal_file = swapExt(genotypeGVCFs.out,".vcf",".snp.recal") + tranches_file = swapExt(genotypeGVCFs.out,".vcf",".snp.tranches") + resource = Seq(new TaggedFile(config.getAsString("hapmap"), "known=false,training=true,truth=true,prior=15.0"), + new TaggedFile(config.getAsString("omni"), "known=false,training=true,truth=true,prior=12.0"), + new TaggedFile(config.getAsString("1000G"), "known=false,training=true,truth=false,prior=10.0"), + new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0")) + an = Seq("QD","MQ","MQRankSum","ReadPosRankSum","FS","DP","InbreedingCoeff") + mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP + } add(snpVariantRecalibrator) - val snpApplyRecalibration = new ApplyRecalibration() with gatkArguments - snpApplyRecalibration.input +:= genotypeGVCFs.out - snpApplyRecalibration.recal_file = snpVariantRecalibrator.recal_file - snpApplyRecalibration.tranches_file = snpVariantRecalibrator.tranches_file - snpApplyRecalibration.out = swapExt(genotypeGVCFs.out,".vcf",".snp.recal.vcf") - snpApplyRecalibration.ts_filter_level = 99.5 - snpApplyRecalibration.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - snpApplyRecalibration.nt = 3 - snpApplyRecalibration.scatterCount = scatterCount + val snpApplyRecalibration = new ApplyRecalibration() with gatkArguments { + input +:= genotypeGVCFs.out + recal_file = snpVariantRecalibrator.recal_file + tranches_file = snpVariantRecalibrator.tranches_file + out = swapExt(genotypeGVCFs.out,".vcf",".snp.recal.vcf") + ts_filter_level = 99.5 + mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP + nt = 3; memoryLimit = 2 * nt + scatterCount = scatterCount + } add(snpApplyRecalibration) //indel recal - val indelVariantRecalibrator = new VariantRecalibrator() with gatkArguments - indelVariantRecalibrator.input +:= genotypeGVCFs.out - indelVariantRecalibrator.nt = 8 - indelVariantRecalibrator.recal_file = swapExt(genotypeGVCFs.out,".vcf",".indel.recal") - indelVariantRecalibrator.tranches_file = swapExt(genotypeGVCFs.out,".vcf",".indel.tranches") - indelVariantRecalibrator.resource :+= new TaggedFile(config.getAsString("mills"), "known=false,training=true,truth=true,prior=12.0") - indelVariantRecalibrator.resource :+= new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0") - indelVariantRecalibrator.an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum","InbreedingCoeff") - indelVariantRecalibrator.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL + val indelVariantRecalibrator = new VariantRecalibrator() with gatkArguments { + input +:= genotypeGVCFs.out + nt = 4; memoryLimit = 2 * nt + recal_file = swapExt(genotypeGVCFs.out,".vcf",".indel.recal") + tranches_file = swapExt(genotypeGVCFs.out,".vcf",".indel.tranches") + resource :+= new TaggedFile(config.getAsString("mills"), "known=false,training=true,truth=true,prior=12.0") + resource :+= new TaggedFile(config.getAsString("dbsnp"), "known=true,training=false,truth=false,prior=2.0") + an = Seq("QD","DP","FS","ReadPosRankSum","MQRankSum","InbreedingCoeff") + mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL + } add(indelVariantRecalibrator) - val indelApplyRecalibration = new ApplyRecalibration() with gatkArguments - indelApplyRecalibration.input +:= genotypeGVCFs.out - indelApplyRecalibration.recal_file = indelVariantRecalibrator.recal_file - indelApplyRecalibration.tranches_file = indelVariantRecalibrator.tranches_file - indelApplyRecalibration.out = swapExt(genotypeGVCFs.out,".vcf",".indel.recal.vcf") - indelApplyRecalibration.ts_filter_level = 99.0 - indelApplyRecalibration.mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - indelApplyRecalibration.nt = 3 - indelApplyRecalibration.scatterCount = scatterCount + val indelApplyRecalibration = new ApplyRecalibration() with gatkArguments { + input +:= genotypeGVCFs.out + recal_file = indelVariantRecalibrator.recal_file + tranches_file = indelVariantRecalibrator.tranches_file + out = swapExt(genotypeGVCFs.out,".vcf",".indel.recal.vcf") + ts_filter_level = 99.0 + mode = org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL + nt = 3; memoryLimit = 2 * nt + scatterCount = scatterCount + } add(indelApplyRecalibration) // merge snp and indels - val catVariants = new CatVariants() - catVariants.variant = Seq(snpApplyRecalibration.out,indelApplyRecalibration.out) - catVariants.outputFile = swapExt(genotypeGVCFs.out,".vcf",".recal.vcf") - catVariants.reference = referenceFile + val catVariants = new CatVariants() { + variant = Seq(snpApplyRecalibration.out,indelApplyRecalibration.out) + outputFile = swapExt(genotypeGVCFs.out,".vcf",".recal.vcf") + reference = referenceFile + } add(catVariants) } else logger.warn("No gVCFs to genotype") diff --git a/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala b/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala index 74573e2a18d8c083eacfb9ad2f149b1058fd9351..7f680b3a617681bba2c5270e740b238a59cb5eb0 100644 --- a/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala +++ b/mapping/src/main/java/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala @@ -80,7 +80,6 @@ class Mapping(private var globalConfig: Config) extends QScript { if (paired) fastq_R2 = flexiprep.outputFiles("output_R2") } var bamFile:File = "" - //var samFile:File = "" if (aligner == "bwa") { val bwaCommand = new Bwa(config) { R1 = fastq_R1; if (paired) R2 = fastq_R2; RG = getReadGroup; output = new File(outputDir + outputName + ".sam") } @@ -96,46 +95,48 @@ class Mapping(private var globalConfig: Config) extends QScript { } def addSortSam(inputSam:List[File], outputFile:File, dir:String) : File = { - val sortSam = new SortSam - sortSam.input = inputSam - sortSam.createIndex = true - sortSam.output = outputFile - sortSam.memoryLimit = 2 - sortSam.nCoresRequest = 2 - sortSam.jobResourceRequests :+= "h_vmem=4G" + val sortSam = new SortSam { + input = inputSam + createIndex = true + output = outputFile + memoryLimit = 2 + nCoresRequest = 2 + jobResourceRequests :+= "h_vmem=4G" + } add(sortSam) return sortSam.output } def addAddOrReplaceReadGroups(inputSam:List[File], outputFile:File, dir:String) : File = { - val addOrReplaceReadGroups = new AddOrReplaceReadGroups - addOrReplaceReadGroups.input = inputSam - addOrReplaceReadGroups.createIndex = true - addOrReplaceReadGroups.output = outputFile - addOrReplaceReadGroups.memoryLimit = 2 - addOrReplaceReadGroups.nCoresRequest = 2 - addOrReplaceReadGroups.jobResourceRequests :+= "h_vmem=4G" - - addOrReplaceReadGroups.RGLB = RGLB - addOrReplaceReadGroups.RGPL = RGPL - addOrReplaceReadGroups.RGPU = RGPU - addOrReplaceReadGroups.RGSM = RGSM - if (RGCN != null) addOrReplaceReadGroups.RGCN = RGCN - if (RGDS != null) addOrReplaceReadGroups.RGDS = RGDS - + val addOrReplaceReadGroups = new AddOrReplaceReadGroups { + input = inputSam + output = outputFile + createIndex = true + memoryLimit = 2 + nCoresRequest = 2 + jobResourceRequests :+= "h_vmem=4G" + + this.RGLB = RGLB + this.RGPL = RGPL + this.RGPU = RGPU + this.RGSM = RGSM + if (RGCN != null) this.RGCN = RGCN + if (RGDS != null) this.RGDS = RGDS + } return addOrReplaceReadGroups.output } def addMarkDuplicates(inputBams:List[File], outputFile:File, dir:String) : File = { - val markDuplicates = new MarkDuplicates - markDuplicates.input = inputBams - markDuplicates.output = outputFile - markDuplicates.REMOVE_DUPLICATES = false - markDuplicates.metrics = swapExt(dir,outputFile,".bam",".metrics") - markDuplicates.outputIndex = swapExt(dir,markDuplicates.output,".bam",".bai") - markDuplicates.memoryLimit = 2 - markDuplicates.jobResourceRequests :+= "h_vmem=4G" + val markDuplicates = new MarkDuplicates { + input = inputBams + output = outputFile + REMOVE_DUPLICATES = false + metrics = swapExt(dir,outputFile,".bam",".metrics") + outputIndex = swapExt(dir,this.output,".bam",".bai") + memoryLimit = 2 + jobResourceRequests :+= "h_vmem=4G" + } add(markDuplicates) return markDuplicates.output