From 1144a30ee2d9b88451eb2b18fba68efb7be04e2c Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Mon, 9 Feb 2015 15:18:12 +0100 Subject: [PATCH] Added link to final file when only 1 lib --- .../biopet/pipelines/gatk/GatkPipeline.scala | 2 +- .../pipelines/gatk/GatkVariantcalling.scala | 82 +++++++++++-------- 2 files changed, 49 insertions(+), 35 deletions(-) diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala index c76139133..fd82a58f0 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala @@ -50,6 +50,7 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri /** Library variantcalling */ val gatkVariantcalling = new GatkVariantcalling(qscript) + gatkVariantcalling.doublePreProces = false gatkVariantcalling.sampleID = sampleId gatkVariantcalling.outputDir = libDir @@ -112,7 +113,6 @@ class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScri if (bamFile.isDefined) { gatkVariantcalling.inputBams = List(bamFile.get) gatkVariantcalling.variantcalling = config("library_variantcalling", default = false) - gatkVariantcalling.preProcesBams = true gatkVariantcalling.init gatkVariantcalling.biopetScript addAll(gatkVariantcalling.functions) diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala index 8bac4aaf6..4f1fce023 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala @@ -7,6 +7,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import java.io.File +import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.tools.{ VcfStats, MpileupToVcf, VcfFilter, MergeAlleles } import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper } @@ -38,12 +39,12 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr @Argument(doc = "Sample name", required = false) var sampleID: String = _ - var preProcesBams: Option[Boolean] = config("pre_proces_bams", default = true) + var preProcesBams: Boolean = config("pre_proces_bams", default = true) var variantcalling: Boolean = true - var doublePreProces: Option[Boolean] = config("double_pre_proces", default = true) - var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true) - var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false) - var useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false) + var doublePreProces: Boolean = config("double_pre_proces", default = true) + var useHaplotypecaller: Boolean = config("use_haplotypecaller", default = true) + var useUnifiedGenotyper: Boolean = config("use_unifiedgenotyper", default = false) + var useAllelesOption: Boolean = config("use_alleles_option", default = false) var useMpileup: Boolean = config("use_mpileup", default = true) var useIndelRealigner: Boolean = config("use_indel_realign", default = true) var useBaseRecalibration: Boolean = config("use_base_recalibration", default = true) @@ -62,42 +63,55 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } private def doublePreProces(files: List[File]): List[File] = { - if (files.size == 1) return files if (files.isEmpty) throw new IllegalStateException("Files can't be empty") - if (!doublePreProces.get) return files - val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) - markDup.isIntermediate = useIndelRealigner - add(markDup) - if (useIndelRealigner) { - List(addIndelRealign(markDup.output, outputDir, isIntermediate = false)) + else if (!doublePreProces) files + else if (files.size == 1) { + val bamFile: File = outputDir + files.head.getName + if (bamFile != files.head) { + val oldIndex: File = files.head.getAbsolutePath.stripSuffix(".bam") + ".bai" + val newIndex: File = bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai" + add(Ln(this, oldIndex, newIndex)) + + val bamLn = Ln(this, files.head, bamFile) + bamLn.deps :+= newIndex + add(bamLn) + } + List(bamFile) } else { - List(markDup.output) + val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) + markDup.isIntermediate = useIndelRealigner + add(markDup) + if (useIndelRealigner) { + List(addIndelRealign(markDup.output, outputDir, isIntermediate = false)) + } else { + List(markDup.output) + } } } def biopetScript() { - scriptOutput.bamFiles = if (preProcesBams.get) { - var bamFiles: List[File] = Nil - for (inputBam <- inputBams) { - var bamFile = inputBam - if (useIndelRealigner) { - bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration) - } - if (useBaseRecalibration) { - bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = bamFiles.size > 1) + scriptOutput.bamFiles = { + doublePreProces(if (preProcesBams) { + //var bamFiles: List[File] = Nil + for (inputBam <- inputBams) yield { + var bamFile = inputBam + if (useIndelRealigner) + bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration) + if (useBaseRecalibration) + bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = inputBams.size > 1) + bamFile } - bamFiles :+= bamFile - } - doublePreProces(bamFiles) - } else if (inputBams.size > 1 && doublePreProces.get) { - doublePreProces(inputBams) - } else inputBams + //bamFiles + } else { + inputBams + }) + } if (variantcalling) { var mergBuffer: SortedMap[String, File] = SortedMap() def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) } - if (sampleID != null && (useHaplotypecaller.get || config("joint_genotyping", default = false).asBoolean)) { + if (sampleID != null && (useHaplotypecaller || config("joint_genotyping", default = false).asBoolean)) { val hcGvcf = new HaplotypeCaller(this) hcGvcf.useGvcf hcGvcf.input_file = scriptOutput.bamFiles @@ -106,7 +120,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr scriptOutput.gvcfFile = hcGvcf.out } - if (useHaplotypecaller.get) { + if (useHaplotypecaller) { if (sampleID != null) { val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), outputDir + outputName + ".hc.discovery.vcf.gz") add(genotypeGVCFs) @@ -121,7 +135,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile) } - if (useUnifiedGenotyper.get) { + if (useUnifiedGenotyper) { val ugVcf = new UnifiedGenotyper(this) ugVcf.input_file = scriptOutput.bamFiles ugVcf.out = outputDir + outputName + ".ug.discovery.vcf.gz" @@ -156,12 +170,12 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr } // Allele mode - if (useAllelesOption.get) { + if (useAllelesOption) { val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz") mergeAlleles.isIntermediate = true add(mergeAlleles) - if (useHaplotypecaller.get) { + if (useHaplotypecaller) { val hcAlleles = new HaplotypeCaller(this) hcAlleles.input_file = scriptOutput.bamFiles hcAlleles.out = outputDir + outputName + ".hc.allele.vcf.gz" @@ -172,7 +186,7 @@ class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScr mergBuffer += ("3.HC-alleles" -> hcAlleles.out) } - if (useUnifiedGenotyper.get) { + if (useUnifiedGenotyper) { val ugAlleles = new UnifiedGenotyper(this) ugAlleles.input_file = scriptOutput.bamFiles ugAlleles.out = outputDir + outputName + ".ug.allele.vcf.gz" -- GitLab