From 5a4744154798a75118cf7f28cb8a9de34a344566 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Sat, 22 Nov 2014 16:06:33 +0100 Subject: [PATCH] Remove protected source file from public artifacts --- .../extensions/gatk/AnalyzeCovariates.scala | 17 -- .../extensions/gatk/ApplyRecalibration.scala | 34 --- .../extensions/gatk/BaseRecalibrator.scala | 23 -- .../biopet/extensions/gatk/CombineGVCFs.scala | 17 -- .../extensions/gatk/CombineVariants.scala | 17 -- .../biopet/extensions/gatk/GatkGeneral.scala | 18 -- .../extensions/gatk/GenotypeGVCFs.scala | 28 -- .../extensions/gatk/HaplotypeCaller.scala | 48 ---- .../extensions/gatk/IndelRealigner.scala | 18 -- .../biopet/extensions/gatk/PrintReads.scala | 21 -- .../gatk/RealignerTargetCreator.scala | 22 -- .../extensions/gatk/SelectVariants.scala | 17 -- .../extensions/gatk/UnifiedGenotyper.scala | 34 --- .../extensions/gatk/VariantAnnotator.scala | 19 -- .../biopet/extensions/gatk/VariantEval.scala | 37 --- .../extensions/gatk/VariantRecalibrator.scala | 40 --- .../sasc/biopet/pipelines/basty/Basty.scala | 147 ----------- .../gatk/GatkBenchmarkGenotyping.scala | 58 ----- .../pipelines/gatk/GatkGenotyping.scala | 58 ----- .../biopet/pipelines/gatk/GatkPipeline.scala | 237 ----------------- .../gatk/GatkVariantRecalibration.scala | 76 ------ .../pipelines/gatk/GatkVariantcalling.scala | 244 ------------------ .../pipelines/gatk/GatkVcfSampleCompare.scala | 82 ------ .../pipelines/gatk/SampleCompareMetrics.scala | 148 ----------- 24 files changed, 1460 deletions(-) delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala delete mode 100644 public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala deleted file mode 100644 index 525c357a9..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/AnalyzeCovariates.scala +++ /dev/null @@ -1,17 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class AnalyzeCovariates(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.AnalyzeCovariates with GatkGeneral { -} - -object AnalyzeCovariates { - def apply(root: Configurable, before: File, after: File, plots: File): AnalyzeCovariates = { - val ac = new AnalyzeCovariates(root) - ac.before = before - ac.after = after - ac.plots = plots - return ac - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala deleted file mode 100644 index b0e3a71fc..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/ApplyRecalibration.scala +++ /dev/null @@ -1,34 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class ApplyRecalibration(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.ApplyRecalibration with GatkGeneral { - override def afterGraph { - super.afterGraph - - if (config.contains("scattercount")) scatterCount = config("scattercount") - - nt = Option(getThreads(3)) - memoryLimit = Option(nt.getOrElse(1) * 2) - ts_filter_level = config("ts_filter_level") - } -} - -object ApplyRecalibration { - def apply(root: Configurable, input: File, output: File, recal_file: File, tranches_file: File, indel: Boolean = false): ApplyRecalibration = { - val ar = if (indel) new ApplyRecalibration(root) { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - defaults ++= Map("ts_filter_level" -> 99.0) - } - else new ApplyRecalibration(root) { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - defaults ++= Map("ts_filter_level" -> 99.5) - } - ar.input :+= input - ar.recal_file = recal_file - ar.tranches_file = tranches_file - ar.out = output - return ar - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala deleted file mode 100644 index 147ab732f..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/BaseRecalibrator.scala +++ /dev/null @@ -1,23 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class BaseRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.BaseRecalibrator with GatkGeneral { - memoryLimit = Option(4) - override val defaultVmem = "8G" - - if (config.contains("scattercount")) scatterCount = config("scattercount") - if (config.contains("dbsnp")) knownSites :+= new File(config("dbsnp").asString) - if (config.contains("known_sites")) knownSites :+= new File(config("known_sites").asString) -} - -object BaseRecalibrator { - def apply(root: Configurable, input: File, output: File): BaseRecalibrator = { - val br = new BaseRecalibrator(root) - br.input_file :+= input - br.out = output - br.afterGraph - return br - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala deleted file mode 100644 index 5b3e4df4b..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineGVCFs.scala +++ /dev/null @@ -1,17 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class CombineGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineGVCFs with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object CombineGVCFs { - def apply(root: Configurable, input: List[File], output: File): CombineGVCFs = { - val cg = new CombineGVCFs(root) - cg.variant = input - cg.o = output - return cg - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala deleted file mode 100644 index 1ad7e42e7..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ /dev/null @@ -1,17 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class CombineVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.CombineVariants with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object CombineVariants { - def apply(root: Configurable, input: List[File], output: File): CombineVariants = { - val cv = new CombineVariants(root) - cv.variant = input - cv.out = output - return cv - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala deleted file mode 100644 index 75f5f07a9..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GatkGeneral.scala +++ /dev/null @@ -1,18 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction -import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK - -trait GatkGeneral extends CommandLineGATK with BiopetJavaCommandLineFunction { - memoryLimit = Option(3) - - if (config.contains("gatk_jar")) jarFile = config("gatk_jar") - - override val defaultVmem = "7G" - - if (config.contains("intervals", submodule = "gatk")) intervals = config("intervals", submodule = "gatk").asFileList - if (config.contains("exclude_intervals", submodule = "gatk")) excludeIntervals = config("exclude_intervals", submodule = "gatk").asFileList - reference_sequence = config("reference", submodule = "gatk") - gatk_key = config("gatk_key", submodule = "gatk") - if (config.contains("pedigree", submodule = "gatk")) pedigree = config("pedigree", submodule = "gatk").asFileList -} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala deleted file mode 100644 index d24913d23..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/GenotypeGVCFs.scala +++ /dev/null @@ -1,28 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral { - annotation ++= config("annotation", default = Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")).asStringList - - if (config.contains("dbsnp")) dbsnp = config("dbsnp") - if (config.contains("scattercount", "genotypegvcfs")) scatterCount = config("scattercount") - - if (config("inputtype", default = "dna").asString == "rna") { - stand_call_conf = config("stand_call_conf", default = 20) - stand_emit_conf = config("stand_emit_conf", default = 0) - } else { - stand_call_conf = config("stand_call_conf", default = 30) - stand_emit_conf = config("stand_emit_conf", default = 0) - } -} - -object GenotypeGVCFs { - def apply(root: Configurable, gvcfFiles: List[File], output: File): GenotypeGVCFs = { - val gg = new GenotypeGVCFs(root) - gg.variant = gvcfFiles - gg.out = output - return gg - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala deleted file mode 100644 index 087bf08e4..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/HaplotypeCaller.scala +++ /dev/null @@ -1,48 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType - -class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral { - override def afterGraph { - super.afterGraph - - min_mapping_quality_score = config("minMappingQualityScore", default = 20) - if (config.contains("scattercount")) scatterCount = config("scattercount") - if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") - this.sample_ploidy = config("ploidy") - nct = config("threads", default = 1) - bamOutput = config("bamOutput") - memoryLimit = Option(nct.getOrElse(1) * 2) - if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") - if (config.contains("output_mode")) { - import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._ - config("output_mode").asString match { - case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES - case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES - case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY - case e => logger.warn("output mode '" + e + "' does not exist") - } - } - - if (config("inputtype", default = "dna").asString == "rna") { - dontUseSoftClippedBases = config("dontusesoftclippedbases", default = true) - stand_call_conf = config("stand_call_conf", default = 5) - stand_emit_conf = config("stand_emit_conf", default = 0) - } else { - dontUseSoftClippedBases = config("dontusesoftclippedbases", default = false) - stand_call_conf = config("stand_call_conf", default = 5) - stand_emit_conf = config("stand_emit_conf", default = 0) - } - if (bamOutput != null && nct.getOrElse(1) > 1) { - nct = Option(1) - logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug") - } - } - - def useGvcf() { - emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF - variant_index_type = GATKVCFIndexType.LINEAR - variant_index_parameter = config("variant_index_parameter", default = 128000) - } -} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala deleted file mode 100644 index f9f06daac..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/IndelRealigner.scala +++ /dev/null @@ -1,18 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object IndelRealigner { - def apply(root: Configurable, input: File, targetIntervals: File, outputDir: String): IndelRealigner = { - val ir = new IndelRealigner(root) - ir.input_file :+= input - ir.targetIntervals = targetIntervals - ir.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bam") - return ir - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala deleted file mode 100644 index 6f039932b..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/PrintReads.scala +++ /dev/null @@ -1,21 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class PrintReads(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.PrintReads with GatkGeneral { - memoryLimit = Option(4) - - override val defaultVmem = "8G" - - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object PrintReads { - def apply(root: Configurable, input: File, output: File): PrintReads = { - val br = new PrintReads(root) - br.input_file :+= input - br.out = output - return br - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala deleted file mode 100644 index 7377389a0..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/RealignerTargetCreator.scala +++ /dev/null @@ -1,22 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class RealignerTargetCreator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.RealignerTargetCreator with GatkGeneral { - override val defaultVmem = "6G" - memoryLimit = Some(2.5) - - if (config.contains("scattercount")) scatterCount = config("scattercount") - - if (config.contains("known")) known ++= config("known").asFileList -} - -object RealignerTargetCreator { - def apply(root: Configurable, input: File, outputDir: String): RealignerTargetCreator = { - val re = new RealignerTargetCreator(root) - re.input_file :+= input - re.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.intervals") - return re - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala deleted file mode 100644 index cd056c429..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/SelectVariants.scala +++ /dev/null @@ -1,17 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class SelectVariants(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.SelectVariants with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") -} - -object SelectVariants { - def apply(root: Configurable, input: File, output: File): SelectVariants = { - val sv = new SelectVariants(root) - sv.variant = input - sv.out = output - return sv - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala deleted file mode 100644 index 255fb100a..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/UnifiedGenotyper.scala +++ /dev/null @@ -1,34 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import nl.lumc.sasc.biopet.core.config.Configurable - -class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral { - override def afterGraph { - super.afterGraph - - genotype_likelihoods_model = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH - if (config.contains("scattercount")) scatterCount = config("scattercount") - if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") - this.sample_ploidy = config("ploidy") - nct = config("threads", default = 1) - memoryLimit = Option(nct.getOrElse(1) * 2) - if (config.contains("allSitePLs")) this.allSitePLs = config("allSitePLs") - if (config.contains("output_mode")) { - import org.broadinstitute.gatk.tools.walkers.genotyper.OutputMode._ - config("output_mode").asString match { - case "EMIT_ALL_CONFIDENT_SITES" => output_mode = EMIT_ALL_CONFIDENT_SITES - case "EMIT_ALL_SITES" => output_mode = EMIT_ALL_SITES - case "EMIT_VARIANTS_ONLY" => output_mode = EMIT_VARIANTS_ONLY - case e => logger.warn("output mode '" + e + "' does not exist") - } - } - - if (config("inputtype", default = "dna").asString == "rna") { - stand_call_conf = config("stand_call_conf", default = 5) - stand_emit_conf = config("stand_emit_conf", default = 0) - } else { - stand_call_conf = config("stand_call_conf", default = 5) - stand_emit_conf = config("stand_emit_conf", default = 0) - } - } -} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala deleted file mode 100644 index 3a829421c..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantAnnotator.scala +++ /dev/null @@ -1,19 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class VariantAnnotator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantAnnotator with GatkGeneral { - if (config.contains("scattercount")) scatterCount = config("scattercount") - dbsnp = config("dbsnp") -} - -object VariantAnnotator { - def apply(root: Configurable, input: File, bamFiles: List[File], output: File): VariantAnnotator = { - val va = new VariantAnnotator(root) - va.variant = input - va.input_file = bamFiles - va.out = output - return va - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala deleted file mode 100644 index 798771801..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantEval.scala +++ /dev/null @@ -1,37 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable - -class VariantEval(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantEval with GatkGeneral { - override def afterGraph { - super.afterGraph - } -} - -object VariantEval { - def apply(root: Configurable, sample: File, compareWith: File, - output: File): VariantEval = { - val vareval = new VariantEval(root) - vareval.eval = Seq(sample) - vareval.comp = Seq(compareWith) - vareval.out = output - vareval.afterGraph - return vareval - } - - def apply(root: Configurable, sample: File, compareWith: File, - output: File, ST: Seq[String], EV: Seq[String]): VariantEval = { - val vareval = new VariantEval(root) - vareval.eval = Seq(sample) - vareval.comp = Seq(compareWith) - vareval.out = output - vareval.noST = true - vareval.ST = ST - vareval.noEV = true - vareval.EV = EV - vareval.afterGraph - return vareval - } - -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala deleted file mode 100644 index b778dd4de..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/VariantRecalibrator.scala +++ /dev/null @@ -1,40 +0,0 @@ -package nl.lumc.sasc.biopet.extensions.gatk - -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile - -class VariantRecalibrator(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.VariantRecalibrator with GatkGeneral { - nt = Option(getThreads(4)) - memoryLimit = Option(nt.getOrElse(1) * 2) - - if (config.contains("dbsnp")) resource :+= new TaggedFile(config("dbsnp").asString, "known=true,training=false,truth=false,prior=2.0") - - an = config("annotation", default = List("QD", "DP", "FS", "ReadPosRankSum", "MQRankSum")).asStringList - minNumBadVariants = config("minnumbadvariants") - maxGaussians = config("maxgaussians") -} - -object VariantRecalibrator { - def apply(root: Configurable, input: File, recal_file: File, tranches_file: File, indel: Boolean = false): VariantRecalibrator = { - val vr = new VariantRecalibrator(root) { - override lazy val configName = "variantrecalibrator" - override def configPath: List[String] = (if (indel) "indel" else "snp") :: super.configPath - if (indel) { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.INDEL - defaults ++= Map("ts_filter_level" -> 99.0) - if (config.contains("mills")) resource :+= new TaggedFile(config("mills").asString, "known=false,training=true,truth=true,prior=12.0") - } else { - mode = org.broadinstitute.gatk.tools.walkers.variantrecalibration.VariantRecalibratorArgumentCollection.Mode.SNP - defaults ++= Map("ts_filter_level" -> 99.5) - if (config.contains("hapmap")) resource +:= new TaggedFile(config("hapmap").asString, "known=false,training=true,truth=true,prior=15.0") - if (config.contains("omni")) resource +:= new TaggedFile(config("omni").asString, "known=false,training=true,truth=true,prior=12.0") - if (config.contains("1000G")) resource +:= new TaggedFile(config("1000G").asString, "known=false,training=true,truth=false,prior=10.0") - } - } - vr.input :+= input - vr.recal_file = recal_file - vr.tranches_file = tranches_file - return vr - } -} \ No newline at end of file diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala deleted file mode 100644 index 874890746..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/Basty.scala +++ /dev/null @@ -1,147 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.basty - -import java.io.File -import nl.lumc.sasc.biopet.core.MultiSampleQScript -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.Cat -import nl.lumc.sasc.biopet.extensions.Raxml -import nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline -import nl.lumc.sasc.biopet.tools.BastyGenerateFasta -import org.broadinstitute.gatk.queue.QScript - -class Basty(val root: Configurable) extends QScript with MultiSampleQScript { - def this() = this(null) - - class LibraryOutput extends AbstractLibraryOutput { - } - - case class FastaOutput(variants: File, consensus: File, consensusVariants: File) - class SampleOutput extends AbstractSampleOutput { - var output: FastaOutput = _ - var outputSnps: FastaOutput = _ - } - - defaults ++= Map("ploidy" -> 1, "use_haplotypecaller" -> false, "use_unifiedgenotyper" -> true, "joint_variantcalling" -> true) - - var gatkPipeline: GatkPipeline = new GatkPipeline(this) - gatkPipeline.jointVariantcalling = true - - def init() { - gatkPipeline.outputDir = outputDir - gatkPipeline.init - } - - def biopetScript() { - gatkPipeline.biopetScript - addAll(gatkPipeline.functions) - - val refVariants = addGenerateFasta(null, outputDir + "reference/", outputName = "reference") - val refVariantSnps = addGenerateFasta(null, outputDir + "reference/", outputName = "reference", snpsOnly = true) - - runSamplesJobs() - - val catVariants = Cat(this, refVariants.variants :: samplesOutput.map(_._2.output.variants).toList, outputDir + "fastas/variant.fasta") - add(catVariants) - val catVariantsSnps = Cat(this, refVariantSnps.variants :: samplesOutput.map(_._2.outputSnps.variants).toList, outputDir + "fastas/variant.snps_only.fasta") - add(catVariantsSnps) - - val catConsensus = Cat(this, refVariants.consensus :: samplesOutput.map(_._2.output.consensus).toList, outputDir + "fastas/consensus.fasta") - add(catConsensus) - val catConsensusSnps = Cat(this, refVariantSnps.consensus :: samplesOutput.map(_._2.outputSnps.consensus).toList, outputDir + "fastas/consensus.snps_only.fasta") - add(catConsensusSnps) - - val catConsensusVariants = Cat(this, refVariants.consensusVariants :: samplesOutput.map(_._2.output.consensusVariants).toList, outputDir + "fastas/consensus.variant.fasta") - add(catConsensusVariants) - val catConsensusVariantsSnps = Cat(this, refVariantSnps.consensusVariants :: samplesOutput.map(_._2.outputSnps.consensusVariants).toList, outputDir + "fastas/consensus.variant.snps_only.fasta") - add(catConsensusVariantsSnps) - - val seed: Int = config("seed", default = 12345) - def addRaxml(input: File, outputDir: String, outputName: String) { - val raxmlMl = new Raxml(this) - raxmlMl.input = input - raxmlMl.m = config("raxml_ml_model", default = "GTRGAMMAX") - raxmlMl.p = seed - raxmlMl.n = outputName + "_ml" - raxmlMl.w = outputDir - raxmlMl.N = config("ml_runs", default = 20, submodule = "raxml") - add(raxmlMl) - - val r = new scala.util.Random(seed) - val numBoot = config("boot_runs", default = 100, submodule = "raxml").asInt - val bootList = for (t <- 0 until numBoot) yield { - val raxmlBoot = new Raxml(this) - raxmlBoot.threads = 1 - raxmlBoot.input = input - raxmlBoot.m = config("raxml_ml_model", default = "GTRGAMMAX") - raxmlBoot.p = seed - raxmlBoot.b = math.abs(r.nextInt) - raxmlBoot.w = outputDir - raxmlBoot.N = 1 - raxmlBoot.n = outputName + "_boot_" + t - add(raxmlBoot) - raxmlBoot.getBootstrapFile - } - - val cat = Cat(this, bootList.toList, outputDir + "/boot_list") - add(cat) - - val raxmlBi = new Raxml(this) - raxmlBi.input = input - raxmlBi.t = raxmlMl.getBestTreeFile - raxmlBi.z = cat.output - raxmlBi.m = config("raxml_ml_model", default = "GTRGAMMAX") - raxmlBi.p = seed - raxmlBi.f = "b" - raxmlBi.n = outputName + "_bi" - raxmlBi.w = outputDir - add(raxmlBi) - } - - addRaxml(catVariantsSnps.output, outputDir + "raxml", "snps") - } - - // Called for each sample - def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = { - val sampleOutput = new SampleOutput - val sampleID: String = sampleConfig("ID").toString - val sampleDir = globalSampleDir + sampleID + "/" - - sampleOutput.libraries = runLibraryJobs(sampleConfig) - - sampleOutput.output = addGenerateFasta(sampleID, sampleDir) - sampleOutput.outputSnps = addGenerateFasta(sampleID, sampleDir, snpsOnly = true) - - return sampleOutput - } - - // Called for each run from a sample - def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = { - val libraryOutput = new LibraryOutput - - val runID: String = runConfig("ID").toString - val sampleID: String = sampleConfig("ID").toString - val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/" - - return libraryOutput - } - - def addGenerateFasta(sampleName: String, outputDir: String, outputName: String = null, - snpsOnly: Boolean = false): FastaOutput = { - val bastyGenerateFasta = new BastyGenerateFasta(this) - bastyGenerateFasta.outputName = if (outputName != null) outputName else sampleName - bastyGenerateFasta.inputVcf = gatkPipeline.multisampleVariantcalling.scriptOutput.finalVcfFile - if (gatkPipeline.samplesOutput.contains(sampleName)) { - bastyGenerateFasta.bamFile = gatkPipeline.samplesOutput(sampleName).variantcalling.bamFiles.head - } - bastyGenerateFasta.outputVariants = outputDir + bastyGenerateFasta.outputName + ".variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta" - bastyGenerateFasta.outputConsensus = outputDir + bastyGenerateFasta.outputName + ".consensus" + (if (snpsOnly) ".snps_only" else "") + ".fasta" - bastyGenerateFasta.outputConsensusVariants = outputDir + bastyGenerateFasta.outputName + ".consensus_variants" + (if (snpsOnly) ".snps_only" else "") + ".fasta" - bastyGenerateFasta.sampleName = sampleName - bastyGenerateFasta.snpsOnly = snpsOnly - add(bastyGenerateFasta) - return FastaOutput(bastyGenerateFasta.outputVariants, bastyGenerateFasta.outputConsensus, bastyGenerateFasta.outputConsensusVariants) - } -} - -object Basty extends PipelineCommand diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala deleted file mode 100644 index 8d7158f5a..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala +++ /dev/null @@ -1,58 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } -import scala.util.Random - -class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Sample gvcf file") - var sampleGvcf: File = _ - - @Argument(doc = "SampleName", required = true) - var sampleName: String = _ - - @Input(doc = "Gvcf files", shortName = "I", required = false) - var gvcfFiles: List[File] = Nil - - var reference: File = config("reference") - - @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = config("dbsnp") - - def init() { - if (config.contains("gvcffiles")) for (file <- config("gvcffiles").asList) { - gvcfFiles ::= file.toString - } - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" - } - - def biopetScript() { - var todoGvcfs = gvcfFiles - var gvcfPool: List[File] = Nil - addGenotypingPipeline(gvcfPool) - - while (todoGvcfs.size > 0) { - val index = Random.nextInt(todoGvcfs.size) - gvcfPool ::= todoGvcfs(index) - addGenotypingPipeline(gvcfPool) - todoGvcfs = todoGvcfs.filter(b => b != todoGvcfs(index)) - } - } - - def addGenotypingPipeline(gvcfPool: List[File]) { - val gatkGenotyping = new GatkGenotyping(this) - gatkGenotyping.inputGvcfs = sampleGvcf :: gvcfPool - gatkGenotyping.samples :+= sampleName - gatkGenotyping.outputDir = outputDir + "samples_" + gvcfPool.size + "/" - gatkGenotyping.init - gatkGenotyping.biopetScript - addAll(gatkGenotyping.functions) - } -} - -object GatkBenchmarkGenotyping extends PipelineCommand diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala deleted file mode 100644 index 5ed4f0629..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala +++ /dev/null @@ -1,58 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.{ GenotypeGVCFs, SelectVariants } -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.utils.commandline.{ Input, Output, Argument } - -class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Gvcf files", shortName = "I") - var inputGvcfs: List[File] = Nil - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference") - - @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = config("dbsnp") - - @Argument(doc = "OutputName", required = false) - var outputName: String = "genotype" - - @Output(doc = "OutputFile", shortName = "O", required = false) - var outputFile: File = _ - - @Argument(doc = "Samples", shortName = "sample", required = false) - var samples: List[String] = Nil - - def init() { - if (outputFile == null) outputFile = outputDir + outputName + ".vcf.gz" - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" - } - - def biopetScript() { - addGenotypeGVCFs(inputGvcfs, outputFile) - if (!samples.isEmpty) { - if (samples.size > 1) addSelectVariants(outputFile, samples, outputDir + "samples/", "all") - for (sample <- samples) addSelectVariants(outputFile, List(sample), outputDir + "samples/", sample) - } - } - - def addGenotypeGVCFs(gvcfFiles: List[File], outputFile: File): File = { - val genotypeGVCFs = GenotypeGVCFs(this, gvcfFiles, outputFile) - add(genotypeGVCFs) - return genotypeGVCFs.out - } - - def addSelectVariants(inputFile: File, samples: List[String], outputDir: String, name: String) { - val selectVariants = SelectVariants(this, inputFile, outputDir + name + ".vcf.gz") - selectVariants.excludeNonVariants = true - for (sample <- samples) selectVariants.sample_name :+= sample - add(selectVariants) - } -} - -object GatkGenotyping extends PipelineCommand diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala deleted file mode 100644 index 39d7465b0..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ /dev/null @@ -1,237 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.MultiSampleQScript -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.core.config.Configurable -import htsjdk.samtools.SamReaderFactory -import scala.collection.JavaConversions._ -import java.io.File -import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, CombineGVCFs } -import nl.lumc.sasc.biopet.extensions.picard.AddOrReplaceReadGroups -import nl.lumc.sasc.biopet.extensions.picard.SamToFastq -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.utils.commandline.{ Argument } - -class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript { - def this() = this(null) - - @Argument(doc = "Only Sample", shortName = "sample", required = false) - val onlySample: List[String] = Nil - - @Argument(doc = "Skip Genotyping step", shortName = "skipgenotyping", required = false) - var skipGenotyping: Boolean = false - - @Argument(doc = "Merge gvcfs", shortName = "mergegvcfs", required = false) - var mergeGvcfs: Boolean = false - - @Argument(doc = "Joint variantcalling", shortName = "jointVariantCalling", required = false) - var jointVariantcalling: Boolean = config("joint_variantcalling", default = false) - - @Argument(doc = "Joint genotyping", shortName = "jointGenotyping", required = false) - var jointGenotyping: Boolean = config("joint_genotyping", default = false) - - var singleSampleCalling = config("single_sample_calling", default = true) - var reference: File = config("reference", required = true) - var dbsnp: File = config("dbsnp") - var gvcfFiles: List[File] = Nil - var finalBamFiles: List[File] = Nil - var useAllelesOption: Boolean = config("use_alleles_option", default = false) - - class LibraryOutput extends AbstractLibraryOutput { - var mappedBamFile: File = _ - var variantcalling: GatkVariantcalling.ScriptOutput = _ - } - - class SampleOutput extends AbstractSampleOutput { - var variantcalling: GatkVariantcalling.ScriptOutput = _ - } - - def init() { - if (config.contains("target_bed")) { - defaults ++= Map("gatk" -> Map(("intervals" -> config("target_bed").asStringList))) - } - if (config.contains("gvcfFiles")) - for (file <- config("gvcfFiles").asList) - gvcfFiles :+= file.toString - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" - } - - val multisampleVariantcalling = new GatkVariantcalling(this) { - override protected lazy val configName = "gatkvariantcalling" - override def configPath: List[String] = "multisample" :: super.configPath - } - - def biopetScript() { - if (onlySample.isEmpty) { - runSamplesJobs - - //SampleWide jobs - if (mergeGvcfs && gvcfFiles.size > 0) { - val newFile = outputDir + "merged.gvcf.vcf.gz" - add(CombineGVCFs(this, gvcfFiles, newFile)) - gvcfFiles = List(newFile) - } - - if (!skipGenotyping && gvcfFiles.size > 0) { - if (jointGenotyping) { - val gatkGenotyping = new GatkGenotyping(this) - gatkGenotyping.inputGvcfs = gvcfFiles - gatkGenotyping.outputDir = outputDir + "genotyping/" - gatkGenotyping.init - gatkGenotyping.biopetScript - addAll(gatkGenotyping.functions) - var vcfFile = gatkGenotyping.outputFile - } - } else logger.warn("No gVCFs to genotype") - - if (jointVariantcalling) { - val allBamfiles = for ( - (sampleID, sampleOutput) <- samplesOutput; - file <- sampleOutput.variantcalling.bamFiles - ) yield file - val allRawVcfFiles = for ((sampleID, sampleOutput) <- samplesOutput) yield sampleOutput.variantcalling.rawFilterVcfFile - - val gatkVariantcalling = new GatkVariantcalling(this) { - override protected lazy val configName = "gatkvariantcalling" - override def configPath: List[String] = "multisample" :: super.configPath - } - - if (gatkVariantcalling.useMpileup) { - val cvRaw = CombineVariants(this, allRawVcfFiles.toList, outputDir + "variantcalling/multisample.raw.vcf.gz") - add(cvRaw) - gatkVariantcalling.rawVcfInput = cvRaw.out - } - - multisampleVariantcalling.preProcesBams = false - multisampleVariantcalling.doublePreProces = false - multisampleVariantcalling.inputBams = allBamfiles.toList - multisampleVariantcalling.outputDir = outputDir + "variantcalling" - multisampleVariantcalling.outputName = "multisample" - multisampleVariantcalling.init - multisampleVariantcalling.biopetScript - addAll(multisampleVariantcalling.functions) - - if (config("inputtype", default = "dna").asString != "rna" && config("recalibration", default = false).asBoolean) { - val recalibration = new GatkVariantRecalibration(this) - recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile - recalibration.bamFiles = finalBamFiles - recalibration.outputDir = outputDir + "recalibration/" - recalibration.init - recalibration.biopetScript - } - } - } else for (sample <- onlySample) runSingleSampleJobs(sample) - } - - // Called for each sample - def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = { - val sampleOutput = new SampleOutput - var libraryBamfiles: List[File] = List() - val sampleID: String = sampleConfig("ID").toString - sampleOutput.libraries = runLibraryJobs(sampleConfig) - val sampleDir = globalSampleDir + sampleID - for ((libraryID, libraryOutput) <- sampleOutput.libraries) { - libraryBamfiles ++= libraryOutput.variantcalling.bamFiles - } - - if (libraryBamfiles.size > 0) { - finalBamFiles ++= libraryBamfiles - val gatkVariantcalling = new GatkVariantcalling(this) - gatkVariantcalling.inputBams = libraryBamfiles - gatkVariantcalling.outputDir = sampleDir + "/variantcalling/" - gatkVariantcalling.preProcesBams = false - if (!singleSampleCalling) { - gatkVariantcalling.useHaplotypecaller = false - gatkVariantcalling.useUnifiedGenotyper = false - } - gatkVariantcalling.sampleID = sampleID - gatkVariantcalling.init - gatkVariantcalling.biopetScript - addAll(gatkVariantcalling.functions) - sampleOutput.variantcalling = gatkVariantcalling.scriptOutput - gvcfFiles :+= gatkVariantcalling.scriptOutput.gvcfFile - } else logger.warn("No bamfiles for variant calling for sample: " + sampleID) - return sampleOutput - } - - // Called for each run from a sample - def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = { - val libraryOutput = new LibraryOutput - val runID: String = runConfig("ID").toString - val sampleID: String = sampleConfig("ID").toString - val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/" - var inputType = "" - if (runConfig.contains("inputtype")) inputType = runConfig("inputtype").toString - else inputType = config("inputtype", default = "dna").toString - if (runConfig.contains("R1")) { - val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir) - addAll(mapping.functions) // Add functions of mapping to curent function pool - libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile") - } else if (runConfig.contains("bam")) { - var bamFile = new File(runConfig("bam").toString) - if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile) - - if (config("bam_to_fastq", default = false).asBoolean) { - val samToFastq = SamToFastq(this, bamFile, runDir + sampleID + "-" + runID + ".R1.fastq", - runDir + sampleID + "-" + runID + ".R2.fastq") - add(samToFastq, isIntermediate = true) - val mapping = Mapping.loadFromLibraryConfig(this, runConfig, sampleConfig, runDir, startJobs = false) - mapping.input_R1 = samToFastq.fastqR1 - mapping.input_R2 = samToFastq.fastqR2 - mapping.init - mapping.biopetScript - addAll(mapping.functions) // Add functions of mapping to curent function pool - libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile") - } else { - var readGroupOke = true - val inputSam = SamReaderFactory.makeDefault.open(bamFile) - val header = inputSam.getFileHeader.getReadGroups - for (readGroup <- inputSam.getFileHeader.getReadGroups) { - if (readGroup.getSample != sampleID) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != runID) logger.warn("Library ID readgroup in bam file is not the same") - if (readGroup.getSample != sampleID || readGroup.getLibrary != runID) readGroupOke = false - } - inputSam.close - - if (!readGroupOke) { - if (config("correct_readgroups", default = false)) { - logger.info("Correcting readgroups, file:" + bamFile) - val aorrg = AddOrReplaceReadGroups(this, bamFile, new File(runDir + sampleID + "-" + runID + ".bam")) - aorrg.RGID = sampleID + "-" + runID - aorrg.RGLB = runID - aorrg.RGSM = sampleID - if (runConfig.contains("PL")) aorrg.RGPL = runConfig("PL").toString - else aorrg.RGPL = "illumina" - if (runConfig.contains("PU")) aorrg.RGPU = runConfig("PU").toString - else aorrg.RGPU = "na" - if (runConfig.contains("CN")) aorrg.RGCN = runConfig("CN").toString - add(aorrg, isIntermediate = true) - bamFile = aorrg.output - } else throw new IllegalStateException("Readgroup sample and/or library of input bamfile is not correct, file: " + bamFile + - "\nPossible to set 'correct_readgroups' to true on config to automatic fix this") - } - addAll(BamMetrics(this, bamFile, runDir + "metrics/").functions) - - libraryOutput.mappedBamFile = bamFile - } - } else logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig) - - val gatkVariantcalling = new GatkVariantcalling(this) - gatkVariantcalling.inputBams = List(libraryOutput.mappedBamFile) - gatkVariantcalling.outputDir = runDir - gatkVariantcalling.variantcalling = config("library_variantcalling", default = false) - gatkVariantcalling.preProcesBams = true - gatkVariantcalling.sampleID = sampleID - gatkVariantcalling.init - gatkVariantcalling.biopetScript - addAll(gatkVariantcalling.functions) - libraryOutput.variantcalling = gatkVariantcalling.scriptOutput - - return libraryOutput - } -} - -object GatkPipeline extends PipelineCommand diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala deleted file mode 100644 index d5dcdb9f2..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala +++ /dev/null @@ -1,76 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.BiopetQScript -import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.ApplyRecalibration -import nl.lumc.sasc.biopet.extensions.gatk.VariantAnnotator -import nl.lumc.sasc.biopet.extensions.gatk.VariantRecalibrator -import org.broadinstitute.gatk.queue.QScript - -class GatkVariantRecalibration(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "input vcf file", shortName = "I") - var inputVcf: File = _ - - @Input(doc = "input vcf file", shortName = "BAM", required = false) - var bamFiles: List[File] = Nil - - @Output(doc = "output vcf file", shortName = "out") - var outputVcf: File = _ - - def init() { - if (inputVcf == null) throw new IllegalStateException("Missing Output directory on gatk module") - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" - } - - def biopetScript() { - var vcfFile: File = if (!bamFiles.isEmpty) addVariantAnnotator(inputVcf, bamFiles, outputDir) else inputVcf - vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir) - vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir) - } - - def addSnpVariantRecalibrator(inputVcf: File, dir: String): File = { - val snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false) - if (!snpRecal.resource.isEmpty) { - add(snpRecal) - - val snpApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - snpRecal.recal_file, snpRecal.tranches_file, indel = false) - add(snpApply) - - return snpApply.out - } else { - logger.warn("Skipped snp Recalibration, resource is missing") - return inputVcf - } - } - - def addIndelVariantRecalibrator(inputVcf: File, dir: String): File = { - val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = true) - if (!indelRecal.resource.isEmpty) { - add(indelRecal) - - val indelApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - indelRecal.recal_file, indelRecal.tranches_file, indel = true) - add(indelApply) - - return indelApply.out - } else { - logger.warn("Skipped indel Recalibration, resource is missing") - return inputVcf - } - } - - def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: String): File = { - val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf")) - add(variantAnnotator) - return variantAnnotator.out - } -} - -object GatkVariantRecalibration extends PipelineCommand diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala deleted file mode 100644 index d34466a00..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ /dev/null @@ -1,244 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import java.io.File -import nl.lumc.sasc.biopet.tools.{ MpileupToVcf, VcfFilter, MergeAlleles } -import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper } -import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile -import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } -import scala.collection.SortedMap -import scala.language.reflectiveCalls - -class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - val scriptOutput = new GatkVariantcalling.ScriptOutput - - @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM") - var inputBams: List[File] = Nil - - @Input(doc = "Raw vcf file", shortName = "raw") - var rawVcfInput: File = _ - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference", required = true) - - @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = config("dbsnp") - - @Argument(doc = "OutputName", required = false) - var outputName: String = _ - - @Argument(doc = "Sample name", required = false) - var sampleID: String = _ - - var preProcesBams: Option[Boolean] = config("pre_proces_bams", default = true) - var variantcalling: Boolean = true - var doublePreProces: Option[Boolean] = config("double_pre_proces", default = true) - var useHaplotypecaller: Option[Boolean] = config("use_haplotypecaller", default = true) - var useUnifiedGenotyper: Option[Boolean] = config("use_unifiedgenotyper", default = false) - var useAllelesOption: Option[Boolean] = config("use_alleles_option", default = false) - var useMpileup: Boolean = config("use_mpileup", default = true) - var useIndelRealigner: Boolean = config("use_indel_realign", default = true) - var useBaseRecalibration: Boolean = config("use_base_recalibration", default = true) - - def init() { - if (outputName == null && sampleID != null) outputName = sampleID - else if (outputName == null) outputName = "noname" - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" - - val baseRecalibrator = new BaseRecalibrator(this) - if (preProcesBams && useBaseRecalibration && baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration") - useBaseRecalibration = false - } - } - - private def doublePreProces(files: List[File]): List[File] = { - if (files.size == 1) return files - if (files.isEmpty) throw new IllegalStateException("Files can't be empty") - if (!doublePreProces.get) return files - val markDup = MarkDuplicates(this, files, new File(outputDir + outputName + ".dedup.bam")) - add(markDup, isIntermediate = useIndelRealigner) - if (useIndelRealigner) { - List(addIndelRealign(markDup.output, outputDir, isIntermediate = false)) - } else { - List(markDup.output) - } - } - - def biopetScript() { - scriptOutput.bamFiles = if (preProcesBams.get) { - var bamFiles: List[File] = Nil - for (inputBam <- inputBams) { - var bamFile = inputBam - if (useIndelRealigner) { - bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration) - } - if (useBaseRecalibration) { - bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = bamFiles.size > 1) - } - bamFiles :+= bamFile - } - doublePreProces(bamFiles) - } else if (inputBams.size > 1 && doublePreProces.get) { - doublePreProces(inputBams) - } else inputBams - - if (variantcalling) { - var mergBuffer: SortedMap[String, File] = SortedMap() - def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) } - - if (sampleID != null && (useHaplotypecaller.get || config("joint_genotyping", default = false).asBoolean)) { - val hcGvcf = new HaplotypeCaller(this) - hcGvcf.useGvcf - hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = outputDir + outputName + ".hc.discovery.gvcf.vcf.gz" - add(hcGvcf) - scriptOutput.gvcfFile = hcGvcf.out - } - - if (useHaplotypecaller.get) { - if (sampleID != null) { - val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), outputDir + outputName + ".hc.discovery.vcf.gz") - add(genotypeGVCFs) - scriptOutput.hcVcfFile = genotypeGVCFs.out - } else { - val hcGvcf = new HaplotypeCaller(this) - hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = outputDir + outputName + ".hc.discovery.vcf.gz" - add(hcGvcf) - scriptOutput.hcVcfFile = hcGvcf.out - } - mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile) - } - - if (useUnifiedGenotyper.get) { - val ugVcf = new UnifiedGenotyper(this) - ugVcf.input_file = scriptOutput.bamFiles - ugVcf.out = outputDir + outputName + ".ug.discovery.vcf.gz" - add(ugVcf) - scriptOutput.ugVcfFile = ugVcf.out - mergBuffer += ("2.UG-Discovery" -> scriptOutput.ugVcfFile) - } - - // Generate raw vcf - if (useMpileup) { - if (sampleID != null && scriptOutput.bamFiles.size == 1) { - val m2v = new MpileupToVcf(this) - m2v.inputBam = scriptOutput.bamFiles.head - m2v.sample = sampleID - m2v.output = outputDir + outputName + ".raw.vcf" - add(m2v) - scriptOutput.rawVcfFile = m2v.output - - val vcfFilter = new VcfFilter(this) - vcfFilter.defaults ++= Map("min_sample_depth" -> 8, - "min_alternate_depth" -> 2, - "min_samples_pass" -> 1, - "filter_ref_calls" -> true) - vcfFilter.inputVcf = m2v.output - vcfFilter.outputVcf = this.swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz") - add(vcfFilter) - scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf - } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput - if (scriptOutput.rawFilterVcfFile == null) throw new IllegalStateException("Files can't be empty") - mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile) - } - - // Allele mode - if (useAllelesOption.get) { - val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz") - add(mergeAlleles, isIntermediate = true) - - if (useHaplotypecaller.get) { - val hcAlleles = new HaplotypeCaller(this) - hcAlleles.input_file = scriptOutput.bamFiles - hcAlleles.out = outputDir + outputName + ".hc.allele.vcf.gz" - hcAlleles.alleles = mergeAlleles.output - hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(hcAlleles) - scriptOutput.hcAlleleVcf = hcAlleles.out - mergBuffer += ("3.HC-alleles" -> hcAlleles.out) - } - - if (useUnifiedGenotyper.get) { - val ugAlleles = new UnifiedGenotyper(this) - ugAlleles.input_file = scriptOutput.bamFiles - ugAlleles.out = outputDir + outputName + ".ug.allele.vcf.gz" - ugAlleles.alleles = mergeAlleles.output - ugAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(ugAlleles) - scriptOutput.ugAlleleVcf = ugAlleles.out - mergBuffer += ("4.UG-alleles" -> ugAlleles.out) - } - } - - def removeNoneVariants(input: File): File = { - val output = input.getAbsolutePath.stripSuffix(".vcf.gz") + ".variants_only.vcf.gz" - val sv = SelectVariants(this, input, output) - sv.excludeFiltered = true - sv.excludeNonVariants = true - add(sv, isIntermediate = true) - sv.out - } - - val cvFinal = CombineVariants(this, mergeList.toList, outputDir + outputName + ".final.vcf.gz") - cvFinal.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED - add(cvFinal) - scriptOutput.finalVcfFile = cvFinal.out - } - } - - def addIndelRealign(inputBam: File, dir: String, isIntermediate: Boolean = true): File = { - val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir) - add(realignerTargetCreator, isIntermediate = true) - - val indelRealigner = IndelRealigner.apply(this, inputBam, realignerTargetCreator.out, dir) - add(indelRealigner, isIntermediate = isIntermediate) - - return indelRealigner.o - } - - def addBaseRecalibrator(inputBam: File, dir: String, isIntermediate: Boolean = false): File = { - val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) - - if (baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) - return inputBam - } - add(baseRecalibrator) - - if (config("use_analyze_covariates", default = false).asBoolean) { - val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after")) - baseRecalibratorAfter.BQSR = baseRecalibrator.o - add(baseRecalibratorAfter) - - add(AnalyzeCovariates(this, baseRecalibrator.o, baseRecalibratorAfter.o, swapExt(dir, inputBam, ".bam", ".baserecal.pdf"))) - } - - val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam")) - printReads.BQSR = baseRecalibrator.o - add(printReads, isIntermediate = isIntermediate) - - return printReads.o - } -} - -object GatkVariantcalling extends PipelineCommand { - class ScriptOutput { - var bamFiles: List[File] = _ - var gvcfFile: File = _ - var hcVcfFile: File = _ - var ugVcfFile: File = _ - var rawVcfFile: File = _ - var rawFilterVcfFile: File = _ - var hcAlleleVcf: File = _ - var ugAlleleVcf: File = _ - var finalVcfFile: File = _ - } -} diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala deleted file mode 100644 index 6fdbefd4c..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVcfSampleCompare.scala +++ /dev/null @@ -1,82 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import java.io.File -import nl.lumc.sasc.biopet.core.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.gatk.SelectVariants -import nl.lumc.sasc.biopet.extensions.gatk.VariantEval -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.utils.commandline.{ Input, Argument } - -class GatkVcfSampleCompare(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Sample vcf file(s)", shortName = "V") - var vcfFiles: List[File] = _ - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference") - - @Argument(doc = "Target bed", shortName = "targetBed", required = false) - var targetBed: List[File] = Nil - - @Argument(doc = "Samples", shortName = "sample", required = false) - var samples: List[String] = Nil - - var vcfFile: File = _ - var sampleVcfs: Map[String, File] = Map() - def generalSampleDir = outputDir + "samples/" - - def init() { - if (config.contains("target_bed")) - for (bed <- config("target_bed").asList) - targetBed :+= bed.toString - if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module") - else if (!outputDir.endsWith("/")) outputDir += "/" - } - - def biopetScript() { - vcfFile = if (vcfFiles.size > 1) { - val combineVariants = CombineVariants(this, vcfFiles, outputDir + "merge.vcf") - add(combineVariants) - combineVariants.out - } else vcfFiles.head - - for (sample <- samples) { - sampleVcfs += (sample -> new File(generalSampleDir + sample + File.separator + sample + ".vcf")) - val selectVariants = SelectVariants(this, vcfFile, sampleVcfs(sample)) - selectVariants.sample_name = Seq(sample) - selectVariants.excludeNonVariants = true - add(selectVariants) - } - - val sampleCompareMetrics = new SampleCompareMetrics(this) - sampleCompareMetrics.samples = samples - sampleCompareMetrics.sampleDir = generalSampleDir - sampleCompareMetrics.snpRelFile = outputDir + "compare.snp.rel.tsv" - sampleCompareMetrics.snpAbsFile = outputDir + "compare.snp.abs.tsv" - sampleCompareMetrics.indelRelFile = outputDir + "compare.indel.rel.tsv" - sampleCompareMetrics.indelAbsFile = outputDir + "compare.indel.abs.tsv" - sampleCompareMetrics.totalFile = outputDir + "total.tsv" - - for ((sample, sampleVcf) <- sampleVcfs) { - val sampleDir = generalSampleDir + sample + File.separator - for ((compareSample, compareSampleVcf) <- sampleVcfs) { - val variantEval = VariantEval(this, - sampleVcf, - compareSampleVcf, - new File(sampleDir + sample + "-" + compareSample + ".eval.txt"), - Seq("VariantType", "CompRod"), - Seq("CompOverlap") - ) - if (targetBed != null) variantEval.L = targetBed - add(variantEval) - sampleCompareMetrics.deps ::= variantEval.out - } - } - add(sampleCompareMetrics) - } -} - -object GatkVcfSampleCompare extends PipelineCommand diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala deleted file mode 100644 index b66ac47c3..000000000 --- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/SampleCompareMetrics.scala +++ /dev/null @@ -1,148 +0,0 @@ -package nl.lumc.sasc.biopet.pipelines.gatk - -import java.io.File -import java.io.PrintWriter -import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction -import nl.lumc.sasc.biopet.core.config.Configurable -import org.broadinstitute.gatk.utils.R.RScriptExecutor -import org.broadinstitute.gatk.utils.commandline.{ Output, Argument } -import scala.io.Source -import org.broadinstitute.gatk.utils.R.{ RScriptLibrary, RScriptExecutor } -import org.broadinstitute.gatk.utils.io.Resource -import scala.collection.mutable.Map -import scala.math._ - -class SampleCompareMetrics(val root: Configurable) extends BiopetJavaCommandLineFunction { - javaMainClass = getClass.getName - - @Argument(doc = "Sample Dir", shortName = "sampleDir", required = true) - var sampleDir: String = _ - - @Argument(doc = "Samples", shortName = "sample", required = true) - var samples: List[String] = Nil - - @Argument(doc = "File sufix", shortName = "sufix", required = false) - var fileSufix: String = _ - - @Output(doc = "snpRelFile", shortName = "snpRelFile", required = true) - var snpRelFile: File = _ - - @Output(doc = "snpAbsFile", shortName = "snpAbsFile", required = true) - var snpAbsFile: File = _ - - @Output(doc = "indelRelFile", shortName = "indelRelFile", required = true) - var indelRelFile: File = _ - - @Output(doc = "indelAbsFile", shortName = "indelAbsFile", required = true) - var indelAbsFile: File = _ - - @Output(doc = "totalFile", shortName = "totalFile", required = true) - var totalFile: File = _ - - override val defaultVmem = "8G" - memoryLimit = Option(4.0) - - override def commandLine = super.commandLine + - required("-sampleDir", sampleDir) + - repeat("-sample", samples) + - optional("-fileSufix", fileSufix) + - required("-snpRelFile", snpRelFile) + - required("-snpAbsFile", snpAbsFile) + - required("-indelRelFile", indelRelFile) + - required("-indelAbsFile", indelAbsFile) + - required("-totalFile", totalFile) -} - -object SampleCompareMetrics { - var sampleDir: String = _ - var samples: List[String] = Nil - var fileSufix: String = ".eval.txt" - var snpRelFile: File = _ - var snpAbsFile: File = _ - var indelRelFile: File = _ - var indelAbsFile: File = _ - var totalFile: File = _ - /** - * @param args the command line arguments - */ - def main(args: Array[String]): Unit = { - - for (t <- 0 until args.size) { - args(t) match { - case "-sample" => samples +:= args(t + 1) - case "-sampleDir" => sampleDir = args(t + 1) - case "-fileSufix" => fileSufix = args(t + 1) - case "-snpRelFile" => snpRelFile = new File(args(t + 1)) - case "-snpAbsFile" => snpAbsFile = new File(args(t + 1)) - case "-indelRelFile" => indelRelFile = new File(args(t + 1)) - case "-indelAbsFile" => indelAbsFile = new File(args(t + 1)) - case "-totalFile" => totalFile = new File(args(t + 1)) - case _ => - } - } - if (sampleDir == null) throw new IllegalStateException("No sampleDir, use -sampleDir") - else if (!sampleDir.endsWith("/")) sampleDir += "/" - - val regex = """\W+""".r - val snpsOverlap: Map[(String, String), Int] = Map() - val indelsOverlap: Map[(String, String), Int] = Map() - val snpsTotal: Map[String, Int] = Map() - val indelsTotal: Map[String, Int] = Map() - for (sample1 <- samples; sample2 <- samples) { - val reader = Source.fromFile(new File(sampleDir + sample1 + "/" + sample1 + "-" + sample2 + fileSufix)) - for (line <- reader.getLines) { - regex.split(line) match { - case Array(_, _, _, varType, all, novel, overlap, rate, _*) => { - varType match { - case "SNP" => { - snpsOverlap += (sample1, sample2) -> overlap.toInt - snpsTotal += sample1 -> all.toInt - } - case "INDEL" => { - indelsOverlap += (sample1, sample2) -> overlap.toInt - indelsTotal += sample1 -> all.toInt - } - case _ => - } - } - case _ => - } - } - reader.close() - } - - val snpRelWritter = new PrintWriter(snpRelFile) - val snpAbsWritter = new PrintWriter(snpAbsFile) - val indelRelWritter = new PrintWriter(indelRelFile) - val indelAbsWritter = new PrintWriter(indelAbsFile) - - val allWritters = List(snpRelWritter, snpAbsWritter, indelRelWritter, indelAbsWritter) - for (writter <- allWritters) writter.println(samples.mkString("\t", "\t", "")) - for (sample1 <- samples) { - for (writter <- allWritters) writter.print(sample1) - for (sample2 <- samples) { - snpRelWritter.print("\t" + (round((snpsOverlap(sample1, sample2).toDouble / snpsTotal(sample1) * 10000.0)) / 10000.0)) - snpAbsWritter.print("\t" + snpsOverlap(sample1, sample2)) - indelRelWritter.print("\t" + (round((indelsOverlap(sample1, sample2).toDouble / indelsTotal(sample1) * 10000.0)) / 10000.0)) - indelAbsWritter.print("\t" + indelsOverlap(sample1, sample2)) - } - for (writter <- allWritters) writter.println() - } - for (writter <- allWritters) writter.close() - - val totalWritter = new PrintWriter(totalFile) - totalWritter.println("Sample\tSNPs\tIndels") - for (sample <- samples) - totalWritter.println(sample + "\t" + snpsTotal(sample) + "\t" + indelsTotal(sample)) - totalWritter.close() - - def plot(file: File) { - val executor = new RScriptExecutor - executor.addScript(new Resource("plotHeatmap.R", getClass)) - executor.addArgs(file, file.getAbsolutePath.stripSuffix(".tsv") + ".png", file.getAbsolutePath.stripSuffix(".tsv") + ".clustering.png") - executor.exec() - } - plot(snpRelFile) - plot(indelRelFile) - } -} \ No newline at end of file -- GitLab