diff --git a/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala b/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala index f28b4d52f94b95ebece020fed591a2e4d8bbf806..a9ff3afa2daf3e3a1d48845b176fcc8489987b4d 100644 --- a/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala +++ b/external-example/src/main/scala/org/example/group/pipelines/BiopetPipeline.scala @@ -31,6 +31,8 @@ class BiopetPipeline(val root: Configurable) extends QScript with SummaryQScript // Executing a biopet pipeline inside val shiva = new Shiva(this) + add(shiva) + shiva.init() shiva.biopetScript() addAll(shiva.functions) diff --git a/protected/biopet-gatk-extensions/pom.xml b/protected/biopet-gatk-extensions/pom.xml index 76da4b194ba8495a0fb3e84a81b29f2d1d37ffbd..bf0ce809bb4e3ff1178e4a95686e380c1f45b601 100644 --- a/protected/biopet-gatk-extensions/pom.xml +++ b/protected/biopet-gatk-extensions/pom.xml @@ -25,13 +25,13 @@ <dependencies> <dependency> <groupId>nl.lumc.sasc</groupId> - <artifactId>BiopetCore</artifactId> + <artifactId>BiopetExtensions</artifactId> <version>${project.version}</version> </dependency> <dependency> <groupId>org.broadinstitute.gatk</groupId> <artifactId>gatk-queue-extensions-distribution</artifactId> - <version>3.4</version> + <version>3.5</version> </dependency> </dependencies> </project> diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala index e347ebf4743eca9b2f80bc3f072c922cb012e88a..60614ecc938bef58534c810423254fc56eab5dbc 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GenotypeGVCFs.scala @@ -8,8 +8,13 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Output class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.GenotypeGVCFs with GatkGeneral { + + @Output(required = false) + protected var vcfIndex: File = _ + annotation ++= config("annotation", default = Seq(), freeVar = false).asStringList if (config.contains("dbsnp")) dbsnp = config("dbsnp") @@ -22,6 +27,11 @@ class GenotypeGVCFs(val root: Configurable) extends org.broadinstitute.gatk.queu stand_call_conf = config("stand_call_conf", default = 30) stand_emit_conf = config("stand_emit_conf", default = 0) } + + override def freezeFieldValues(): Unit = { + super.freezeFieldValues() + if (out.getName.endsWith(".vcf.gz")) vcfIndex = new File(out.getAbsolutePath + ".tbi") + } } object GenotypeGVCFs { @@ -29,6 +39,7 @@ object GenotypeGVCFs { val gg = new GenotypeGVCFs(root) gg.variant = gvcfFiles gg.out = output + if (gg.out.getName.endsWith(".vcf.gz")) gg.vcfIndex = new File(gg.out.getAbsolutePath + ".tbi") gg } } \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala index 514879f9d2b96088a6e5b3e30eb969d44740934b..819a836bfe03486ca5b86a30708ead4bb9b39a67 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/HaplotypeCaller.scala @@ -5,11 +5,18 @@ */ package nl.lumc.sasc.biopet.extensions.gatk.broad +import java.io.File + import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{Gather, Output} import org.broadinstitute.gatk.utils.variant.GATKVCFIndexType class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.HaplotypeCaller with GatkGeneral { + @Gather(enabled = false) + @Output(required = false) + protected var vcfIndex: File = _ + override val defaultThreads = 1 min_mapping_quality_score = config("minMappingQualityScore", default = 20) @@ -40,6 +47,7 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu override def freezeFieldValues() { super.freezeFieldValues() + if (out.getName.endsWith(".vcf.gz")) vcfIndex = new File(out.getAbsolutePath + ".tbi") if (bamOutput != null && nct.getOrElse(1) > 1) { logger.warn("BamOutput is on, nct/threads is forced to set on 1, this option is only for debug") nCoresRequest = Some(1) @@ -47,10 +55,22 @@ class HaplotypeCaller(val root: Configurable) extends org.broadinstitute.gatk.qu nct = Some(getThreads) memoryLimit = Option(memoryLimit.getOrElse(2.0) * nct.getOrElse(1)) } +} + +object HaplotypeCaller { + def apply(root: Configurable, inputFiles: List[File], outputFile: File): HaplotypeCaller = { + val hc = new HaplotypeCaller(root) + hc.input_file = inputFiles + hc.out = outputFile + if (hc.out.getName.endsWith(".vcf.gz")) hc.vcfIndex = new File(hc.out.getAbsolutePath + ".tbi") + hc + } - def useGvcf() { - emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF - variant_index_type = GATKVCFIndexType.LINEAR - variant_index_parameter = config("variant_index_parameter", default = 128000) + def gvcf(root: Configurable, inputFile: File, outputFile: File): HaplotypeCaller = { + val hc = apply(root, List(inputFile), outputFile) + hc.emitRefConfidence = org.broadinstitute.gatk.tools.walkers.haplotypecaller.ReferenceConfidenceMode.GVCF + hc.variant_index_type = GATKVCFIndexType.LINEAR + hc.variant_index_parameter = Some(hc.config("variant_index_parameter", default = 128000).asInt) + hc } -} +} \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala index 44a3eac6607a6165920ab4d4564c9777119a1ca9..32cf797a0f77d488a5aa0c7fda32fcfaa422e038 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/IndelRealigner.scala @@ -8,8 +8,12 @@ package nl.lumc.sasc.biopet.extensions.gatk.broad import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.Output class IndelRealigner(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.IndelRealigner with GatkGeneral { + @Output + protected var bamIndex: File = _ + if (config.contains("scattercount")) scatterCount = config("scattercount") } @@ -19,6 +23,7 @@ object IndelRealigner { ir.input_file :+= input ir.targetIntervals = targetIntervals ir.out = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bam") + ir.bamIndex = new File(outputDir, input.getName.stripSuffix(".bam") + ".realign.bai") ir } } \ No newline at end of file diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala index 70d988f4b057572bc1c110ad597c232d6093a2cc..c0cecb873aceb39d8073e91bf7571a18f20f634d 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala +++ b/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/UnifiedGenotyper.scala @@ -5,9 +5,17 @@ */ package nl.lumc.sasc.biopet.extensions.gatk.broad +import java.io.File + import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{Gather, Output} class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.queue.extensions.gatk.UnifiedGenotyper with GatkGeneral { + + @Gather(enabled = false) + @Output(required = false) + protected var vcfIndex: File = _ + if (config.contains("scattercount")) scatterCount = config("scattercount") if (config.contains("dbsnp")) this.dbsnp = config("dbsnp") sample_ploidy = config("ploidy") @@ -36,3 +44,14 @@ class UnifiedGenotyper(val root: Configurable) extends org.broadinstitute.gatk.q memoryLimit = Option(nct.getOrElse(1) * memoryLimit.getOrElse(2.0)) } } + +object UnifiedGenotyper { + def apply(root: Configurable, inputFiles: List[File], outputFile: File): UnifiedGenotyper = { + val ug = new UnifiedGenotyper(root) + ug.input_file = inputFiles + ug.out = outputFile + if (ug.out.getName.endsWith(".vcf.gz")) ug.vcfIndex = new File(ug.out.getAbsolutePath + ".tbi") + ug + } + +} \ No newline at end of file diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala deleted file mode 100644 index e489c4afdf4a30b8ca0b2a965242eae7811ad24c..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.scala +++ /dev/null @@ -1,60 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import org.broadinstitute.gatk.queue.QScript - -import scala.util.Random - -class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Sample gvcf file") - var sampleGvcf: File = _ - - @Argument(doc = "SampleName", required = true) - var sampleName: String = _ - - @Input(doc = "Gvcf files", shortName = "I", required = false) - var gvcfFiles: List[File] = Nil - - var reference: File = config("reference") - - @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = config("dbsnp") - - def init() { - if (config.contains("gvcffiles")) for (file <- config("gvcffiles").asList) - gvcfFiles ::= file.toString - } - - def biopetScript() { - var todoGvcfs = gvcfFiles - var gvcfPool: List[File] = Nil - addGenotypingPipeline(gvcfPool) - - while (todoGvcfs.nonEmpty) { - val index = Random.nextInt(todoGvcfs.size) - gvcfPool ::= todoGvcfs(index) - addGenotypingPipeline(gvcfPool) - todoGvcfs = todoGvcfs.filter(b => b != todoGvcfs(index)) - } - } - - def addGenotypingPipeline(gvcfPool: List[File]) { - val gatkGenotyping = new GatkGenotyping(this) - gatkGenotyping.inputGvcfs = sampleGvcf :: gvcfPool - gatkGenotyping.samples :+= sampleName - gatkGenotyping.outputDir = new File(outputDir, "samples_" + gvcfPool.size) - gatkGenotyping.init() - gatkGenotyping.biopetScript() - addAll(gatkGenotyping.functions) - } -} - -object GatkBenchmarkGenotyping extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala deleted file mode 100644 index 2f54cbbc70bc7c2666ef9c043017603c0b1c4b9f..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkGenotyping.scala +++ /dev/null @@ -1,61 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ GenotypeGVCFs, SelectVariants } -import org.broadinstitute.gatk.queue.QScript - -class GatkGenotyping(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "Gvcf files", shortName = "I") - var inputGvcfs: List[File] = Nil - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference") - - @Argument(doc = "Dbsnp", shortName = "dbsnp", required = false) - var dbsnp: File = config("dbsnp") - - @Argument(doc = "OutputName", required = false) - var outputName: String = "genotype" - - @Output(doc = "OutputFile", shortName = "O", required = false) - var outputFile: File = _ - - @Argument(doc = "Samples", shortName = "sample", required = false) - var samples: List[String] = Nil - - def init() { - require(outputName != null, "Outputname is null") - if (outputFile == null) outputFile = new File(outputDir, outputName + ".vcf.gz") - } - - def biopetScript() { - addGenotypeGVCFs(inputGvcfs, outputFile) - if (samples.nonEmpty) { - if (samples.size > 1) addSelectVariants(outputFile, samples, new File(outputDir, "samples/"), "all") - for (sample <- samples) addSelectVariants(outputFile, List(sample), new File(outputDir, "samples/"), sample) - } - } - - def addGenotypeGVCFs(gvcfFiles: List[File], outputFile: File): File = { - val genotypeGVCFs = GenotypeGVCFs(this, gvcfFiles, outputFile) - add(genotypeGVCFs) - genotypeGVCFs.out - } - - def addSelectVariants(inputFile: File, samples: List[String], outputDir: File, name: String) { - val selectVariants = SelectVariants(this, inputFile, new File(outputDir, name + ".vcf.gz")) - selectVariants.excludeNonVariants = true - for (sample <- samples) selectVariants.sample_name :+= sample - add(selectVariants) - } -} - -object GatkGenotyping extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala deleted file mode 100644 index 3707ec2751cd21c82193f65c47281d294a764778..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkPipeline.scala +++ /dev/null @@ -1,235 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import htsjdk.samtools.SamReaderFactory -import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.summary.SummaryQScript -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ CombineGVCFs, CombineVariants } -import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, SamToFastq } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics -import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig -import nl.lumc.sasc.biopet.pipelines.mapping.Mapping -import org.broadinstitute.gatk.queue.QScript - -import scala.collection.JavaConversions._ - -class GatkPipeline(val root: Configurable) extends QScript with MultiSampleQScript with SummaryQScript { - qscript => - def this() = this(null) - - @Argument(doc = "Skip Genotyping step", shortName = "skipgenotyping", required = false) - var skipGenotyping: Boolean = config("skip_genotyping", default = false) - - /** Merge gvcfs */ - var mergeGvcfs: Boolean = config("merge_gvcfs", default = false) - - /** Joint variantcalling */ - var jointVariantcalling: Boolean = config("joint_variantcalling", default = false) - - /** Joint genotyping */ - var jointGenotyping: Boolean = config("joint_genotyping", default = false) - - var singleSampleCalling = config("single_sample_calling", default = true) - var reference: File = config("reference") - var useAllelesOption: Boolean = config("use_alleles_option", default = false) - val externalGvcfs = config("external_gvcfs_files", default = Nil).asFileList - - def summaryFile = new File(outputDir, "GatkPipeline.summary.json") - - //TODO: Add summary - def summaryFiles = Map() - - //TODO: Add summary - def summarySettings = Map() - - def makeSample(id: String) = new Sample(id) - class Sample(sampleId: String) extends AbstractSample(sampleId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - def makeLibrary(id: String) = new Library(id) - class Library(libId: String) extends AbstractLibrary(libId) { - //TODO: Add summary - def summaryFiles: Map[String, File] = Map() - - //TODO: Add summary - def summaryStats: Map[String, Any] = Map() - - val mapping = new Mapping(qscript) - mapping.sampleId = Some(sampleId) - mapping.libId = Some(libId) - mapping.outputDir = libDir - - /** Library variantcalling */ - val gatkVariantcalling = new GatkVariantcalling(qscript) - gatkVariantcalling.doublePreProces = false - gatkVariantcalling.sampleID = sampleId - gatkVariantcalling.outputDir = new File(libDir, "variantcalling") - - protected def addJobs(): Unit = { - val bamFile: Option[File] = if (config.contains("R1")) { - mapping.input_R1 = config("R1") - mapping.input_R2 = config("R2") - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) // Add functions of mapping to curent function pool - Some(mapping.finalBamFile) - } else if (config.contains("bam")) { - var bamFile: File = config("bam") - if (!bamFile.exists) throw new IllegalStateException("Bam in config does not exist, file: " + bamFile) - - if (config("bam_to_fastq", default = false).asBoolean) { - val samToFastq = SamToFastq(qscript, bamFile, libDir + sampleId + "-" + libId + ".R1.fastq", - libDir + sampleId + "-" + libId + ".R2.fastq") - samToFastq.isIntermediate = true - qscript.add(samToFastq) - mapping.input_R1 = samToFastq.fastqR1 - mapping.input_R2 = Some(samToFastq.fastqR2) - mapping.init() - mapping.biopetScript() - addAll(mapping.functions) // Add functions of mapping to curent function pool - Some(mapping.finalBamFile) - } else { - var readGroupOke = true - val inputSam = SamReaderFactory.makeDefault.open(bamFile) - val header = inputSam.getFileHeader.getReadGroups - for (readGroup <- inputSam.getFileHeader.getReadGroups) { - if (readGroup.getSample != sampleId) logger.warn("Sample ID readgroup in bam file is not the same") - if (readGroup.getLibrary != libId) logger.warn("Library ID readgroup in bam file is not the same") - if (readGroup.getSample != sampleId || readGroup.getLibrary != libId) readGroupOke = false - } - inputSam.close() - - if (!readGroupOke) { - if (config("correct_readgroups", default = false)) { - logger.info("Correcting readgroups, file:" + bamFile) - val aorrg = AddOrReplaceReadGroups(qscript, bamFile, new File(libDir + sampleId + "-" + libId + ".bam")) - aorrg.RGID = sampleId + "-" + libId - aorrg.RGLB = libId - aorrg.RGSM = sampleId - aorrg.isIntermediate = true - qscript.add(aorrg) - bamFile = aorrg.output - } else throw new IllegalStateException("Sample readgroup and/or library of input bamfile is not correct, file: " + bamFile + - "\nPlease note that it is possible to set 'correct_readgroups' to true in the config to automatic fix this") - } - addAll(BamMetrics(qscript, bamFile, libDir + "metrics/").functions) - - Some(bamFile) - } - } else { - logger.error("Sample: " + sampleId + ": No R1 found for run: " + libId) - None - } - - if (bamFile.isDefined) { - gatkVariantcalling.inputBams = List(bamFile.get) - gatkVariantcalling.variantcalling = config("library_variantcalling", default = false) - gatkVariantcalling.init() - gatkVariantcalling.biopetScript() - addAll(gatkVariantcalling.functions) - } - - addSummaryQScript(mapping) - } - } - - /** sample variantcalling */ - val gatkVariantcalling = new GatkVariantcalling(qscript) - gatkVariantcalling.sampleID = sampleId - gatkVariantcalling.outputDir = new File(sampleDir, "variantcalling") - - protected def addJobs(): Unit = { - addPerLibJobs() - gatkVariantcalling.inputBams = libraries.map(_._2.mapping.finalBamFile).toList - gatkVariantcalling.preProcesBams = false - if (!singleSampleCalling) { - gatkVariantcalling.useHaplotypecaller = false - gatkVariantcalling.useUnifiedGenotyper = false - } - gatkVariantcalling.init() - gatkVariantcalling.biopetScript() - addAll(gatkVariantcalling.functions) - - gatkVariantcalling.inputBams.foreach(x => addAll(Bam2Wig(qscript, x).functions)) - } - } - - def init() { - } - - val multisampleVariantcalling = new GatkVariantcalling(this) { - override def configName = "gatkvariantcalling" - override def configPath: List[String] = super.configPath ::: "multisample" :: Nil - } - - def biopetScript(): Unit = { - addSamplesJobs() - - addSummaryJobs() - } - - def addMultiSampleJobs(): Unit = { - val gvcfFiles: List[File] = if (mergeGvcfs && externalGvcfs.size + samples.size > 1) { - val newFile = new File(outputDir, "merged.gvcf.vcf.gz") - add(CombineGVCFs(this, externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile), newFile)) - List(newFile) - } else externalGvcfs ++ samples.map(_._2.gatkVariantcalling.scriptOutput.gvcfFile) - - if (!skipGenotyping && gvcfFiles.nonEmpty) { - if (jointGenotyping) { - val gatkGenotyping = new GatkGenotyping(this) - gatkGenotyping.inputGvcfs = gvcfFiles - gatkGenotyping.outputDir = new File(outputDir, "genotyping") - gatkGenotyping.init() - gatkGenotyping.biopetScript() - addAll(gatkGenotyping.functions) - var vcfFile = gatkGenotyping.outputFile - } - } else logger.warn("No gVCFs to genotype") - - if (jointVariantcalling) { - val allBamfiles = samples.map(_._2.gatkVariantcalling.scriptOutput.bamFiles).toList.fold(Nil)(_ ++ _) - val allRawVcfFiles = samples.map(_._2.gatkVariantcalling.scriptOutput.rawVcfFile).filter(_ != null).toList - - val gatkVariantcalling = new GatkVariantcalling(this) { - override def configName = "gatkvariantcalling" - override def configPath: List[String] = super.configPath ::: "multisample" :: Nil - } - - if (gatkVariantcalling.useMpileup) { - val cvRaw = CombineVariants(this, allRawVcfFiles.toList, new File(outputDir, "variantcalling/multisample.raw.vcf.gz")) - add(cvRaw) - gatkVariantcalling.rawVcfInput = cvRaw.out - } - - multisampleVariantcalling.preProcesBams = false - multisampleVariantcalling.doublePreProces = false - multisampleVariantcalling.inputBams = allBamfiles.toList - multisampleVariantcalling.outputDir = new File(outputDir, "variantcalling") - multisampleVariantcalling.outputName = "multisample" - multisampleVariantcalling.init() - multisampleVariantcalling.biopetScript() - addAll(multisampleVariantcalling.functions) - - if (config("inputtype", default = "dna").asString != "rna" && config("recalibration", default = false).asBoolean) { - val recalibration = new GatkVariantRecalibration(this) - recalibration.inputVcf = multisampleVariantcalling.scriptOutput.finalVcfFile - recalibration.bamFiles = allBamfiles - recalibration.outputDir = new File(outputDir, "recalibration") - recalibration.init() - recalibration.biopetScript() - } - } - } -} - -object GatkPipeline extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala deleted file mode 100644 index 772aa6887d7a7e4983059f84ea3ac7b455880c4a..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantRecalibration.scala +++ /dev/null @@ -1,76 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad.{ ApplyRecalibration, VariantAnnotator, VariantRecalibrator } -import org.broadinstitute.gatk.queue.QScript - -class GatkVariantRecalibration(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - @Input(doc = "input vcf file", shortName = "I") - var inputVcf: File = _ - - @Input(doc = "input vcf file", shortName = "BAM", required = false) - var bamFiles: List[File] = Nil - - @Output(doc = "output vcf file", shortName = "out") - var outputVcf: File = _ - - def init() { - require(inputVcf != null, "Missing Output directory on gatk module") - } - - def biopetScript() { - var vcfFile: File = if (bamFiles.nonEmpty) addVariantAnnotator(inputVcf, bamFiles, outputDir) else inputVcf - vcfFile = addSnpVariantRecalibrator(vcfFile, outputDir) - vcfFile = addIndelVariantRecalibrator(vcfFile, outputDir) - } - - def addSnpVariantRecalibrator(inputVcf: File, dir: File): File = { - val snpRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = false) - if (snpRecal.resource.nonEmpty) { - add(snpRecal) - - val snpApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - snpRecal.recal_file, snpRecal.tranches_file, indel = false) - add(snpApply) - - snpApply.out - } else { - logger.warn("Skipped snp Recalibration, resource is missing") - inputVcf - } - } - - def addIndelVariantRecalibrator(inputVcf: File, dir: File): File = { - val indelRecal = VariantRecalibrator(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal"), - swapExt(dir, inputVcf, ".vcf", ".indel.tranches"), indel = true) - if (indelRecal.resource.nonEmpty) { - add(indelRecal) - - val indelApply = ApplyRecalibration(this, inputVcf, swapExt(dir, inputVcf, ".vcf", ".indel.recal.vcf"), - indelRecal.recal_file, indelRecal.tranches_file, indel = true) - add(indelApply) - - indelApply.out - } else { - logger.warn("Skipped indel Recalibration, resource is missing") - inputVcf - } - } - - def addVariantAnnotator(inputvcf: File, bamfiles: List[File], dir: File): File = { - val variantAnnotator = VariantAnnotator(this, inputvcf, bamfiles, swapExt(dir, inputvcf, ".vcf", ".anotated.vcf")) - add(variantAnnotator) - variantAnnotator.out - } -} - -object GatkVariantRecalibration extends PipelineCommand diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala deleted file mode 100644 index 750cbee21adea6c059d939aad518ca9aed0eb06e..0000000000000000000000000000000000000000 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.scala +++ /dev/null @@ -1,272 +0,0 @@ -/** - * Due to the license issue with GATK, this part of Biopet can only be used inside the - * LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions - * on how to use this protected part of biopet or contact us at sasc@lumc.nl - */ -package nl.lumc.sasc.biopet.pipelines.gatk - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.extensions.Ln -import nl.lumc.sasc.biopet.extensions.gatk.broad._ -import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates -import nl.lumc.sasc.biopet.extensions.tools.{ MergeAlleles, MpileupToVcf, VcfFilter, VcfStats } -import nl.lumc.sasc.biopet.utils.ConfigUtils -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.TaggedFile - -import scala.collection.SortedMap -import scala.language.reflectiveCalls - -class GatkVariantcalling(val root: Configurable) extends QScript with BiopetQScript { - def this() = this(null) - - val scriptOutput = new GatkVariantcalling.ScriptOutput - - @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM") - var inputBams: List[File] = Nil - - @Input(doc = "Raw vcf file", shortName = "raw") - var rawVcfInput: File = _ - - @Argument(doc = "Reference", shortName = "R", required = false) - var reference: File = config("reference") - - @Argument(doc = "OutputName", required = false) - var outputName: String = _ - - @Argument(doc = "Sample name", required = false) - var sampleID: String = _ - - var preProcesBams: Boolean = config("pre_proces_bams", default = true) - var variantcalling: Boolean = true - var doublePreProces: Boolean = config("double_pre_proces", default = true) - var useHaplotypecaller: Boolean = config("use_haplotypecaller", default = true) - var useUnifiedGenotyper: Boolean = config("use_unifiedgenotyper", default = false) - var useAllelesOption: Boolean = config("use_alleles_option", default = false) - var useMpileup: Boolean = config("use_mpileup", default = true) - var useIndelRealigner: Boolean = config("use_indel_realign", default = true) - var useBaseRecalibration: Boolean = config("use_base_recalibration", default = true) - - def init() { - if (outputName == null && sampleID != null) outputName = sampleID - else if (outputName == null) outputName = config("output_name", default = "noname") - - val baseRecalibrator = new BaseRecalibrator(this) - if (preProcesBams && useBaseRecalibration && baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration") - useBaseRecalibration = false - } - } - - private def doublePreProces(files: List[File]): List[File] = { - if (files.isEmpty) throw new IllegalStateException("Files can't be empty") - else if (!doublePreProces) files - else if (files.size == 1) { - val bamFile = new File(outputDir, files.head.getName) - if (bamFile != files.head) { - val oldIndex: File = new File(files.head.getAbsolutePath.stripSuffix(".bam") + ".bai") - val newIndex: File = swapExt(outputDir, bamFile, ".bam", ".bai") - val baiLn = Ln(this, oldIndex, newIndex) - add(baiLn) - - val bamLn = Ln(this, files.head, bamFile) - bamLn.deps :+= baiLn.output - add(bamLn) - } - List(bamFile) - } else { - val markDup = MarkDuplicates(this, files, new File(outputDir, outputName + ".dedup.bam")) - markDup.isIntermediate = useIndelRealigner - add(markDup) - if (useIndelRealigner) { - List(addIndelRealign(markDup.output, outputDir, isIntermediate = false)) - } else { - List(markDup.output) - } - } - } - - def biopetScript() { - scriptOutput.bamFiles = { - doublePreProces(if (preProcesBams) { - for (inputBam <- inputBams) yield { - var bamFile = inputBam - if (useIndelRealigner) - bamFile = addIndelRealign(bamFile, outputDir, isIntermediate = useBaseRecalibration) - if (useBaseRecalibration) - bamFile = addBaseRecalibrator(bamFile, outputDir, isIntermediate = inputBams.size > 1) - bamFile - } - } else { - inputBams - }) - } - - if (variantcalling) { - var mergBuffer: SortedMap[String, File] = SortedMap() - def mergeList = mergBuffer map { case (key, file) => TaggedFile(removeNoneVariants(file), "name=" + key) } - - if (sampleID != null && (useHaplotypecaller || config("joint_genotyping", default = false).asBoolean)) { - val hcGvcf = new HaplotypeCaller(this) - hcGvcf.useGvcf() - hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = new File(outputDir, outputName + ".hc.discovery.gvcf.vcf.gz") - add(hcGvcf) - scriptOutput.gvcfFile = hcGvcf.out - } - - if (useHaplotypecaller) { - if (sampleID != null) { - val genotypeGVCFs = GenotypeGVCFs(this, List(scriptOutput.gvcfFile), new File(outputDir, outputName + ".hc.discovery.vcf.gz")) - add(genotypeGVCFs) - scriptOutput.hcVcfFile = genotypeGVCFs.out - } else { - val hcGvcf = new HaplotypeCaller(this) - hcGvcf.input_file = scriptOutput.bamFiles - hcGvcf.out = new File(outputDir, outputName + ".hc.discovery.vcf.gz") - add(hcGvcf) - scriptOutput.hcVcfFile = hcGvcf.out - } - mergBuffer += ("1.HC-Discovery" -> scriptOutput.hcVcfFile) - } - - if (useUnifiedGenotyper) { - val ugVcf = new UnifiedGenotyper(this) - ugVcf.input_file = scriptOutput.bamFiles - ugVcf.out = new File(outputDir, outputName + ".ug.discovery.vcf.gz") - add(ugVcf) - scriptOutput.ugVcfFile = ugVcf.out - mergBuffer += ("2.UG-Discovery" -> scriptOutput.ugVcfFile) - } - - // Generate raw vcf - if (useMpileup) { - if (sampleID != null && scriptOutput.bamFiles.size == 1) { - val m2v = new MpileupToVcf(this) - m2v.inputBam = scriptOutput.bamFiles.head - m2v.sample = sampleID - m2v.output = new File(outputDir, outputName + ".raw.vcf") - add(m2v) - scriptOutput.rawVcfFile = m2v.output - - val vcfFilter = new VcfFilter(this) { - override def defaults = Map("min_sample_depth" -> 8, - "min_alternate_depth" -> 2, - "min_samples_pass" -> 1, - "filter_ref_calls" -> true - ) - } - vcfFilter.inputVcf = m2v.output - vcfFilter.outputVcf = swapExt(outputDir, m2v.output, ".vcf", ".filter.vcf.gz") - add(vcfFilter) - scriptOutput.rawFilterVcfFile = vcfFilter.outputVcf - } else if (rawVcfInput != null) scriptOutput.rawFilterVcfFile = rawVcfInput - if (scriptOutput.rawFilterVcfFile != null) mergBuffer += ("9.raw" -> scriptOutput.rawFilterVcfFile) - } - - // Allele mode - if (useAllelesOption) { - val mergeAlleles = MergeAlleles(this, mergeList.toList, outputDir + "raw.allele__temp_only.vcf.gz") - mergeAlleles.isIntermediate = true - add(mergeAlleles) - - if (useHaplotypecaller) { - val hcAlleles = new HaplotypeCaller(this) - hcAlleles.input_file = scriptOutput.bamFiles - hcAlleles.out = new File(outputDir, outputName + ".hc.allele.vcf.gz") - hcAlleles.alleles = mergeAlleles.output - hcAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(hcAlleles) - scriptOutput.hcAlleleVcf = hcAlleles.out - mergBuffer += ("3.HC-alleles" -> hcAlleles.out) - } - - if (useUnifiedGenotyper) { - val ugAlleles = new UnifiedGenotyper(this) - ugAlleles.input_file = scriptOutput.bamFiles - ugAlleles.out = new File(outputDir, outputName + ".ug.allele.vcf.gz") - ugAlleles.alleles = mergeAlleles.output - ugAlleles.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(ugAlleles) - scriptOutput.ugAlleleVcf = ugAlleles.out - mergBuffer += ("4.UG-alleles" -> ugAlleles.out) - } - } - - def removeNoneVariants(input: File): File = { - val output = input.getAbsolutePath.stripSuffix(".vcf.gz") + ".variants_only.vcf.gz" - val sv = SelectVariants(this, input, output) - sv.excludeFiltered = true - sv.excludeNonVariants = true - sv.isIntermediate = true - add(sv) - sv.out - } - - val cvFinal = CombineVariants(this, mergeList.toList, new File(outputDir, outputName + ".final.vcf.gz")) - cvFinal.genotypemergeoption = org.broadinstitute.gatk.utils.variant.GATKVariantContextUtils.GenotypeMergeType.UNSORTED - add(cvFinal) - - val vcfStats = new VcfStats(this) - vcfStats.input = cvFinal.out - vcfStats.setOutputDir(new File(outputDir, "vcfstats")) - add(vcfStats) - - scriptOutput.finalVcfFile = cvFinal.out - } - } - - def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean = true): File = { - val realignerTargetCreator = RealignerTargetCreator(this, inputBam, dir) - realignerTargetCreator.isIntermediate = true - add(realignerTargetCreator) - - val indelRealigner = IndelRealigner(this, inputBam, realignerTargetCreator.out, dir) - indelRealigner.isIntermediate = isIntermediate - add(indelRealigner) - - indelRealigner.o - } - - def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean = false): File = { - val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal")) - - if (baseRecalibrator.knownSites.isEmpty) { - logger.warn("No Known site found, skipping base recalibration, file: " + inputBam) - return inputBam - } - add(baseRecalibrator) - - if (config("use_analyze_covariates", default = false).asBoolean) { - val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after")) - baseRecalibratorAfter.BQSR = baseRecalibrator.o - add(baseRecalibratorAfter) - - add(AnalyzeCovariates(this, baseRecalibrator.o, baseRecalibratorAfter.o, swapExt(dir, inputBam, ".bam", ".baserecal.pdf"))) - } - - val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam")) - printReads.BQSR = baseRecalibrator.o - printReads.isIntermediate = isIntermediate - add(printReads) - - printReads.o - } -} - -object GatkVariantcalling extends PipelineCommand { - class ScriptOutput { - var bamFiles: List[File] = _ - var gvcfFile: File = _ - var hcVcfFile: File = _ - var ugVcfFile: File = _ - var rawVcfFile: File = _ - var rawFilterVcfFile: File = _ - var hcAlleleVcf: File = _ - var ugAlleleVcf: File = _ - var finalVcfFile: File = _ - } -} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala index eae23a65406c85035ae38a939c11f4ac3d25af66..620619bfea1ccdb745f814d2b801c100437d4c76 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/Shiva.scala @@ -8,7 +8,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.extensions.gatk.broad._ -import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaTrait, ShivaVariantcallingTrait } +import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaVariantcallingTrait, ShivaTrait } import org.broadinstitute.gatk.queue.QScript /** diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala index 1878d86fd150af451ab46fb69234ccbb66b05ec2..edbc633f3a03204f3d4e50ac14bc016134825252 100644 --- a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcalling.scala @@ -6,9 +6,9 @@ package nl.lumc.sasc.biopet.pipelines.gatk import nl.lumc.sasc.biopet.core.PipelineCommand -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.gatk.broad.GenotypeGVCFs +import nl.lumc.sasc.biopet.pipelines.gatk.variantcallers._ import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcallingTrait +import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript /** @@ -22,99 +22,13 @@ class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVari /** Will generate all available variantcallers */ override def callersList = { - new HaplotypeCallerGvcf :: - new HaplotypeCallerAllele :: - new UnifiedGenotyperAllele :: - new UnifiedGenotyper :: - new HaplotypeCaller :: + new HaplotypeCallerGvcf(this) :: + new HaplotypeCallerAllele(this) :: + new UnifiedGenotyperAllele(this) :: + new UnifiedGenotyper(this) :: + new HaplotypeCaller(this) :: super.callersList } - - /** Default mode for the haplotypecaller */ - class HaplotypeCaller extends Variantcaller { - val name = "haplotypecaller" - protected val defaultPrio = 1 - - def outputFile = new File(outputDir, namePrefix + ".haplotypecaller.vcf.gz") - - def addJobs() { - val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript) - hc.input_file = inputBams - hc.out = outputFile - add(hc) - } - } - - /** Default mode for UnifiedGenotyper */ - class UnifiedGenotyper extends Variantcaller { - val name = "unifiedgenotyper" - protected val defaultPrio = 20 - - def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper.vcf.gz") - - def addJobs() { - val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript) - ug.input_file = inputBams - ug.out = outputFile - add(ug) - } - } - - /** Allele mode for Haplotypecaller */ - class HaplotypeCallerAllele extends Variantcaller { - val name = "haplotypecaller_allele" - protected val defaultPrio = 5 - - def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_allele.vcf.gz") - - def addJobs() { - val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript) - hc.input_file = inputBams - hc.out = outputFile - hc.alleles = config("input_alleles") - hc.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(hc) - } - } - - /** Allele mode for GenotyperAllele */ - class UnifiedGenotyperAllele extends Variantcaller { - val name = "unifiedgenotyper_allele" - protected val defaultPrio = 9 - - def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper_allele.vcf.gz") - - def addJobs() { - val ug = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript) - ug.input_file = inputBams - ug.out = outputFile - ug.alleles = config("input_alleles") - ug.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES - add(ug) - } - } - - /** Gvcf mode for haplotypecaller */ - class HaplotypeCallerGvcf extends Variantcaller { - val name = "haplotypecaller_gvcf" - protected val defaultPrio = 5 - - def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_gvcf.vcf.gz") - - def addJobs() { - val gvcfFiles = for (inputBam <- inputBams) yield { - val hc = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript) - hc.input_file = List(inputBam) - hc.out = new File(outputDir, inputBam.getName.stripSuffix(".bam") + ".gvcf.vcf.gz") - hc.useGvcf() - add(hc) - hc.out - } - - val genotypeGVCFs = GenotypeGVCFs(qscript, gvcfFiles, outputFile) - add(genotypeGVCFs) - } - } } /** object to add default main method to pipeline */ diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala new file mode 100644 index 0000000000000000000000000000000000000000..f18b16812b48375bb7e1d57969ef9b1db7ba5691 --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCaller.scala @@ -0,0 +1,17 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Default mode for the haplotypecaller */ +class HaplotypeCaller(val root: Configurable) extends Variantcaller { + val name = "haplotypecaller" + protected def defaultPrio = 1 + + def biopetScript() { + val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + add(hc) + } +} + diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala new file mode 100644 index 0000000000000000000000000000000000000000..a4bff551c7bff4a6376d16d8b0401b008a6aad8e --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerAllele.scala @@ -0,0 +1,18 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Allele mode for Haplotypecaller */ +class HaplotypeCallerAllele(val root: Configurable) extends Variantcaller { + val name = "haplotypecaller_allele" + protected def defaultPrio = 5 + + def biopetScript() { + val hc = broad.HaplotypeCaller(this, inputBams.values.toList, outputFile) + hc.alleles = config("input_alleles") + hc.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES + add(hc) + } +} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala new file mode 100644 index 0000000000000000000000000000000000000000..cf289cb789e4c5ad944b30eb0b71b163cc1115ff --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/HaplotypeCallerGvcf.scala @@ -0,0 +1,22 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Gvcf mode for haplotypecaller */ +class HaplotypeCallerGvcf(val root: Configurable) extends Variantcaller { + val name = "haplotypecaller_gvcf" + protected def defaultPrio = 5 + + def biopetScript() { + val gvcfFiles = for ((sample, inputBam) <- inputBams) yield { + val hc = broad.HaplotypeCaller.gvcf(this, inputBam, new File(outputDir, sample + ".gvcf.vcf.gz")) + add(hc) + hc.out + } + + val genotypeGVCFs = broad.GenotypeGVCFs(this, gvcfFiles.toList, outputFile) + add(genotypeGVCFs) + } +} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala new file mode 100644 index 0000000000000000000000000000000000000000..b71273b284c153ed628565d23c8ca0dffd7bd010 --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyper.scala @@ -0,0 +1,16 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Default mode for UnifiedGenotyper */ +class UnifiedGenotyper(val root: Configurable) extends Variantcaller { + val name = "unifiedgenotyper" + protected def defaultPrio = 20 + + def biopetScript() { + val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + add(ug) + } +} diff --git a/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala new file mode 100644 index 0000000000000000000000000000000000000000..61bb63ae3f897cc29bd37fc4c8af53f874faad28 --- /dev/null +++ b/protected/biopet-gatk-pipelines/src/main/scala/nl/lumc/sasc/biopet/pipelines/gatk/variantcallers/UnifiedGenotyperAllele.scala @@ -0,0 +1,18 @@ +package nl.lumc.sasc.biopet.pipelines.gatk.variantcallers + +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers.Variantcaller +import nl.lumc.sasc.biopet.utils.config.Configurable +import nl.lumc.sasc.biopet.extensions.gatk.broad + +/** Allele mode for GenotyperAllele */ +class UnifiedGenotyperAllele(val root: Configurable) extends Variantcaller { + val name = "unifiedgenotyper_allele" + protected def defaultPrio = 9 + + def biopetScript() { + val ug = broad.UnifiedGenotyper(this, inputBams.values.toList, outputFile) + ug.alleles = config("input_alleles") + ug.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES + add(ug) + } +} diff --git a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala index bd78727cad7e431d9af8dceaa8a4f6aef4c1d163..14207e5a829de7fd7cae4738a10fd9f6b28c5546 100644 --- a/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala +++ b/protected/biopet-gatk-pipelines/src/test/scala/nl/lumc/sasc/biopet/pipelines/gatk/ShivaVariantcallingTest.scala @@ -76,7 +76,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) - pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList + pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp index f1bae9632f6bb046360496b0bf924515b791f794..af29b48fab64495b517427e08b416f161f7443cd 100644 --- a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp +++ b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp @@ -10,7 +10,7 @@ <%@ var rootPath: String %> <%@ var outputDir: File %> <%@ var metricsTag: String = "bammetrics" %> -<%@ var target: String %> +<%@ var target: Option[String] %> #{ val samples = sampleId match { case Some(sample) => List(sample.toString) @@ -46,7 +46,7 @@ #if (libs.head != libId) <tr> #end #if (!sampleLevel) <td><a href="${rootPath}Samples/${sample}/Libraries/${libId}/index.html">${libId}</a></td> #end #{ - val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List(metricsTag, "stats", target + "_cov_stats", "coverage", "_all") + val prefixPath = List("samples", sample) ::: (if (libId.isEmpty) Nil else List("libraries", libId)) ::: List(metricsTag, "stats", target.get + "_cov_stats", "coverage", "_all") val total = summary.getValue((prefixPath ::: List("biopet_flagstat", "All")):_*).getOrElse(0L).asInstanceOf[Long] val mapped = summary.getValue((prefixPath ::: List("biopet_flagstat", "Mapped")):_*).getOrElse(0L).asInstanceOf[Long] val duplicates = summary.getValue((prefixPath ::: List("biopet_flagstat", "Duplicates")):_*).getOrElse(0L).asInstanceOf[Long] diff --git a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp index 8aea9602a65fd2282e38909b170722151ac13603..a89c4eb4cb5262adce57afe546f89db2e49b6849 100644 --- a/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp +++ b/public/bammetrics/src/main/resources/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp @@ -8,13 +8,13 @@ <%@ var libId: Option[String] = None %> <%@ var outputDir: File %> <%@ var metricsTag: String = "bammetrics" %> -<%@ var target: String %> +<%@ var target: Option[String] %> #{ - val originalPlot = new File(summary.getValue(sampleId, libId, metricsTag, "files", target + "_cov_stats", "plot", "path") + val originalPlot = new File(summary.getValue(sampleId, libId, metricsTag, "files", target.get + "_cov_stats", "plot", "path") .getOrElse(throw new IllegalArgumentException("No plot found in summary")).toString) - val plot = new File(outputDir, target + "_cov_stats.png") + val plot = new File(outputDir, target.get + "_cov_stats.png") - val values = summary.getValue(sampleId, libId, metricsTag, "stats", target + "_cov_stats", "coverage", "_all") + val values = summary.getValue(sampleId, libId, metricsTag, "stats", target.get + "_cov_stats", "coverage", "_all") .getOrElse(throw new IllegalArgumentException("No plot found in summary")).asInstanceOf[Map[String, Any]] if (originalPlot.exists()) IoUtils.copyFile(originalPlot, plot) diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala index f0c9478603de90f9401241bfcebf797d24196f52..61f4243261df710a4ef7c19780a7bcc13146e790 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala @@ -30,19 +30,14 @@ import org.broadinstitute.gatk.queue.QScript class BamMetrics(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag - with Reference { + with Reference + with TargetRegions { def this() = this(null) @Input(doc = "Bam File", shortName = "BAM", required = true) var inputBam: File = _ - /** Bed files for region of interests */ - var roiBedFiles: List[File] = config("regions_of_interest", Nil) - - /** Bed of amplicon that is used */ - var ampliconBedFile: Option[File] = config("amplicon_bed") - /** Settings for CollectRnaSeqMetrics */ var rnaMetricsSettings: Map[String, String] = Map() var transcriptRefFlatFile: Option[File] = config("transcript_refflat") @@ -145,7 +140,7 @@ class BamMetrics(val root: Configurable) extends QScript val pcrMetrics = CollectTargetedPcrMetrics(this, inputBam, ampIntervals, ampIntervals :: roiIntervals.map(_.intervals), outputDir) add(pcrMetrics) - addSummarizable(chsMetrics, "targeted_pcr_metrics") + addSummarizable(pcrMetrics, "targeted_pcr_metrics") Intervals(bedFile, ampIntervals) } diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala index 78377793adc53b335e60300afece9f2a49cf3fd6..4da9ec9e5f878262e66473d559cc209580674992 100644 --- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BammetricsReport.scala @@ -70,7 +70,7 @@ object BammetricsReport extends ReportBuilder { if (targets.isEmpty) List() else List("Targets" -> ReportPage( List(), - targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> t))), + targets.map(t => t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp", Map("target" -> Some(t)))), Map())), List( "Summary" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala new file mode 100644 index 0000000000000000000000000000000000000000..01c6568c848479dc429b68c771b7f987cc4c9bd6 --- /dev/null +++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/TargetRegions.scala @@ -0,0 +1,16 @@ +package nl.lumc.sasc.biopet.pipelines.bammetrics + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Created by pjvan_thof on 11/20/15. + */ +trait TargetRegions extends Configurable { + /** Bed files for region of interests */ + var roiBedFiles: List[File] = config("regions_of_interest", Nil) + + /** Bed of amplicon that is used */ + var ampliconBedFile: Option[File] = config("amplicon_bed") +} diff --git a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala index 7da7f9c3ce26248867468bb59398533a87cd007e..675d4d7557be2edf84c41494edfaa30af9d13a78 100644 --- a/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala +++ b/public/basty/src/main/scala/nl/lumc/sasc/biopet/pipelines/basty/BastyTrait.scala @@ -27,9 +27,10 @@ import nl.lumc.sasc.biopet.extensions.{ Cat, Raxml, RunGubbins } import nl.lumc.sasc.biopet.pipelines.shiva.{ Shiva, ShivaTrait } import nl.lumc.sasc.biopet.extensions.tools.BastyGenerateFasta import nl.lumc.sasc.biopet.utils.ConfigUtils +import org.broadinstitute.gatk.queue.QScript trait BastyTrait extends MultiSampleQScript { - qscript => + qscript: QScript => case class FastaOutput(variants: File, consensus: File, consensusVariants: File) diff --git a/public/biopet-core/pom.xml b/public/biopet-core/pom.xml index 378295317cb5b79835f6023b4b56a3a455a4d8fe..c2064007a268ffa2a822eb4565a95acb52448a40 100644 --- a/public/biopet-core/pom.xml +++ b/public/biopet-core/pom.xml @@ -57,7 +57,7 @@ <dependency> <groupId>org.broadinstitute.gatk</groupId> <artifactId>gatk-queue</artifactId> - <version>3.4</version> + <version>3.5</version> <exclusions> <exclusion> <groupId>org.broadinstitute.gatk</groupId> @@ -68,7 +68,7 @@ <dependency> <groupId>org.broadinstitute.gatk</groupId> <artifactId>gatk-queue-extensions-public</artifactId> - <version>3.4</version> + <version>3.5</version> </dependency> <dependency> <groupId>org.scalatra.scalate</groupId> diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala index 66c54134218b47a1978fb21efe5393ea782b91a5..c95299b0d4c47ef189a22fce2fcd1dfaff9183f8 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/BiopetQScript.scala @@ -17,17 +17,18 @@ package nl.lumc.sasc.biopet.core import java.io.File +import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.report.ReportBuilderExtension import nl.lumc.sasc.biopet.utils.Logging -import org.broadinstitute.gatk.queue.QSettings +import org.broadinstitute.gatk.queue.{ QScript, QSettings } import org.broadinstitute.gatk.queue.function.QFunction import org.broadinstitute.gatk.queue.function.scattergather.ScatterGatherableFunction import org.broadinstitute.gatk.queue.util.{ Logging => GatkLogging } import org.broadinstitute.gatk.utils.commandline.Argument /** Base for biopet pipeline */ -trait BiopetQScript extends Configurable with GatkLogging { +trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript => @Argument(doc = "JSON / YAML config file(s)", fullName = "config_file", shortName = "config", required = false) val configfiles: List[File] = Nil @@ -125,6 +126,24 @@ trait BiopetQScript extends Configurable with GatkLogging { function.isIntermediate = isIntermediate add(function) } + + def add(subPipeline: QScript): Unit = { + subPipeline.qSettings = this.qSettings + subPipeline match { + case that: SummaryQScript => + that.init() + that.biopetScript() + this match { + case s: SummaryQScript => s.addSummaryQScript(that) + case _ => + } + case that: BiopetQScript => + that.init() + that.biopetScript() + case _ => subPipeline.script + } + addAll(subPipeline.functions) + } } object BiopetQScript { diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala index 81c992b4d5911af0b9a808e7750d15f1078c44e9..fa4f40645b49efee9ba5fb9c42a05c1c10006b0d 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/MultiSampleQScript.scala @@ -19,11 +19,11 @@ import java.io.File import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript } import nl.lumc.sasc.biopet.utils.{ Logging, ConfigUtils } +import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.utils.commandline.Argument /** This trait creates a structured way of use multisample pipelines */ -trait MultiSampleQScript extends SummaryQScript { - qscript => +trait MultiSampleQScript extends SummaryQScript { qscript: QScript => @Argument(doc = "Only Sample", shortName = "s", required = false, fullName = "sample") private[core] val onlySamples: List[String] = Nil diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala index bcdabfaeb1f95ed3bade6df058b3b9ae2b291f96..152230c8a41c18e28b69e3c5fb9d10febe8eec68 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/Reference.scala @@ -132,7 +132,10 @@ object Reference { * @param fastaFile Fasta file */ def requireDict(fastaFile: File): Unit = { - val dict = new File(fastaFile.getAbsolutePath.stripSuffix(".fa").stripSuffix(".fasta") + ".dict") + val dict = new File(fastaFile.getAbsolutePath + .stripSuffix(".fna") + .stripSuffix(".fa") + .stripSuffix(".fasta") + ".dict") if (!dict.exists()) Logging.addError("Reference is missing a dict file") } } diff --git a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala index a1568a9b82f0f16ee691d755b0841c4038fcb7b0..0e947ba6c7d3bbf144c98feb1b7cb21601c6629f 100644 --- a/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala +++ b/public/biopet-core/src/main/scala/nl/lumc/sasc/biopet/core/summary/SummaryQScript.scala @@ -19,6 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core._ import nl.lumc.sasc.biopet.core.extensions.{ CheckChecksum, Md5sum } +import org.broadinstitute.gatk.queue.QScript import scala.collection.mutable @@ -27,7 +28,7 @@ import scala.collection.mutable * * Created by pjvan_thof on 2/14/15. */ -trait SummaryQScript extends BiopetQScript { qscript => +trait SummaryQScript extends BiopetQScript { qscript: QScript => /** Key is sample/library, None is sample or library is not applicable */ private[summary] var summarizables: Map[(String, Option[String], Option[String]), List[Summarizable]] = Map() @@ -91,8 +92,11 @@ trait SummaryQScript extends BiopetQScript { qscript => summaryQScripts :+= summaryQScript } + private var addedJobs = false + /** Add jobs to qscript to execute summary, also add checksum jobs */ def addSummaryJobs(): Unit = { + if (addedJobs) throw new IllegalStateException("Summary jobs for this QScript are already executed") val writeSummary = new WriteSummary(this) def addChecksum(file: File): Unit = { @@ -159,6 +163,8 @@ trait SummaryQScript extends BiopetQScript { qscript => logger.info("Write summary is skipped because sample flag is used") case _ => add(writeSummary) } + + addedJobs = true } } diff --git a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala index 295164106beebe99b2eee2398bbd79c6aff2b7ce..fb8c5a422359040ba6bf53ec78c78020853a470e 100644 --- a/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala +++ b/public/biopet-core/src/test/scala/nl/lumc/sasc/biopet/core/PipelineCommandTest.scala @@ -20,6 +20,8 @@ import org.scalatest.Matchers import org.scalatest.testng.TestNGSuite import org.testng.annotations.Test +import scala.language.reflectiveCalls + /** * Created by pjvanthof on 17/11/15. */ diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala index e8f9c2caf43ff1163218a5814bfa0ba45f0b6ceb..6e06894f916a5206bcac748d924eb8f3a9f51c53 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Cutadapt.scala @@ -43,6 +43,9 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su def versionCommand = executable + " --version" def versionRegex = """(.*)""".r + /** Name of the key containing clipped adapters information in the summary stats. */ + def adaptersStatsName = "adapters" + var default_clip_mode: String = config("default_clip_mode", default = "3") var opt_adapter: Set[String] = config("adapter", default = Nil) var opt_anywhere: Set[String] = config("anywhere", default = Nil) @@ -89,7 +92,7 @@ class Cutadapt(val root: Configurable) extends BiopetCommandLineFunction with Su Map("num_reads_affected" -> stats("trimmed"), "num_reads_discarded_too_short" -> stats("tooshort"), "num_reads_discarded_too_long" -> stats("toolong"), - "adapters" -> adapter_stats.toMap + adaptersStatsName -> adapter_stats.toMap ) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala index daaaf73d96c7d190c61bbdbc45331115eb388aed..a94561d8bc0c2aec368148b20e159e08e2e56ef0 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Tabix.scala @@ -56,7 +56,7 @@ class Tabix(val root: Configurable) extends BiopetCommandLineFunction with Versi var l: Boolean = config("l", default = false) var f: Boolean = config("f", default = false) - executable = config("exe", default = "tabix") + executable = config("exe", default = "tabix", freeVar = false) def versionCommand = executable def versionRegex = """Version: (.*)""".r diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala index 6e7bf4146464e9c6f69e7759569985fce7204e23..2291df063b26293132bbd95c9f8ccf56c8ddfd13 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bcftools/Bcftools.scala @@ -19,7 +19,7 @@ import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } abstract class Bcftools extends BiopetCommandLineFunction with Version { override def subPath = "bcftools" :: super.subPath - executable = config("exe", default = "bcftools") + executable = config("exe", default = "bcftools", submodule = "bcftools", freeVar = false) def versionCommand = executable def versionRegex = """Version: (.*)""".r override def versionExitcode = List(0, 1) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/Breakdancer.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/Breakdancer.scala deleted file mode 100644 index 9f662e3d7830f542a7361c11c5803385afef44ac..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/breakdancer/Breakdancer.scala +++ /dev/null @@ -1,80 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.breakdancer - -import java.io.File - -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript, PipelineCommand } -import org.broadinstitute.gatk.queue.QScript - -/// Breakdancer is actually a mini pipeline executing binaries from the breakdancer package -class Breakdancer(val root: Configurable) extends QScript with BiopetQScript with Reference { - def this() = this(null) - - @Input(doc = "Input file (bam)") - var input: File = _ - - @Argument(doc = "Work directory") - var workDir: File = _ - - var deps: List[File] = Nil - - @Output(doc = "Breakdancer config") - lazy val configfile: File = { - new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.cfg") - } - @Output(doc = "Breakdancer raw output") - lazy val outputraw: File = { - new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.tsv") - } - @Output(doc = "Breakdancer VCF output") - lazy val outputvcf: File = { - new File(workDir, input.getName.substring(0, input.getName.lastIndexOf(".bam")) + ".breakdancer.vcf") - } - - override def init(): Unit = { - } - - def biopetScript() { - // read config and set all parameters for the pipeline - logger.info("Starting Breakdancer configuration") - - val bdcfg = BreakdancerConfig(this, input, this.configfile) - bdcfg.deps = this.deps - outputFiles += ("cfg" -> bdcfg.output) - add(bdcfg) - - val breakdancer = BreakdancerCaller(this, bdcfg.output, this.outputraw) - add(breakdancer) - outputFiles += ("tsv" -> breakdancer.output) - - val bdvcf = BreakdancerVCF(this, breakdancer.output, this.outputvcf) - add(bdvcf) - outputFiles += ("vcf" -> bdvcf.output) - } -} - -object Breakdancer extends PipelineCommand { - def apply(root: Configurable, input: File, runDir: File): Breakdancer = { - val breakdancer = new Breakdancer(root) - breakdancer.input = input - breakdancer.workDir = runDir - breakdancer.init() - breakdancer.biopetScript() - breakdancer - } -} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/Delly.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/Delly.scala deleted file mode 100644 index b5fabe35e56555ba1723ab9270c04578b6bde23e..0000000000000000000000000000000000000000 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/delly/Delly.scala +++ /dev/null @@ -1,124 +0,0 @@ -/** - * Biopet is built on top of GATK Queue for building bioinformatic - * pipelines. It is mainly intended to support LUMC SHARK cluster which is running - * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) - * should also be able to execute Biopet tools and pipelines. - * - * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center - * - * Contact us at: sasc@lumc.nl - * - * A dual licensing mode is applied. The source code within this project that are - * not part of GATK Queue is freely available for non-commercial use under an AGPL - * license; For commercial users or users who do not want to follow the AGPL - * license, please contact us to obtain a separate license. - */ -package nl.lumc.sasc.biopet.extensions.delly - -import java.io.File - -import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript, PipelineCommand } -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.extensions.Ln -import org.broadinstitute.gatk.queue.QScript -import org.broadinstitute.gatk.queue.extensions.gatk.CatVariants - -class Delly(val root: Configurable) extends QScript with BiopetQScript with Reference { - def this() = this(null) - - @Input(doc = "Input file (bam)") - var input: File = _ - - var workDir: File = _ - - @Output(doc = "Delly result VCF") - var outputVcf: File = _ - - var outputName: String = _ - - // select the analysis types DEL,DUP,INV,TRA - var del: Boolean = config("DEL", default = true) - var dup: Boolean = config("DUP", default = true) - var inv: Boolean = config("INV", default = true) - var tra: Boolean = config("TRA", default = true) - - override def init(): Unit = { - if (outputName == null) outputName = input.getName.stripSuffix(".bam") - if (outputVcf == null) outputVcf = new File(workDir, outputName + ".delly.vcf") - } - - def biopetScript() { - // write the pipeline here - logger.info("Configuring Delly pipeline") - var outputFiles: Map[String, File] = Map() - var vcfFiles: Map[String, File] = Map() - - /// start delly and then copy the vcf into the root directory "<sample>.delly/" - if (del) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "DEL" - delly.outputvcf = new File(workDir, outputName + ".delly.del.vcf") - add(delly) - vcfFiles += ("DEL" -> delly.outputvcf) - } - if (dup) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "DUP" - delly.outputvcf = new File(workDir, outputName + ".delly.dup.vcf") - add(delly) - vcfFiles += ("DUP" -> delly.outputvcf) - } - if (inv) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "INV" - delly.outputvcf = new File(workDir, outputName + ".delly.inv.vcf") - add(delly) - vcfFiles += ("INV" -> delly.outputvcf) - } - if (tra) { - val delly = new DellyCaller(this) - delly.input = input - delly.analysistype = "TRA" - delly.outputvcf = new File(workDir, outputName + ".delly.tra.vcf") - // vcfFiles += ("TRA" -> delly.outputvcf) - add(delly) - } - // we need to merge the vcf's - val finalVCF = if (vcfFiles.size > 1) { - // do merging - // CatVariants is a $org.broadinstitute.gatk.utils.commandline.CommandLineProgram$; - //TODO: convert to biopet extension - val variants = new CatVariants() - variants.variant = vcfFiles.values.toList - variants.outputFile = this.outputVcf - variants.reference = referenceFasta() - // add the job - //add(variants) - Some(outputVcf) - } else if (vcfFiles.size == 1) { - // TODO: pretify this - val ln = Ln(this, vcfFiles.head._2, this.outputVcf, relative = true) - //add(ln) - Some(ln.output) - } else None - - finalVCF.foreach(file => outputFiles += ("vcf" -> file)) - } -} - -object Delly extends PipelineCommand { - override val pipeline = "/nl/lumc/sasc/biopet/extensions/svcallers/Delly/Delly.class" - - def apply(root: Configurable, input: File, workDir: File): Delly = { - val dellyPipeline = new Delly(root) - dellyPipeline.input = input - dellyPipeline.workDir = workDir - dellyPipeline.init() - dellyPipeline.biopetScript() - dellyPipeline - } - -} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala new file mode 100644 index 0000000000000000000000000000000000000000..d5e3fffcfaeb7d842743ecc9e8896e7de860bbed --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CatVariants.scala @@ -0,0 +1,55 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.gatk + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Reference, BiopetJavaCommandLineFunction } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +class CatVariants(val root: Configurable) extends BiopetJavaCommandLineFunction with Reference { + + javaMainClass = classOf[org.broadinstitute.gatk.tools.CatVariants].getClass.getName + + @Input(required = true) + var inputFiles: List[File] = Nil + + @Output(required = true) + var outputFile: File = null + + @Input + var reference: File = null + + override def beforeGraph(): Unit = { + super.beforeGraph() + if (reference == null) reference = referenceFasta() + } + + override def cmdLine = super.cmdLine + + repeat("-V", inputFiles) + + required("-out", outputFile) + + required("-R", reference) +} + +object CatVariants { + def apply(root: Configurable, input: List[File], output: File): CatVariants = { + val cv = new CatVariants(root) + cv.inputFiles = input + cv.outputFile = output + cv + } +} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala index 8291e667b67df0b03a10114f6386670f18d572e4..cc230a6ad48e50f782f773a54d1537a72ad85e2d 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/CombineVariants.scala @@ -49,6 +49,7 @@ class CombineVariants(val root: Configurable) extends Gatk { override def beforeGraph(): Unit = { super.beforeGraph() + if (outputFile.getName.endsWith(".vcf.gz")) outputFiles :+= new File(outputFile.getAbsolutePath + ".tbi") genotypeMergeOptions match { case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None => case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions") diff --git a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala similarity index 98% rename from protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala index 11b59f2fa52bf56e455475e2e719a746f6ff41ed..f4efc801d2ee4d60c6ffb949dd602334a8491836 100644 --- a/protected/biopet-gatk-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/gatk/broad/GatkGeneral.scala @@ -10,8 +10,6 @@ import org.broadinstitute.gatk.engine.phonehome.GATKRunReport import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version { - memoryLimit = Option(3) - var executable: String = config("java", default = "java", submodule = "java", freeVar = false) override def subPath = "gatk" :: super.subPath diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala index 0479bbb754d62a13e4e7d99bf6ac949b8cd69aec..087614a6a81367d6826e6d65ffa97d4610e3cd5d 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/CollectGcBiasMetrics.scala @@ -35,7 +35,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza @Output(doc = "Output chart", required = false) var outputChart: File = _ - @Output(doc = "Output summary", required = false) + @Output(doc = "Output summary", required = true) var outputSummary: File = _ @Input(doc = "Reference file", required = false) @@ -67,7 +67,7 @@ class CollectGcBiasMetrics(val root: Configurable) extends Picard with Summariza required("OUTPUT=", output, spaceSeparated = false) + optional("CHART_OUTPUT=", outputChart, spaceSeparated = false) + required("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) + - optional("SUMMARY_OUTPUT=", outputSummary, spaceSeparated = false) + + required("SUMMARY_OUTPUT=", outputSummary, spaceSeparated = false) + optional("WINDOW_SIZE=", windowSize, spaceSeparated = false) + optional("MINIMUM_GENOME_FRACTION=", minGenomeFraction, spaceSeparated = false) + conditional(assumeSorted, "ASSUME_SORTED=TRUE") + @@ -86,6 +86,7 @@ object CollectGcBiasMetrics { val collectGcBiasMetrics = new CollectGcBiasMetrics(root) collectGcBiasMetrics.input :+= input collectGcBiasMetrics.output = new File(outputDir, input.getName.stripSuffix(".bam") + ".gcbiasmetrics") + collectGcBiasMetrics.outputSummary = new File(outputDir, input.getName.stripSuffix(".bam") + ".gcbiasmetrics.summary") collectGcBiasMetrics } } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala index b1f545fb321f79ab2b5605a6e454dee0586ff2eb..f0703990369edb6791ffb11b61bdbbf0927a270a 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/Samtools.scala @@ -20,7 +20,7 @@ import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction } /** General class for samtools extensions */ abstract class Samtools extends BiopetCommandLineFunction with Version { override def subPath = "samtools" :: super.subPath - executable = config("exe", default = "samtools") + executable = config("exe", default = "samtools", submodule = "samtools", freeVar = false) def versionCommand = executable def versionRegex = """Version: (.*)""".r override def versionExitcode = List(0, 1) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala new file mode 100644 index 0000000000000000000000000000000000000000..08c04857a60297d0aa8c268764d7bff44a3866ac --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/Vt.scala @@ -0,0 +1,28 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.extensions.vt + +import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction + +/** + * General vt extension + * + * Created by pjvan_thof on 1/16/15. + */ +abstract class Vt extends BiopetCommandLineFunction { + override def subPath = "vt" :: super.subPath + executable = config("exe", default = "vt") +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala new file mode 100644 index 0000000000000000000000000000000000000000..c5acb62ff24dbf189318122453a8f8db92f24c25 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtDecompose.scala @@ -0,0 +1,32 @@ +package nl.lumc.sasc.biopet.extensions.vt + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Reference, Version } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Input, Output } + +/** + * Created by pjvanthof on 20/11/15. + */ +class VtDecompose(val root: Configurable) extends Vt with Version with Reference { + def versionRegex = """decompose (.*)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " decompose" + + @Input(required = true) + var inputVcf: File = _ + + @Output(required = true) + var outputVcf: File = _ + + var intervalsFile: Option[File] = config("intervals_file") + + val smartDecompose: Boolean = config("smart_decompose", default = false) + + def cmdLine = required(executable) + required("decompose") + + required("-o", outputVcf) + + optional("-I", intervalsFile) + + conditional(smartDecompose, "-s") + + required(inputVcf) +} \ No newline at end of file diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala new file mode 100644 index 0000000000000000000000000000000000000000..26812e2636a4bd794dc8c74e47c73fb137a3740d --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/vt/VtNormalize.scala @@ -0,0 +1,40 @@ +package nl.lumc.sasc.biopet.extensions.vt + +import java.io.File + +import nl.lumc.sasc.biopet.core.{ Reference, Version } +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Output, Input } + +/** + * Created by pjvanthof on 20/11/15. + */ +class VtNormalize(val root: Configurable) extends Vt with Version with Reference { + def versionRegex = """normalize (.*)""".r + override def versionExitcode = List(0, 1) + def versionCommand = executable + " normalize" + + @Input(required = true) + var inputVcf: File = _ + + @Output(required = true) + var outputVcf: File = _ + + var windowSize: Option[Int] = config("windows_size") + + var intervalsFile: Option[File] = config("intervals_file") + + var reference: File = _ + + override def beforeGraph(): Unit = { + super.beforeGraph() + reference = referenceFasta() + } + + def cmdLine = required(executable) + required("normalize") + + required("-o", outputVcf) + + optional("-w", windowSize) + + optional("-I", intervalsFile) + + required("-r", reference) + + required(inputVcf) +} \ No newline at end of file diff --git a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala index 6dbf83580e16f3c8b19d046142d804052b7ebb80..a7e1ea63f9a07ccdaf62ebd753b8ee5d46b94e27 100644 --- a/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala +++ b/public/biopet-public-package/src/main/scala/nl/lumc/sasc/biopet/BiopetExecutablePublic.scala @@ -15,6 +15,7 @@ */ package nl.lumc.sasc.biopet +import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling import nl.lumc.sasc.biopet.utils.{ BiopetExecutable, MainCommand } object BiopetExecutablePublic extends BiopetExecutable { @@ -34,7 +35,7 @@ object BiopetExecutablePublic extends BiopetExecutable { def pipelines: List[MainCommand] = List( nl.lumc.sasc.biopet.pipelines.shiva.Shiva, - nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling, + ShivaVariantcalling, nl.lumc.sasc.biopet.pipelines.basty.Basty ) ::: publicPipelines diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala index b6f90cc6261b3677340c51c5d1197a50d901181e..6e7f6c86d0c834b193525aa4261cd4a023217412 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/MpileupToVcf.scala @@ -38,6 +38,9 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe @Output(doc = "Output tag library", shortName = "output", required = true) var output: File = _ + @Output + private var outputIndex: File = _ + var minDP: Option[Int] = config("min_dp") var minAP: Option[Int] = config("min_ap") var homoFraction: Option[Double] = config("homoFraction") @@ -50,6 +53,7 @@ class MpileupToVcf(val root: Configurable) extends ToolCommandFunction with Refe override def beforeGraph() { super.beforeGraph() if (reference == null) reference = referenceFasta().getAbsolutePath + if (output.getName.endsWith(".vcf.gz")) outputIndex = new File(output.getAbsolutePath + ".tbi") val samtoolsMpileup = new SamtoolsMpileup(this) } diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala index 0c2639c7b1bc0e79cae7486a1775f1039d350a15..df131db588a2709d238213968c18aaa86ffb3962 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfFilter.scala @@ -30,6 +30,9 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction { @Output(doc = "Output vcf", shortName = "o", required = false) var outputVcf: File = _ + @Output + var outputVcfIndex: File = _ + var minSampleDepth: Option[Int] = config("min_sample_depth") var minTotalDepth: Option[Int] = config("min_total_depth") var minAlternateDepth: Option[Int] = config("min_alternate_depth") @@ -38,6 +41,11 @@ class VcfFilter(val root: Configurable) extends ToolCommandFunction { override def defaultCoreMemory = 3.0 + override def beforeGraph(): Unit = { + super.beforeGraph() + if (outputVcf.getName.endsWith("vcf.gz")) outputVcfIndex = new File(outputVcf.getAbsolutePath + ".tbi") + } + override def cmdLine = super.cmdLine + required("-I", inputVcf) + required("-o", outputVcf) + diff --git a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala index 4db2b44745416068a95c0da190a541f9c25479ea..bf52dbea4111a1dfa1227addada89392336a1834 100644 --- a/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala +++ b/public/biopet-tools-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/tools/VcfStats.scala @@ -54,6 +54,7 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz var allInfoTags = false var allGenotypeTags = false var reference: File = _ + var intervals: Option[File] = None override def beforeGraph(): Unit = { reference = referenceFasta() @@ -76,7 +77,8 @@ class VcfStats(val root: Configurable) extends ToolCommandFunction with Summariz repeat("--genotypeTag", genotypeTags) + conditional(allInfoTags, "--allInfoTags") + conditional(allGenotypeTags, "--allGenotypeTags") + - required("-R", reference) + required("-R", reference) + + optional("--intervals", intervals) /** Returns general stats to the summary */ def summaryStats: Map[String, Any] = { diff --git a/public/biopet-utils/pom.xml b/public/biopet-utils/pom.xml index 6875ae4b93edcc15da0ea333f7cd86be79ffef9c..8b70e105c01ea1fd2eda061129fd4cdc8ae1ff3c 100644 --- a/public/biopet-utils/pom.xml +++ b/public/biopet-utils/pom.xml @@ -62,7 +62,7 @@ <dependency> <groupId>com.github.samtools</groupId> <artifactId>htsjdk</artifactId> - <version>1.132</version> + <version>1.141</version> </dependency> <dependency> <groupId>org.scala-lang</groupId> diff --git a/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala new file mode 100644 index 0000000000000000000000000000000000000000..f13230534af0a61fa29f68daa8d48c81a55e1f23 --- /dev/null +++ b/public/biopet-utils/src/main/scala/nl/lumc/sasc/biopet/utils/BamUtils.scala @@ -0,0 +1,33 @@ +package nl.lumc.sasc.biopet.utils + +import java.io.File + +import htsjdk.samtools.SamReaderFactory + +import scala.collection.JavaConversions._ + +/** + * Created by pjvan_thof on 11/19/15. + */ +object BamUtils { + + /** + * This method will convert a list of bam files to a Map[<sampleName>, <bamFile>] + * + * Each sample may only be once in the list + * + * @throws IllegalArgumentException + * @param bamFiles input bam files + * @return + */ + def sampleBamMap(bamFiles: List[File]): Map[String, File] = { + val temp = bamFiles.map { file => + val inputSam = SamReaderFactory.makeDefault.open(file) + val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct + if (samples.size == 1) samples.head -> file + else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) + } + if (temp.map(_._1).distinct.size != temp.size) throw new IllegalArgumentException("Samples has been found twice") + temp.toMap + } +} diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala index de37b352ecf8a9091ee67d1b455e85087156f642..a31ca43355b72f09d219340725e23d6a0aca2eba 100644 --- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala +++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala @@ -150,8 +150,6 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with macs2.name = Some(sampleId) macs2.outputdir = sampleDir + File.separator + "macs2" + File.separator + sampleId + File.separator add(macs2) - - addSummaryJobs() } } @@ -176,6 +174,8 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with logger.info("Starting CArP pipeline") addSamplesJobs() + + addSummaryJobs() } def addMultiSampleJobs(): Unit = { diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala new file mode 100644 index 0000000000000000000000000000000000000000..b34b3772296f9de419f7a249d45daefc513c7259 --- /dev/null +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Cutadapt.scala @@ -0,0 +1,62 @@ +/** + * Biopet is built on top of GATK Queue for building bioinformatic + * pipelines. It is mainly intended to support LUMC SHARK cluster which is running + * SGE. But other types of HPC that are supported by GATK Queue (such as PBS) + * should also be able to execute Biopet tools and pipelines. + * + * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center + * + * Contact us at: sasc@lumc.nl + * + * A dual licensing mode is applied. The source code within this project that are + * not part of GATK Queue is freely available for non-commercial use under an AGPL + * license; For commercial users or users who do not want to follow the AGPL + * license, please contact us to obtain a separate license. + */ +package nl.lumc.sasc.biopet.pipelines.flexiprep + +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Cutadapt wrapper specific for Flexiprep. + * + * This wrapper overrides the summary part so that instead of only displaying the clipped adapters, the sequence names + * are also displayed. In Flexiprep the sequence will always have names since they are detected by FastQC from a list + * of known adapters / contaminants. + * + * @param root: Configurable object from which this wrapper is initialized. + * @param fastqc: Fastqc wrapper that contains adapter information. + */ +class Cutadapt(root: Configurable, fastqc: Fastqc) extends nl.lumc.sasc.biopet.extensions.Cutadapt(root) { + + /** Clipped adapter names from FastQC */ + protected def seqToName = fastqc.foundAdapters + .map(adapter => adapter.seq -> adapter.name).toMap + + override def summaryStats: Map[String, Any] = { + val initStats = super.summaryStats + // Map of adapter sequence and how many times it is found + val adapterCounts: Map[String, Any] = initStats.get(adaptersStatsName) match { + // "adapters" key found in statistics + case Some(m: Map[_, _]) => m.flatMap { + case (seq: String, count) => + seqToName.get(seq) match { + // adapter sequence is found by FastQC + case Some(n) => Some(n -> Map("sequence" -> seq, "count" -> count)) + // adapter sequence is clipped but not found by FastQC ~ should not happen since all clipped adapter + // sequences come from FastQC + case _ => + throw new IllegalStateException(s"Adapter '$seq' is clipped but not found by FastQC in '$fastq_input'.") + } + // FastQC found no adapters + case otherwise => + ; + logger.debug(s"No adapters found for summarizing in '$fastq_input'.") + None + } + // "adapters" key not found ~ something went wrong in our part + case _ => throw new RuntimeException(s"Required key 'adapters' not found in stats entry '$fastq_input'.") + } + initStats.updated(adaptersStatsName, adapterCounts) + } +} diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala index 1f1f2510cc600544c95d8db117bad3c3e888be01..7c2b9ace137c347e9f78d99543eb56ebf28ef5b4 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/Flexiprep.scala @@ -195,6 +195,7 @@ class Flexiprep(val root: Configurable) extends QScript with SummaryQScript with override def beforeGraph(): Unit = { fqSync.beforeGraph() + commands = qcCmdR1.jobs ::: qcCmdR2.jobs ::: fqSync :: Nil super.beforeGraph() } diff --git a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala index 4cb06039b07e2bbe33fa4396aebc98a022189952..22a9a4a526a0a8d0c640b9262baef99063f07288 100644 --- a/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala +++ b/public/flexiprep/src/main/scala/nl/lumc/sasc/biopet/pipelines/flexiprep/QcCommand.scala @@ -19,7 +19,7 @@ import java.io.File import nl.lumc.sasc.biopet.core.summary.{ SummaryQScript, Summarizable } import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, BiopetCommandLineFunction } -import nl.lumc.sasc.biopet.extensions.{ Cat, Gzip, Sickle, Cutadapt } +import nl.lumc.sasc.biopet.extensions.{ Cat, Gzip, Sickle } import nl.lumc.sasc.biopet.extensions.seqtk.SeqtkSeq import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Output, Input } @@ -50,7 +50,15 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman val seqtk = new SeqtkSeq(root) var clip: Option[Cutadapt] = None var trim: Option[Sickle] = None - var outputCommand: BiopetCommandLineFunction = null + lazy val outputCommand: BiopetCommandLineFunction = if (compress) { + val gzip = Gzip(root) + gzip.output = output + gzip + } else { + val cat = Cat(root) + cat.output = output + cat + } def jobs = (Some(seqtk) :: clip :: trim :: Some(outputCommand) :: Nil).flatten @@ -93,7 +101,7 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman clip = if (!flexiprep.skipClip) { val foundAdapters = fastqc.foundAdapters.map(_.seq) if (foundAdapters.nonEmpty) { - val cutadapt = new Cutadapt(root) + val cutadapt = new Cutadapt(root, fastqc) cutadapt.fastq_input = seqtk.output cutadapt.fastq_output = new File(output.getParentFile, input.getName + ".cutadapt.fq") cutadapt.stats_output = new File(flexiprep.outputDir, s"${flexiprep.sampleId.getOrElse("x")}-${flexiprep.libId.getOrElse("x")}.$read.clip.stats") @@ -123,16 +131,9 @@ class QcCommand(val root: Configurable, val fastqc: Fastqc) extends BiopetComman case _ => seqtk.output } - if (compress) outputCommand = { - val gzip = new Gzip(root) - gzip.output = output - outputFile :<: gzip - } - else outputCommand = { - val cat = new Cat(root) - cat.input = outputFile :: Nil - cat.output = output - cat + outputCommand match { + case gzip: Gzip => outputFile :<: gzip + case cat: Cat => cat.input = outputFile :: Nil } seqtk.beforeGraph() diff --git a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp index 8bc095e84ac3d566d3b0f453ecb0eeccd93f553e..674910dabedc3fcb7d7759cd8952d93a68240f01 100644 --- a/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp +++ b/public/shiva/src/main/resources/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp @@ -8,13 +8,20 @@ <%@ var outputDir: File %> <%@ var showPlot: Boolean = false %> <%@ var showTable: Boolean = true %> -<%@ var showIntro: Boolean = true%> +<%@ var showIntro: Boolean = true %> +<%@ var target: Option[String] = None %> +<%@ var caller: String = "final" %> + #{ val fields = List("Hom", "HomVar", "Het", "HomRef", "NoCall", "Variant", "Total") val samples = sampleId match { case Some(sample) => List(sample.toString) case _ => summary.samples.toList } + val vcfstatsKey = target match { + case Some(t) => s"multisample-vcfstats-$caller-$t" + case _ => s"multisample-vcfstats-$caller" + } }# #if (showIntro) @@ -70,7 +77,7 @@ #for (sample <- samples.toList.sorted) <tr><td><a href="${rootPath}Samples/${sample}/index.html">${sample}</a></td> #for (field <- fields) - <td>${summary.getSampleValue(sample, "shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", field)}</td> + <td>${summary.getSampleValue(sample, "shivavariantcalling", "stats", vcfstatsKey, "genotype", field)}</td> #end </tr> #end diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala index e09b34e74fc290034b2238a9a64fb49826ec029e..5c88189833b1b6bd169c3da475c6c2370957abf9 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaReport.scala @@ -107,11 +107,14 @@ object ShivaReport extends MultisampleReportBuilder { } if (regionPages.nonEmpty) Some("Regions" -> ReportPage( - List(), - regionPages.map(p => p._1 -> ReportSection( - "/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp", - Map("target" -> p._1.stripSuffix(" (Amplicon)")) + regionPages.map(p => p._1 -> ReportPage(Nil, + List( + "Variants" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/shiva/sampleVariants.ssp", Map("showPlot" -> true)), + "Coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsMultiTable.ssp") + ), + Map("target" -> Some(p._1.stripSuffix(" (Amplicon)"))) )).toList.sortBy(_._1), + List(), Map()) ) else None @@ -178,7 +181,9 @@ object ShivaReport extends MultisampleReportBuilder { prefix: String, summary: Summary, libraryLevel: Boolean = false, - sampleId: Option[String] = None): Unit = { + sampleId: Option[String] = None, + caller: String = "final", + target: Option[String] = None): Unit = { val tsvFile = new File(outputDir, prefix + ".tsv") val pngFile = new File(outputDir, prefix + ".png") val tsvWriter = new PrintWriter(tsvFile) @@ -186,14 +191,14 @@ object ShivaReport extends MultisampleReportBuilder { tsvWriter.println("\tHomVar\tHet\tHomRef\tNoCall") def getLine(summary: Summary, sample: String, lib: Option[String] = None): String = { - val homVar = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "HomVar"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong - val homRef = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "HomRef"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong - val noCall = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "NoCall"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong - val het = new SummaryValue(List("shivavariantcalling", "stats", "multisample-vcfstats-final", "genotype", "Het"), - summary, Some(sample), lib).value.getOrElse(0).toString.toLong + val path = target match { + case Some(t) => List("shivavariantcalling", "stats", s"multisample-vcfstats-$caller-$t", "genotype") + case _ => List("shivavariantcalling", "stats", s"multisample-vcfstats-$caller", "genotype") + } + val homVar = new SummaryValue(path :+ "HomVar", summary, Some(sample), lib).value.getOrElse(0).toString.toLong + val homRef = new SummaryValue(path :+ "HomRef", summary, Some(sample), lib).value.getOrElse(0).toString.toLong + val noCall = new SummaryValue(path :+ "NoCall", summary, Some(sample), lib).value.getOrElse(0).toString.toLong + val het = new SummaryValue(path :+ "Het", summary, Some(sample), lib).value.getOrElse(0).toString.toLong val sb = new StringBuffer() if (lib.isDefined) sb.append(sample + "-" + lib.get + "\t") else sb.append(sample + "\t") sb.append(homVar + "\t") diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala index 79a5219751a35691885b4f61365f4d71b0333105..a810b5257a94479e5e7125e6755722543457ec30 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaSvCalling.scala @@ -15,18 +15,13 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import htsjdk.samtools.SamReaderFactory import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ PipelineCommand, Reference, SampleLibraryTag } -import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer -import nl.lumc.sasc.biopet.extensions.clever.CleverCaller -import nl.lumc.sasc.biopet.extensions.delly.Delly -import nl.lumc.sasc.biopet.utils.Logging +import nl.lumc.sasc.biopet.pipelines.shiva.svcallers.{ Delly, Breakdancer, Clever, SvCaller } +import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.queue.QScript -import scala.collection.JavaConversions._ - /** * Common trait for ShivaVariantcalling * @@ -40,25 +35,11 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) protected var inputBamsArg: List[File] = Nil - protected var inputBams: Map[String, File] = Map() - - def addBamFile(file: File, sampleId: Option[String] = None): Unit = { - sampleId match { - case Some(sample) => inputBams += sample -> file - case _ if !file.exists() => throw new IllegalArgumentException("Bam file does not exits: " + file) - case _ => { - val inputSam = SamReaderFactory.makeDefault.open(file) - val samples = inputSam.getFileHeader.getReadGroups.map(_.getSample).distinct - if (samples.size == 1) { - inputBams += samples.head -> file - } else throw new IllegalArgumentException("Bam contains multiple sample IDs: " + file) - } - } - } + var inputBams: Map[String, File] = Map() /** Executed before script */ def init(): Unit = { - inputBamsArg.foreach(addBamFile(_)) + if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) } /** Variantcallers requested by the config */ @@ -76,68 +57,16 @@ class ShivaSvCalling(val root: Configurable) extends QScript with SummaryQScript require(inputBams.nonEmpty, "No input bams found") require(callers.nonEmpty, "must select at least 1 SV caller, choices are: " + callersList.map(_.name).mkString(", ")) - callers.foreach(_.addJobs()) + callers.foreach { caller => + caller.outputDir = new File(outputDir, caller.name) + add(caller) + } addSummaryJobs() } /** Will generate all available variantcallers */ - protected def callersList: List[SvCaller] = List(new Breakdancer, new Clever, new Delly) - - /** General trait for a variantcaller mode */ - trait SvCaller { - /** Name of mode, this should also be used in the config */ - val name: String - - /** Output dir for this mode */ - def outputDir = new File(qscript.outputDir, name) - - /** This should add the variantcaller jobs */ - def addJobs() - } - - /** default mode of freebayes */ - class Breakdancer extends SvCaller { - val name = "breakdancer" - - def addJobs() { - //TODO: move minipipeline of breakdancer to here - for ((sample, bamFile) <- inputBams) { - val breakdancerDir = new File(outputDir, sample) - val breakdancer = Breakdancer(qscript, bamFile, breakdancerDir) - addAll(breakdancer.functions) - } - } - } - - /** default mode of bcftools */ - class Clever extends SvCaller { - val name = "clever" - - def addJobs() { - //TODO: check double directories - for ((sample, bamFile) <- inputBams) { - val cleverDir = new File(outputDir, sample) - val clever = CleverCaller(qscript, bamFile, cleverDir) - add(clever) - } - } - } - - /** Makes a vcf file from a mpileup without statistics */ - class Delly extends SvCaller { - val name = "delly" - - def addJobs() { - //TODO: Move mini delly pipeline to here - for ((sample, bamFile) <- inputBams) { - val dellyDir = new File(outputDir, sample) - val delly = Delly(qscript, bamFile, dellyDir) - delly.outputName = sample - addAll(delly.functions) - } - } - } + protected def callersList: List[SvCaller] = List(new Breakdancer(this), new Clever(this), new Delly(this)) /** Location of summary file */ def summaryFile = new File(outputDir, "ShivaSvCalling.summary.json") diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala index 5c243df8660bfc0e3333e21dff782e9fc404c5ad..ca2706f710c23ae1625d2908bdc914a2a687afb2 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaTrait.scala @@ -15,16 +15,15 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import java.io.File - import htsjdk.samtools.SamReaderFactory import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, Reference } import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.picard.{ AddOrReplaceReadGroups, MarkDuplicates, SamToFastq } -import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics +import nl.lumc.sasc.biopet.pipelines.bammetrics.{ TargetRegions, BamMetrics } import nl.lumc.sasc.biopet.pipelines.mapping.Mapping import nl.lumc.sasc.biopet.pipelines.toucan.Toucan import nl.lumc.sasc.biopet.utils.Logging +import org.broadinstitute.gatk.queue.QScript import scala.collection.JavaConversions._ @@ -33,8 +32,7 @@ import scala.collection.JavaConversions._ * * Created by pjvan_thof on 2/26/15. */ -trait ShivaTrait extends MultiSampleQScript with Reference { - qscript => +trait ShivaTrait extends MultiSampleQScript with Reference with TargetRegions { qscript: QScript => /** Executed before running the script */ def init(): Unit = { @@ -233,8 +231,8 @@ trait ShivaTrait extends MultiSampleQScript with Reference { vc.sampleId = Some(sampleId) vc.libId = Some(libId) vc.outputDir = new File(libDir, "variantcalling") - if (preProcessBam.isDefined) vc.inputBams = preProcessBam.get :: Nil - else vc.inputBams = bamFile.get :: Nil + if (preProcessBam.isDefined) vc.inputBams = Map(sampleId -> preProcessBam.get) + else vc.inputBams = Map(sampleId -> bamFile.get) vc.init() vc.biopetScript() addAll(vc.functions) @@ -299,7 +297,7 @@ trait ShivaTrait extends MultiSampleQScript with Reference { variantcalling.foreach(vc => { vc.sampleId = Some(sampleId) vc.outputDir = new File(sampleDir, "variantcalling") - vc.inputBams = bam :: Nil + vc.inputBams = Map(sampleId -> bam) vc.init() vc.biopetScript() addAll(vc.functions) @@ -326,7 +324,7 @@ trait ShivaTrait extends MultiSampleQScript with Reference { def addMultiSampleJobs(): Unit = { multisampleVariantCalling.foreach(vc => { vc.outputDir = new File(outputDir, "variantcalling") - vc.inputBams = samples.flatMap(_._2.preProcessBam).toList + vc.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } vc.init() vc.biopetScript() addAll(vc.functions) @@ -344,7 +342,7 @@ trait ShivaTrait extends MultiSampleQScript with Reference { svCalling.foreach(sv => { sv.outputDir = new File(outputDir, "sv_calling") - samples.foreach(x => x._2.preProcessBam.foreach(bam => sv.addBamFile(bam, Some(x._1)))) + sv.inputBams = samples.flatMap { case (sampleId, sample) => sample.preProcessBam.map(sampleId -> _) } sv.init() sv.biopetScript() addAll(sv.functions) @@ -356,19 +354,14 @@ trait ShivaTrait extends MultiSampleQScript with Reference { def summaryFile = new File(outputDir, "Shiva.summary.json") /** Settings of pipeline for summary */ - def summarySettings = { - val roiBedFiles: List[File] = config("regions_of_interest", Nil) - val ampliconBedFile: Option[File] = config("amplicon_bed") - - Map( - "reference" -> referenceSummary, - "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), - "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")), - "annotation" -> annotation.isDefined, - "multisample_variantcalling" -> multisampleVariantCalling.isDefined, - "sv_calling" -> svCalling.isDefined - ) - } + def summarySettings = Map( + "reference" -> referenceSummary, + "annotation" -> annotation.isDefined, + "multisample_variantcalling" -> multisampleVariantCalling.isDefined, + "sv_calling" -> svCalling.isDefined, + "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), + "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) + ) /** Files for the summary */ def summaryFiles = Map("referenceFasta" -> referenceFasta()) diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala index 6eb2832c17acea6bcdc0d6766d48d3100dc73472..31ba0a53f0fc034b406cca237656722ca9de9d6f 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala @@ -15,28 +15,37 @@ */ package nl.lumc.sasc.biopet.pipelines.shiva -import java.io.File - import nl.lumc.sasc.biopet.core.summary.SummaryQScript import nl.lumc.sasc.biopet.core.{ Reference, SampleLibraryTag } -import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsCall, BcftoolsMerge } -import nl.lumc.sasc.biopet.extensions.gatk.{ GenotypeConcordance, CombineVariants } -import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup -import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter, VcfStats } -import nl.lumc.sasc.biopet.extensions.{ Ln, Bgzip, Tabix } -import nl.lumc.sasc.biopet.utils.Logging -import org.broadinstitute.gatk.utils.commandline.Input +import nl.lumc.sasc.biopet.extensions.Tabix +import nl.lumc.sasc.biopet.extensions.gatk.{ CombineVariants, GenotypeConcordance } +import nl.lumc.sasc.biopet.extensions.tools.VcfStats +import nl.lumc.sasc.biopet.extensions.vt.{ VtDecompose, VtNormalize } +import nl.lumc.sasc.biopet.pipelines.bammetrics.TargetRegions +import nl.lumc.sasc.biopet.pipelines.shiva.variantcallers._ +import nl.lumc.sasc.biopet.utils.{ BamUtils, Logging } +import org.broadinstitute.gatk.queue.QScript /** * Common trait for ShivaVariantcalling * * Created by pjvan_thof on 2/26/15. */ -trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with Reference { - qscript => +trait ShivaVariantcallingTrait extends SummaryQScript + with SampleLibraryTag + with Reference + with TargetRegions { + qscript: QScript => @Input(doc = "Bam files (should be deduped bams)", shortName = "BAM", required = true) - var inputBams: List[File] = Nil + protected var inputBamsArg: List[File] = Nil + + var inputBams: Map[String, File] = Map() + + /** Executed before script */ + def init(): Unit = { + if (inputBamsArg.nonEmpty) inputBams = BamUtils.sampleBamMap(inputBamsArg) + } var referenceVcf: Option[File] = config("reference_vcf") @@ -53,25 +62,22 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with override def defaults = Map("bcftoolscall" -> Map("f" -> List("GQ"))) - /** Executed before script */ - def init(): Unit = { - } - /** Final merged output files of all variantcaller modes */ def finalFile = new File(outputDir, namePrefix + ".final.vcf.gz") /** Variantcallers requested by the config */ protected val configCallers: Set[String] = config("variantcallers") + protected val callers: List[Variantcaller] = { + (for (name <- configCallers) yield { + if (!callersList.exists(_.name == name)) + Logging.addError(s"variantcaller '$name' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) + callersList.find(_.name == name) + }).flatten.toList.sortBy(_.prio) + } + /** This will add jobs for this pipeline */ def biopetScript(): Unit = { - for (cal <- configCallers) { - if (!callersList.exists(_.name == cal)) - Logging.addError("variantcaller '" + cal + "' does not exist, possible to use: " + callersList.map(_.name).mkString(", ")) - } - - val callers = callersList.filter(x => configCallers.contains(x.name)).sortBy(_.prio) - require(inputBams.nonEmpty, "No input bams found") require(callers.nonEmpty, "must select at least 1 variantcaller, choices are: " + callersList.map(_.name).mkString(", ")) @@ -81,215 +87,89 @@ trait ShivaVariantcallingTrait extends SummaryQScript with SampleLibraryTag with cv.genotypeMergeOptions = Some("PRIORITIZE") cv.rodPriorityList = callers.map(_.name).mkString(",") for (caller <- callers) { - caller.addJobs() - cv.addInput(caller.outputFile, caller.name) - - val vcfStats = new VcfStats(qscript) - vcfStats.input = caller.outputFile - vcfStats.setOutputDir(new File(caller.outputDir, "vcfstats")) - add(vcfStats) - addSummarizable(vcfStats, namePrefix + "-vcfstats-" + caller.name) - - referenceVcf.foreach(referenceVcfFile => { - val gc = new GenotypeConcordance(this) - gc.evalFile = caller.outputFile - gc.compFile = referenceVcfFile - gc.outputFile = new File(caller.outputDir, s"$namePrefix-genotype_concordance.${caller.name}.txt") - referenceVcfRegions.foreach(gc.intervals ::= _) - add(gc) - addSummarizable(gc, s"$namePrefix-genotype_concordance-${caller.name}") - }) + caller.inputBams = inputBams + caller.namePrefix = namePrefix + caller.outputDir = new File(outputDir, caller.name) + add(caller) + addStats(caller.outputFile, caller.name) + val normalize: Boolean = config("execute_vt_normalize", default = false, submodule = caller.configName) + val decompose: Boolean = config("execute_vt_decompose", default = false, submodule = caller.configName) + + val vtNormalize = new VtNormalize(this) + vtNormalize.inputVcf = caller.outputFile + val vtDecompose = new VtDecompose(this) + + if (normalize && decompose) { + vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") + vtNormalize.isIntermediate = true + add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) + vtDecompose.inputVcf = vtNormalize.outputVcf + vtDecompose.outputVcf = swapExt(caller.outputDir, vtNormalize.outputVcf, ".vcf.gz", ".decompose.vcf.gz") + add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) + cv.addInput(vtDecompose.outputVcf, caller.name) + } else if (normalize && !decompose) { + vtNormalize.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".normalized.vcf.gz") + add(vtNormalize, Tabix(this, vtNormalize.outputVcf)) + cv.addInput(vtNormalize.outputVcf, caller.name) + } else if (!normalize && decompose) { + vtDecompose.inputVcf = caller.outputFile + vtDecompose.outputVcf = swapExt(caller.outputDir, caller.outputFile, ".vcf.gz", ".decompose.vcf.gz") + add(vtDecompose, Tabix(this, vtDecompose.outputVcf)) + cv.addInput(vtDecompose.outputVcf, caller.name) + } else cv.addInput(caller.outputFile, caller.name) } add(cv) + addStats(finalFile, "final") + + addSummaryJobs() + } + + protected def addStats(vcfFile: File, name: String): Unit = { val vcfStats = new VcfStats(qscript) - vcfStats.input = finalFile - vcfStats.setOutputDir(new File(outputDir, "vcfstats")) - vcfStats.infoTags :+= cv.setKey + vcfStats.input = vcfFile + vcfStats.setOutputDir(new File(vcfFile.getParentFile, "vcfstats")) + if (name == "final") vcfStats.infoTags :+= "VariantCaller" add(vcfStats) - addSummarizable(vcfStats, namePrefix + "-vcfstats-final") + addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name") referenceVcf.foreach(referenceVcfFile => { val gc = new GenotypeConcordance(this) - gc.evalFile = finalFile + gc.evalFile = vcfFile gc.compFile = referenceVcfFile - gc.outputFile = new File(outputDir, s"$namePrefix-genotype_concordance.final.txt") + gc.outputFile = new File(vcfFile.getParentFile, s"$namePrefix-genotype_concordance.$name.txt") referenceVcfRegions.foreach(gc.intervals ::= _) add(gc) - addSummarizable(gc, s"$namePrefix-genotype_concordance-final") + addSummarizable(gc, s"$namePrefix-genotype_concordance-$name") }) - addSummaryJobs() - } - - /** Will generate all available variantcallers */ - protected def callersList: List[Variantcaller] = List(new Freebayes, new RawVcf, new Bcftools, new BcftoolsSingleSample) - - /** General trait for a variantcaller mode */ - trait Variantcaller { - /** Name of mode, this should also be used in the config */ - val name: String - - /** Output dir for this mode */ - def outputDir = new File(qscript.outputDir, name) - - /** Prio in merging in the final file */ - protected val defaultPrio: Int - - /** Prio from the config */ - lazy val prio: Int = config("prio_" + name, default = defaultPrio) - - /** This should add the variantcaller jobs */ - def addJobs() - - /** Final output file of this mode */ - def outputFile: File - } - - /** default mode of freebayes */ - class Freebayes extends Variantcaller { - val name = "freebayes" - protected val defaultPrio = 7 - - /** Final output file of this mode */ - def outputFile = new File(outputDir, namePrefix + ".freebayes.vcf.gz") - - def addJobs() { - val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(qscript) - fb.bamfiles = inputBams - fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf") - fb.isIntermediate = true - add(fb) - - //TODO: need piping for this, see also issue #114 - val bz = new Bgzip(qscript) - bz.input = List(fb.outputVcf) - bz.output = outputFile - add(bz) - - val ti = new Tabix(qscript) - ti.input = bz.output - ti.p = Some("vcf") - add(ti) - } - } - - /** default mode of bcftools */ - class Bcftools extends Variantcaller { - val name = "bcftools" - protected val defaultPrio = 8 - - /** Final output file of this mode */ - def outputFile = new File(outputDir, namePrefix + ".bcftools.vcf.gz") - - def addJobs() { - val mp = new SamtoolsMpileup(qscript) - mp.input = inputBams - mp.u = true - mp.reference = referenceFasta() - - val bt = new BcftoolsCall(qscript) - bt.O = Some("z") - bt.v = true - bt.c = true - - add(mp | bt > outputFile) - add(Tabix(qscript, outputFile)) - } - } - - /** default mode of bcftools */ - class BcftoolsSingleSample extends Variantcaller { - val name = "bcftools_singlesample" - protected val defaultPrio = 8 - - /** Final output file of this mode */ - def outputFile = new File(outputDir, namePrefix + ".bcftools_singlesample.vcf.gz") - - def addJobs() { - val sampleVcfs = for (inputBam <- inputBams) yield { - val mp = new SamtoolsMpileup(qscript) - mp.input :+= inputBam - mp.u = true - mp.reference = referenceFasta() - - val bt = new BcftoolsCall(qscript) - bt.O = Some("z") - bt.v = true - bt.c = true - bt.output = new File(outputDir, inputBam.getName + ".vcf.gz") - - add(mp | bt) - add(Tabix(qscript, bt.output)) - bt.output - } - - if (sampleVcfs.size > 1) { - val bcfmerge = new BcftoolsMerge(qscript) - bcfmerge.input = sampleVcfs - bcfmerge.output = outputFile - bcfmerge.O = Some("z") - add(bcfmerge) - } else add(Ln.apply(qscript, sampleVcfs.head, outputFile)) - add(Tabix(qscript, outputFile)) + for (bedFile <- ampliconBedFile.toList ::: roiBedFiles) { + val regionName = bedFile.getName.stripSuffix(".bed") + val vcfStats = new VcfStats(qscript) + vcfStats.input = vcfFile + vcfStats.intervals = Some(bedFile) + vcfStats.setOutputDir(new File(vcfFile.getParentFile, s"vcfstats-$regionName")) + if (name == "final") vcfStats.infoTags :+= "VariantCaller" + add(vcfStats) + addSummarizable(vcfStats, s"$namePrefix-vcfstats-$name-$regionName") } } - /** Makes a vcf file from a mpileup without statistics */ - class RawVcf extends Variantcaller { - val name = "raw" - - // This caller is designed as fallback when other variantcallers fails to report - protected val defaultPrio = Int.MaxValue - - /** Final output file of this mode */ - def outputFile = new File(outputDir, namePrefix + ".raw.vcf.gz") - - def addJobs() { - val rawFiles = inputBams.map(bamFile => { - val mp = new SamtoolsMpileup(qscript) { - override def configName = "samtoolsmpileup" - override def defaults = Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)) - } - mp.input :+= bamFile - - val m2v = new MpileupToVcf(qscript) - m2v.inputBam = bamFile - m2v.output = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.vcf") - add(mp | m2v) - - val vcfFilter = new VcfFilter(qscript) { - override def configName = "vcffilter" - override def defaults = Map("min_sample_depth" -> 8, - "min_alternate_depth" -> 2, - "min_samples_pass" -> 1, - "filter_ref_calls" -> true - ) - } - vcfFilter.inputVcf = m2v.output - vcfFilter.outputVcf = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.filter.vcf.gz") - add(vcfFilter) - vcfFilter.outputVcf - }) - - val cv = new CombineVariants(qscript) - cv.inputFiles = rawFiles - cv.outputFile = outputFile - cv.setKey = "null" - cv.excludeNonVariants = true - add(cv) - } - } + /** Will generate all available variantcallers */ + protected def callersList: List[Variantcaller] = List(new Freebayes(this), new RawVcf(this), new Bcftools(this), new BcftoolsSingleSample(this)) /** Location of summary file */ def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json") /** Settings for the summary */ - def summarySettings = Map("variantcallers" -> configCallers.toList) + def summarySettings = Map( + "variantcallers" -> configCallers.toList, + "regions_of_interest" -> roiBedFiles.map(_.getName.stripSuffix(".bed")), + "amplicon_bed" -> ampliconBedFile.map(_.getName.stripSuffix(".bed")) + ) /** Files for the summary */ def summaryFiles: Map[String, File] = { - val callers: Set[String] = config("variantcallers") - callersList.filter(x => callers.contains(x.name)).map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) + callers.map(x => x.name -> x.outputFile).toMap + ("final" -> finalFile) } } \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala new file mode 100644 index 0000000000000000000000000000000000000000..d7ff2ac12e78f7e149a3beea14eaea766d385079 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Breakdancer.scala @@ -0,0 +1,23 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.extensions.breakdancer.{ BreakdancerVCF, BreakdancerCaller, BreakdancerConfig } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Script for sv caler Breakdancer */ +class Breakdancer(val root: Configurable) extends SvCaller { + def name = "breakdancer" + + def biopetScript() { + for ((sample, bamFile) <- inputBams) { + val breakdancerSampleDir = new File(outputDir, sample) + + // read config and set all parameters for the pipeline + logger.debug("Starting Breakdancer configuration") + + val bdcfg = BreakdancerConfig(this, bamFile, new File(breakdancerSampleDir, sample + ".breakdancer.cfg")) + val breakdancer = BreakdancerCaller(this, bdcfg.output, new File(breakdancerSampleDir, sample + ".breakdancer.tsv")) + val bdvcf = BreakdancerVCF(this, breakdancer.output, new File(breakdancerSampleDir, sample + ".breakdancer.vcf")) + add(bdcfg, breakdancer, bdvcf) + } + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala new file mode 100644 index 0000000000000000000000000000000000000000..ff98b32e5f9fe23f6f9e8b52334742b29973da5c --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Clever.scala @@ -0,0 +1,18 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.extensions.clever.CleverCaller +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Script for sv caler Clever */ +class Clever(val root: Configurable) extends SvCaller { + def name = "clever" + + def biopetScript() { + //TODO: check double directories + for ((sample, bamFile) <- inputBams) { + val cleverDir = new File(outputDir, sample) + val clever = CleverCaller(this, bamFile, cleverDir) + add(clever) + } + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala new file mode 100644 index 0000000000000000000000000000000000000000..8197f8cc7ea4ff6c72b37e913b8030af17e51151 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/Delly.scala @@ -0,0 +1,60 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.extensions.delly.DellyCaller +import nl.lumc.sasc.biopet.extensions.gatk.CatVariants +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Script for sv caler delly */ +class Delly(val root: Configurable) extends SvCaller { + def name = "delly" + + val del: Boolean = config("DEL", default = true) + val dup: Boolean = config("DUP", default = true) + val inv: Boolean = config("INV", default = true) + val tra: Boolean = config("TRA", default = true) + + def biopetScript() { + for ((sample, bamFile) <- inputBams) { + val dellyDir = new File(outputDir, sample) + + val catVariants = new CatVariants(this) + catVariants.outputFile = new File(dellyDir, sample + ".delly.vcf.gz") + + /// start delly and then copy the vcf into the root directory "<sample>.delly/" + if (del) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "DEL" + delly.outputvcf = new File(dellyDir, sample + ".delly.del.vcf") + add(delly) + catVariants.inputFiles :+= delly.outputvcf + } + if (dup) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "DUP" + delly.outputvcf = new File(dellyDir, sample + ".delly.dup.vcf") + add(delly) + catVariants.inputFiles :+= delly.outputvcf + } + if (inv) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "INV" + delly.outputvcf = new File(dellyDir, sample + ".delly.inv.vcf") + add(delly) + catVariants.inputFiles :+= delly.outputvcf + } + if (tra) { + val delly = new DellyCaller(this) + delly.input = bamFile + delly.analysistype = "TRA" + delly.outputvcf = new File(dellyDir, sample + ".delly.tra.vcf") + catVariants.inputFiles :+= delly.outputvcf + add(delly) + } + + add(catVariants) + } + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala new file mode 100644 index 0000000000000000000000000000000000000000..1c63aa7b86954ea2d1fba84473b0cc67eb9a76d0 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/svcallers/SvCaller.scala @@ -0,0 +1,19 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.svcallers + +import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript } +import org.broadinstitute.gatk.queue.QScript + +/** + * Created by pjvanthof on 23/11/15. + */ +trait SvCaller extends QScript with BiopetQScript with Reference { + + /** Name of mode, this should also be used in the config */ + def name: String + + var namePrefix: String = _ + + var inputBams: Map[String, File] = _ + + def init() = {} +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala new file mode 100644 index 0000000000000000000000000000000000000000..fc88e6c212eb8fc4f6dbfb5fbaced0b1a666b195 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Bcftools.scala @@ -0,0 +1,27 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import nl.lumc.sasc.biopet.extensions.Tabix +import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsCall +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** default mode of bcftools */ +class Bcftools(val root: Configurable) extends Variantcaller { + val name = "bcftools" + protected def defaultPrio = 8 + + def biopetScript { + val mp = new SamtoolsMpileup(this) + mp.input = inputBams.values.toList + mp.u = true + mp.reference = referenceFasta() + + val bt = new BcftoolsCall(this) + bt.O = Some("z") + bt.v = true + bt.c = true + + add(mp | bt > outputFile) + add(Tabix(this, outputFile)) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala new file mode 100644 index 0000000000000000000000000000000000000000..80f0980e3d31cd658c30f93693fa844f0218ef6b --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/BcftoolsSingleSample.scala @@ -0,0 +1,42 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.{ Ln, Tabix } +import nl.lumc.sasc.biopet.extensions.bcftools.{ BcftoolsMerge, BcftoolsCall } +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** default mode of bcftools */ +class BcftoolsSingleSample(val root: Configurable) extends Variantcaller { + val name = "bcftools_singlesample" + protected def defaultPrio = 8 + + def biopetScript { + val sampleVcfs = for ((sample, inputBam) <- inputBams.toList) yield { + val mp = new SamtoolsMpileup(this) + mp.input :+= inputBam + mp.u = true + mp.reference = referenceFasta() + + val bt = new BcftoolsCall(this) + bt.O = Some("z") + bt.v = true + bt.c = true + bt.output = new File(outputDir, sample + ".vcf.gz") + + add(mp | bt) + add(Tabix(this, bt.output)) + bt.output + } + + if (sampleVcfs.size > 1) { + val bcfmerge = new BcftoolsMerge(this) + bcfmerge.input = sampleVcfs + bcfmerge.output = outputFile + bcfmerge.O = Some("z") + add(bcfmerge) + } else add(Ln.apply(this, sampleVcfs.head, outputFile)) + add(Tabix(this, outputFile)) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala new file mode 100644 index 0000000000000000000000000000000000000000..3227c2dc2fc845fdeb18f3915ae4c692369fdcca --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Freebayes.scala @@ -0,0 +1,28 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.{ Tabix, Bgzip } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** default mode of freebayes */ +class Freebayes(val root: Configurable) extends Variantcaller { + val name = "freebayes" + protected def defaultPrio = 7 + + def biopetScript { + val fb = new nl.lumc.sasc.biopet.extensions.Freebayes(this) + fb.bamfiles = inputBams.values.toList + fb.outputVcf = new File(outputDir, namePrefix + ".freebayes.vcf") + fb.isIntermediate = true + add(fb) + + //TODO: need piping for this, see also issue #114 + val bz = new Bgzip(this) + bz.input = List(fb.outputVcf) + bz.output = outputFile + add(bz) + + add(Tabix.apply(this, bz.output)) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala new file mode 100644 index 0000000000000000000000000000000000000000..3999ba64b0c8ec36eca933127ca60d8e7da8d201 --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/RawVcf.scala @@ -0,0 +1,52 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import java.io.File + +import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup +import nl.lumc.sasc.biopet.extensions.tools.{ VcfFilter, MpileupToVcf } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** Makes a vcf file from a mpileup without statistics */ +class RawVcf(val root: Configurable) extends Variantcaller { + val name = "raw" + + // This caller is designed as fallback when other variantcallers fails to report + protected def defaultPrio = Int.MaxValue + + def biopetScript { + val rawFiles = inputBams.map { + case (sample, bamFile) => + val mp = new SamtoolsMpileup(this) { + override def configName = "samtoolsmpileup" + override def defaults = Map("samtoolsmpileup" -> Map("disable_baq" -> true, "min_map_quality" -> 1)) + } + mp.input :+= bamFile + + val m2v = new MpileupToVcf(this) + m2v.inputBam = bamFile + m2v.output = new File(outputDir, sample + ".raw.vcf") + add(mp | m2v) + + val vcfFilter = new VcfFilter(this) { + override def configName = "vcffilter" + override def defaults = Map("min_sample_depth" -> 8, + "min_alternate_depth" -> 2, + "min_samples_pass" -> 1, + "filter_ref_calls" -> true + ) + } + vcfFilter.inputVcf = m2v.output + vcfFilter.outputVcf = new File(outputDir, bamFile.getName.stripSuffix(".bam") + ".raw.filter.vcf.gz") + add(vcfFilter) + vcfFilter.outputVcf + } + + val cv = new CombineVariants(this) + cv.inputFiles = rawFiles.toList + cv.outputFile = outputFile + cv.setKey = "null" + cv.excludeNonVariants = true + add(cv) + } +} diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala new file mode 100644 index 0000000000000000000000000000000000000000..eb0a9ffccd11e6bb9d3a4d92af18ba381ce009eb --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/Variantcaller.scala @@ -0,0 +1,29 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import nl.lumc.sasc.biopet.core.{ BiopetQScript, Reference } +import org.broadinstitute.gatk.queue.QScript + +/** + * Created by pjvan_thof on 11/19/15. + */ +trait Variantcaller extends QScript with BiopetQScript with Reference { + + /** Name of mode, this should also be used in the config */ + def name: String + + var namePrefix: String = _ + + var inputBams: Map[String, File] = _ + + def init() = {} + + /** Prio in merging in the final file */ + protected def defaultPrio: Int + + /** Prio from the config */ + lazy val prio: Int = config("prio_" + name, default = defaultPrio) + + /** Final output file of this mode */ + def outputFile: File = new File(outputDir, namePrefix + s".$name.vcf.gz") +} + diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index 54f5a455364dcd09878667d3a52ca139222190b6..94edda97a3e893ab8bba6d4016f003c2b0242b57 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -21,7 +21,7 @@ import com.google.common.io.Files import nl.lumc.sasc.biopet.utils.config.Config import nl.lumc.sasc.biopet.extensions.Freebayes import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants -import nl.lumc.sasc.biopet.extensions.tools.{ MpileupToVcf, VcfFilter } +import nl.lumc.sasc.biopet.extensions.tools.VcfFilter import nl.lumc.sasc.biopet.utils.ConfigUtils import org.apache.commons.io.FileUtils import org.broadinstitute.gatk.queue.QSettings @@ -72,7 +72,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) - pipeline.inputBams = (for (n <- 1 to bams) yield ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toList + pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftools_singlesample && !freebayes) diff --git a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala b/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala index a0ade5706206c60d9a1a470002c783d0b98a3590..59489d2caa89ac74af08b53c46b781f3e074760d 100644 --- a/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala +++ b/public/yamsvp/src_old/main/scala/nl/lumc/sasc/biopet/pipelines/yamsvp/Yamsvp.scala @@ -21,15 +21,15 @@ package nl.lumc.sasc.biopet.pipelines.yamsvp import java.io.File -import nl.lumc.sasc.biopet.utils.config.Configurable -import nl.lumc.sasc.biopet.core.{ MultiSampleQScript, PipelineCommand } +import nl.lumc.sasc.biopet.core.{MultiSampleQScript, PipelineCommand} import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.breakdancer.Breakdancer import nl.lumc.sasc.biopet.extensions.clever.CleverCaller import nl.lumc.sasc.biopet.extensions.igvtools.IGVToolsCount -import nl.lumc.sasc.biopet.extensions.sambamba.{ SambambaMarkdup, SambambaMerge } +import nl.lumc.sasc.biopet.extensions.sambamba.{SambambaMarkdup, SambambaMerge} +import nl.lumc.sasc.biopet.pipelines.shiva.Delly +import nl.lumc.sasc.biopet.utils.config.Configurable //import nl.lumc.sasc.biopet.extensions.pindel.Pindel -import nl.lumc.sasc.biopet.extensions.delly.Delly import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics import nl.lumc.sasc.biopet.pipelines.mapping.Mapping import org.broadinstitute.gatk.queue.QScript