diff --git a/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py b/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py new file mode 100644 index 0000000000000000000000000000000000000000..3cbaf06412488bece8e795ef097c2582130920a3 --- /dev/null +++ b/public/biopet-extensions/src/main/resources/nl/lumc/sasc/biopet/extensions/varscan/fix_mpileup.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# +# Biopet is built on top of GATK Queue for building bioinformatic +# pipelines. It is mainly intended to support LUMC SHARK cluster which is running +# SGE. But other types of HPC that are supported by GATK Queue (such as PBS) +# should also be able to execute Biopet tools and pipelines. +# +# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center +# +# Contact us at: sasc@lumc.nl +# +# A dual licensing mode is applied. The source code within this project that are +# not part of GATK Queue is freely available for non-commercial use under an AGPL +# license; For commercial users or users who do not want to follow the AGPL +# license, please contact us to obtain a separate license. +# + + +from __future__ import print_function + +__author__="Wai Yi Leung" + +import sys + +if __name__ == "__main__": + """ + Fix the mpileupformat for RNA mpileup + Solution offered by Irina Pulyakhina (LUMC-HG) + http://www.biostars.org/p/78542/ + """ + for line in sys.stdin: + l = line.strip().split("\t") + if l[3] == "0": + # no alignment to this position + print("\t".join(map(str, l))) + continue + + fix_col = l[4].replace('<', '').replace('>', '') + + new_size = len(fix_col) + old_size = len(l[4]) + if new_size != old_size: + l[4] = fix_col + l[3] = "{}".format(new_size) + + if new_size == 0: + l[5] = "" + + print("\t".join(map(str, l))) diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala index 321cb8b9c5960936da9d8f5bcac0d2fdb9937627..a43fc4734852642b92e7924e2209073616197f6c 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/Bgzip.scala @@ -36,5 +36,5 @@ class Bgzip(val root: Configurable) extends BiopetCommandLineFunction { def cmdLine = required(executable) + conditional(f, "-f") + " -c " + repeat(input) + - " > " + required(output) + (if (outputAsStsout) "" else " > " + required(output)) } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/FixMpileup.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/FixMpileup.scala new file mode 100644 index 0000000000000000000000000000000000000000..827da73e3779003746a8ec16044c7726faa03337 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/FixMpileup.scala @@ -0,0 +1,12 @@ +package nl.lumc.sasc.biopet.extensions.varscan + +import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Created by sajvanderzeeuw on 19-1-16. + */ +class FixMpileup(val root: Configurable) extends PythonCommandLineFunction { + setPythonScript("fix_mpileup.py", "/nl/lumc/sasc/biopet/extensions/varscan/") + def cmdLine = getPythonCommand +} diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala index 76da0423cea32f599c978e852ca0aa35a740137e..67d3d34ee7be4dbd7e77fe030ee9914674522348 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Varscan.scala @@ -23,7 +23,7 @@ abstract class Varscan extends BiopetJavaCommandLineFunction with Version { jarFile = config("varscan_jar") - def versionCommand = super.commandLine + def versionCommand = s"$executable -jar $jarFile" def versionRegex = """VarScan v(.*)""".r } diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/VarscanMpileup2cns.scala similarity index 75% rename from public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala rename to public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/VarscanMpileup2cns.scala index 4fb0a05fd7e04617c15a68c63763d7e60da322d4..085b1bc37031fbf1ef99bd8448dbcba6fc0d07a8 100644 --- a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/Mpileup2cns.scala +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/varscan/VarscanMpileup2cns.scala @@ -20,7 +20,7 @@ import java.io.File import nl.lumc.sasc.biopet.utils.config.Configurable import org.broadinstitute.gatk.utils.commandline.{ Input, Output } -class Mpileup2cns(val root: Configurable) extends Varscan { +class VarscanMpileup2cns(val root: Configurable) extends Varscan { @Input(doc = "Input mpileup file", required = false) // if not defined, input is stdin var input: Option[File] = None @@ -49,22 +49,17 @@ class Mpileup2cns(val root: Configurable) extends Varscan { variants.foreach { case v => require(validValues.contains(v), "variants value must be either 0 or 1") } } - override def cmdLine = { - val baseCommand = super.cmdLine + required("mpileup2cns") + - required("", input) + - required("--min-coverage", minCoverage) + - required("--min-reads2", minReads2) + - required("--min-avg-qual", minAvgQual) + - required("--min-var-freq", minVarFreq) + - required("--min-freq-for-hom", minFreqForHom) + - required("--p-value", pValue) + - required("--strand-filter", strandFilter) + - required("--output-vcf", outputVcf) + - required("--vcf-sample-list", vcfSampleList) + - required("--variants", variants) - - if (output.isDefined) baseCommand + " > " + required(output) - else baseCommand - } - + override def cmdLine = super.cmdLine + required("mpileup2cns") + + required(input) + + optional("--min-coverage", minCoverage) + + optional("--min-reads2", minReads2) + + optional("--min-avg-qual", minAvgQual) + + optional("--min-var-freq", minVarFreq) + + optional("--min-freq-for-hom", minFreqForHom) + + optional("--p-value", pValue) + + optional("--strand-filter", strandFilter) + + optional("--output-vcf", outputVcf) + + optional("--vcf-sample-list", vcfSampleList) + + optional("--variants", variants) + + (if (outputAsStsout) "" else " > " + required(output)) } diff --git a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala index f2f11a03e775ebe5a7fa4161813a0157c8f7976a..2d1f270c680338c099baf27c6325551d8f0dd590 100644 --- a/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala +++ b/public/gentrap/src/main/scala/nl/lumc/sasc/biopet/pipelines/gentrap/extensions/CustomVarScan.scala @@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction } import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup -import nl.lumc.sasc.biopet.extensions.varscan.Mpileup2cns +import nl.lumc.sasc.biopet.extensions.varscan.VarscanMpileup2cns import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix } import org.broadinstitute.gatk.utils.commandline.{ Input, Output } @@ -65,7 +65,7 @@ class CustomVarScan(val root: Configurable) extends BiopetCommandLineFunction wi override def cmdLine: String = required(executable) + required("-vP") + required("""\t\t""") } - private val varscan = new Mpileup2cns(wrapper.root) { + private val varscan = new VarscanMpileup2cns(wrapper.root) { override def configName = wrapper.configName strandFilter = Option(0) outputVcf = Option(1) diff --git a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala index 2ace6f962b245acca0e65212ea6ee11bb478ebf0..4207bf0a358393df51037fdc644bd9c00578c947 100644 --- a/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala +++ b/public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/MultisampleMappingReport.scala @@ -28,6 +28,8 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder { val wgsExecuted = summary.getSampleValues("bammetrics", "stats", "wgs").values.exists(_.isDefined) val rnaExecuted = summary.getSampleValues("bammetrics", "stats", "rna").values.exists(_.isDefined) + val flexiprepExecuted = summary.getLibraryValues("flexiprep") + .exists { case ((sample, lib), value) => value.isDefined } ReportPage( List("Samples" -> generateSamplesPage(pageArgs)) ++ @@ -51,25 +53,35 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder { (if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp", Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false))) else Nil) ++ - List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", + (if (flexiprepExecuted) List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp", Map("showPlot" -> true, "showTable" -> false)), "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp", Map("showPlot" -> true, "showTable" -> false)) - ), + ) + else Nil), pageArgs ) } /** Files page, can be used general or at sample level */ - def filesPage: ReportPage = ReportPage(List(), List( - "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), - "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"), - "Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)), - "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", - Map("pipelineName" -> pipelineName, "fileTag" -> "output_bam_preprocess"))), Map()) + def filesPage: ReportPage = { + val flexiprepExecuted = summary.getLibraryValues("flexiprep") + .exists { case ((sample, lib), value) => value.isDefined } + + ReportPage(List(), (if (flexiprepExecuted) List( + "Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"), + "After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp")) + else Nil) ::: + List("Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)), + "Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", + Map("pipelineName" -> pipelineName, "fileTag" -> "output_bam_preprocess"))), Map()) + } /** Single sample page */ def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = { + val flexiprepExecuted = summary.getLibraryValues("flexiprep") + .exists { case ((sample, lib), value) => sample == sampleId && value.isDefined } + ReportPage(List( "Libraries" -> generateLibraryPage(args), "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None), @@ -78,20 +90,23 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder { "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()), "Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true))) ++ - List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + (if (flexiprepExecuted) List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) + ) + else Nil), args) } /** Library page */ def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = { - ReportPage(List( - "Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)), - "QC" -> FlexiprepReport.flexiprepPage - ), List( - "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"), - "QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), - "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp") - ), args) + val flexiprepExecuted = summary.getValue(Some(sampleId), Some(libId), "flexiprep").isDefined + + ReportPage( + ("Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId))) :: + (if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil), + "Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp") :: + (if (flexiprepExecuted) List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"), + "QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")) + else Nil), + args) } } \ No newline at end of file diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala index 31ba0a53f0fc034b406cca237656722ca9de9d6f..f08b4cc18402e995375f14f1095e922883b8dc98 100644 --- a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTrait.scala @@ -156,7 +156,12 @@ trait ShivaVariantcallingTrait extends SummaryQScript } /** Will generate all available variantcallers */ - protected def callersList: List[Variantcaller] = List(new Freebayes(this), new RawVcf(this), new Bcftools(this), new BcftoolsSingleSample(this)) + protected def callersList: List[Variantcaller] = List( + new Freebayes(this), + new RawVcf(this), + new Bcftools(this), + new BcftoolsSingleSample(this), + new VarscanCnsSingleSample(this)) /** Location of summary file */ def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json") diff --git a/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala new file mode 100644 index 0000000000000000000000000000000000000000..157013e5bbd0f22c8d74bf04355872f4bdb6cfaa --- /dev/null +++ b/public/shiva/src/main/scala/nl/lumc/sasc/biopet/pipelines/shiva/variantcallers/VarscanCnsSingleSample.scala @@ -0,0 +1,61 @@ +package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers + +import java.io.PrintWriter + +import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants +import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup +import nl.lumc.sasc.biopet.extensions.varscan.{ FixMpileup, VarscanMpileup2cns } +import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix } +import nl.lumc.sasc.biopet.utils.config.Configurable + +/** + * Created by sajvanderzeeuw on 15-1-16. + */ +class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller { + val name = "varscan_cns_singlesample" + protected def defaultPrio = 25 + + override def defaults = Map( + "samtoolsmpileup" -> Map( + "disable_baq" -> true, + "depth" -> 1000000 + ), + "varscanmpileup2cns" -> Map("strand_filter" -> 0) + ) + + override def fixedValues = Map( + "samtoolsmpileup" -> Map("output_mapping_quality" -> true), + "varscanmpileup2cns" -> Map("output_vcf" -> 1) + ) + + def biopetScript: Unit = { + val sampleVcfs = for ((sample, inputBam) <- inputBams.toList) yield { + val mpileup = new SamtoolsMpileup(this) + mpileup.input = List(inputBam) + + val sampleVcf = new File(outputDir, s"${name}_$sample.vcf.gz") + + val sampleFile = new File(outputDir, s"$sample.name.txt") + sampleFile.getParentFile.mkdirs() + sampleFile.deleteOnExit() + val writer = new PrintWriter(sampleFile) + writer.println(sample) + writer.close() + + val varscan = new VarscanMpileup2cns(this) + varscan.vcfSampleList = Some(sampleFile) + + add(mpileup | new FixMpileup(this) | varscan | new Bgzip(this) > sampleVcf) + add(Tabix(this, sampleVcf)) + + sampleVcf + } + + val cv = new CombineVariants(this) + cv.inputFiles = sampleVcfs + cv.outputFile = outputFile + cv.setKey = "null" + cv.excludeNonVariants = true + add(cv) + } +} diff --git a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala index 94edda97a3e893ab8bba6d4016f003c2b0242b57..dda3fb8c166cfcfbcc883d8c289cf172207a8964 100644 --- a/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala +++ b/public/shiva/src/test/scala/nl/lumc/sasc/biopet/pipelines/shiva/ShivaVariantcallingTest.scala @@ -53,28 +53,31 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { bams <- 0 to 3; raw <- bool; bcftools <- bool; - bcftools_singlesample <- bool; - freebayes <- bool - ) yield Array(bams, raw, bcftools, bcftools_singlesample, freebayes)).toArray + bcftoolsSinglesample <- bool; + freebayes <- bool; + varscanCnsSinglesample <- bool + ) yield Array(bams, raw, bcftools, bcftoolsSinglesample, freebayes, varscanCnsSinglesample)).toArray } @Test(dataProvider = "shivaVariantcallingOptions") def testShivaVariantcalling(bams: Int, raw: Boolean, bcftools: Boolean, - bcftools_singlesample: Boolean, - freebayes: Boolean) = { + bcftoolsSinglesample: Boolean, + freebayes: Boolean, + varscanCnsSinglesample: Boolean) = { val callers: ListBuffer[String] = ListBuffer() if (raw) callers.append("raw") if (bcftools) callers.append("bcftools") - if (bcftools_singlesample) callers.append("bcftools_singlesample") + if (bcftoolsSinglesample) callers.append("bcftools_singlesample") if (freebayes) callers.append("freebayes") + if (varscanCnsSinglesample) callers.append("varscan_cns_singlesample") val map = Map("variantcallers" -> callers.toList) val pipeline = initPipeline(map) pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap - val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftools_singlesample && !freebayes) + val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftoolsSinglesample && !freebayes && !varscanCnsSinglesample) if (illegalArgumentException) intercept[IllegalArgumentException] { pipeline.script() @@ -83,7 +86,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers { if (!illegalArgumentException) { pipeline.script() - pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe 1 + (if (raw) 1 else 0) + pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe (1 + (if (raw) 1 else 0) + (if (varscanCnsSinglesample) 1 else 0)) //pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0) //FIXME: Can not check for bcftools because of piping pipeline.functions.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0) @@ -130,6 +133,7 @@ object ShivaVariantcallingTest { "freebayes" -> Map("exe" -> "test"), "md5sum" -> Map("exe" -> "test"), "bgzip" -> Map("exe" -> "test"), - "tabix" -> Map("exe" -> "test") + "tabix" -> Map("exe" -> "test"), + "varscan_jar" -> "test" ) } \ No newline at end of file