Commit b81ad8bb authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge remote-tracking branch 'remotes/origin/develop' into feature-qiime

parents 8afdecfc 52776123
#!/usr/bin/env python
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
from __future__ import print_function
__author__="Wai Yi Leung"
import sys
if __name__ == "__main__":
"""
Fix the mpileupformat for RNA mpileup
Solution offered by Irina Pulyakhina (LUMC-HG)
http://www.biostars.org/p/78542/
"""
for line in sys.stdin:
l = line.strip().split("\t")
if l[3] == "0":
# no alignment to this position
print("\t".join(map(str, l)))
continue
fix_col = l[4].replace('<', '').replace('>', '')
new_size = len(fix_col)
old_size = len(l[4])
if new_size != old_size:
l[4] = fix_col
l[3] = "{}".format(new_size)
if new_size == 0:
l[5] = ""
print("\t".join(map(str, l)))
......@@ -36,5 +36,5 @@ class Bgzip(val root: Configurable) extends BiopetCommandLineFunction {
def cmdLine = required(executable) +
conditional(f, "-f") +
" -c " + repeat(input) +
" > " + required(output)
(if (outputAsStsout) "" else " > " + required(output))
}
package nl.lumc.sasc.biopet.extensions.varscan
import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
/**
* Created by sajvanderzeeuw on 19-1-16.
*/
class FixMpileup(val root: Configurable) extends PythonCommandLineFunction {
setPythonScript("fix_mpileup.py", "/nl/lumc/sasc/biopet/extensions/varscan/")
def cmdLine = getPythonCommand
}
......@@ -23,7 +23,7 @@ abstract class Varscan extends BiopetJavaCommandLineFunction with Version {
jarFile = config("varscan_jar")
def versionCommand = super.commandLine
def versionCommand = s"$executable -jar $jarFile"
def versionRegex = """VarScan v(.*)""".r
}
......@@ -20,7 +20,7 @@ import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
class Mpileup2cns(val root: Configurable) extends Varscan {
class VarscanMpileup2cns(val root: Configurable) extends Varscan {
@Input(doc = "Input mpileup file", required = false) // if not defined, input is stdin
var input: Option[File] = None
......@@ -49,22 +49,17 @@ class Mpileup2cns(val root: Configurable) extends Varscan {
variants.foreach { case v => require(validValues.contains(v), "variants value must be either 0 or 1") }
}
override def cmdLine = {
val baseCommand = super.cmdLine + required("mpileup2cns") +
required("", input) +
required("--min-coverage", minCoverage) +
required("--min-reads2", minReads2) +
required("--min-avg-qual", minAvgQual) +
required("--min-var-freq", minVarFreq) +
required("--min-freq-for-hom", minFreqForHom) +
required("--p-value", pValue) +
required("--strand-filter", strandFilter) +
required("--output-vcf", outputVcf) +
required("--vcf-sample-list", vcfSampleList) +
required("--variants", variants)
if (output.isDefined) baseCommand + " > " + required(output)
else baseCommand
}
override def cmdLine = super.cmdLine + required("mpileup2cns") +
required(input) +
optional("--min-coverage", minCoverage) +
optional("--min-reads2", minReads2) +
optional("--min-avg-qual", minAvgQual) +
optional("--min-var-freq", minVarFreq) +
optional("--min-freq-for-hom", minFreqForHom) +
optional("--p-value", pValue) +
optional("--strand-filter", strandFilter) +
optional("--output-vcf", outputVcf) +
optional("--vcf-sample-list", vcfSampleList) +
optional("--variants", variants) +
(if (outputAsStsout) "" else " > " + required(output))
}
......@@ -21,7 +21,7 @@ import nl.lumc.sasc.biopet.core.{ Reference, BiopetCommandLineFunction }
import nl.lumc.sasc.biopet.core.extensions.PythonCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
import nl.lumc.sasc.biopet.extensions.varscan.Mpileup2cns
import nl.lumc.sasc.biopet.extensions.varscan.VarscanMpileup2cns
import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix }
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
......@@ -65,7 +65,7 @@ class CustomVarScan(val root: Configurable) extends BiopetCommandLineFunction wi
override def cmdLine: String = required(executable) + required("-vP") + required("""\t\t""")
}
private val varscan = new Mpileup2cns(wrapper.root) {
private val varscan = new VarscanMpileup2cns(wrapper.root) {
override def configName = wrapper.configName
strandFilter = Option(0)
outputVcf = Option(1)
......
......@@ -28,6 +28,8 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder {
val wgsExecuted = summary.getSampleValues("bammetrics", "stats", "wgs").values.exists(_.isDefined)
val rnaExecuted = summary.getSampleValues("bammetrics", "stats", "rna").values.exists(_.isDefined)
val flexiprepExecuted = summary.getLibraryValues("flexiprep")
.exists { case ((sample, lib), value) => value.isDefined }
ReportPage(
List("Samples" -> generateSamplesPage(pageArgs)) ++
......@@ -51,25 +53,35 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder {
(if (rnaExecuted) List("Rna coverage" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
Map("sampleLevel" -> true, "showPlot" -> true, "showTable" -> false)))
else Nil) ++
List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp",
(if (flexiprepExecuted) List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp",
Map("showPlot" -> true, "showTable" -> false)),
"QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp",
Map("showPlot" -> true, "showTable" -> false))
),
)
else Nil),
pageArgs
)
}
/** Files page, can be used general or at sample level */
def filesPage: ReportPage = ReportPage(List(), List(
"Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"),
"After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"),
"Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)),
"Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp",
Map("pipelineName" -> pipelineName, "fileTag" -> "output_bam_preprocess"))), Map())
def filesPage: ReportPage = {
val flexiprepExecuted = summary.getLibraryValues("flexiprep")
.exists { case ((sample, lib), value) => value.isDefined }
ReportPage(List(), (if (flexiprepExecuted) List(
"Input fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepInputfiles.ssp"),
"After QC fastq files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepOutputfiles.ssp"))
else Nil) :::
List("Bam files per lib" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp", Map("sampleLevel" -> false)),
"Preprocessed bam files" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/mapping/outputBamfiles.ssp",
Map("pipelineName" -> pipelineName, "fileTag" -> "output_bam_preprocess"))), Map())
}
/** Single sample page */
def samplePage(sampleId: String, args: Map[String, Any]): ReportPage = {
val flexiprepExecuted = summary.getLibraryValues("flexiprep")
.exists { case ((sample, lib), value) => sample == sampleId && value.isDefined }
ReportPage(List(
"Libraries" -> generateLibraryPage(args),
"Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), None),
......@@ -78,20 +90,23 @@ trait MultisampleMappingReportTrait extends MultisampleReportBuilder {
"Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp",
if (summary.libraries(sampleId).size > 1) Map("showPlot" -> true) else Map()),
"Preprocessing" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp", Map("sampleLevel" -> true))) ++
List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
(if (flexiprepExecuted) List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
"QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")
), args)
)
else Nil), args)
}
/** Library page */
def libraryPage(sampleId: String, libId: String, args: Map[String, Any]): ReportPage = {
ReportPage(List(
"Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId)),
"QC" -> FlexiprepReport.flexiprepPage
), List(
"Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
"QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
"QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp")
), args)
val flexiprepExecuted = summary.getValue(Some(sampleId), Some(libId), "flexiprep").isDefined
ReportPage(
("Alignment" -> BammetricsReport.bamMetricsPage(summary, Some(sampleId), Some(libId))) ::
(if (flexiprepExecuted) List("QC" -> FlexiprepReport.flexiprepPage) else Nil),
"Alignment" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp") ::
(if (flexiprepExecuted) List("QC reads" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepReadSummary.ssp"),
"QC bases" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/flexiprep/flexiprepBaseSummary.ssp"))
else Nil),
args)
}
}
\ No newline at end of file
......@@ -156,7 +156,12 @@ trait ShivaVariantcallingTrait extends SummaryQScript
}
/** Will generate all available variantcallers */
protected def callersList: List[Variantcaller] = List(new Freebayes(this), new RawVcf(this), new Bcftools(this), new BcftoolsSingleSample(this))
protected def callersList: List[Variantcaller] = List(
new Freebayes(this),
new RawVcf(this),
new Bcftools(this),
new BcftoolsSingleSample(this),
new VarscanCnsSingleSample(this))
/** Location of summary file */
def summaryFile = new File(outputDir, "ShivaVariantcalling.summary.json")
......
package nl.lumc.sasc.biopet.pipelines.shiva.variantcallers
import java.io.PrintWriter
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
import nl.lumc.sasc.biopet.extensions.varscan.{ FixMpileup, VarscanMpileup2cns }
import nl.lumc.sasc.biopet.extensions.{ Bgzip, Tabix }
import nl.lumc.sasc.biopet.utils.config.Configurable
/**
* Created by sajvanderzeeuw on 15-1-16.
*/
class VarscanCnsSingleSample(val root: Configurable) extends Variantcaller {
val name = "varscan_cns_singlesample"
protected def defaultPrio = 25
override def defaults = Map(
"samtoolsmpileup" -> Map(
"disable_baq" -> true,
"depth" -> 1000000
),
"varscanmpileup2cns" -> Map("strand_filter" -> 0)
)
override def fixedValues = Map(
"samtoolsmpileup" -> Map("output_mapping_quality" -> true),
"varscanmpileup2cns" -> Map("output_vcf" -> 1)
)
def biopetScript: Unit = {
val sampleVcfs = for ((sample, inputBam) <- inputBams.toList) yield {
val mpileup = new SamtoolsMpileup(this)
mpileup.input = List(inputBam)
val sampleVcf = new File(outputDir, s"${name}_$sample.vcf.gz")
val sampleFile = new File(outputDir, s"$sample.name.txt")
sampleFile.getParentFile.mkdirs()
sampleFile.deleteOnExit()
val writer = new PrintWriter(sampleFile)
writer.println(sample)
writer.close()
val varscan = new VarscanMpileup2cns(this)
varscan.vcfSampleList = Some(sampleFile)
add(mpileup | new FixMpileup(this) | varscan | new Bgzip(this) > sampleVcf)
add(Tabix(this, sampleVcf))
sampleVcf
}
val cv = new CombineVariants(this)
cv.inputFiles = sampleVcfs
cv.outputFile = outputFile
cv.setKey = "null"
cv.excludeNonVariants = true
add(cv)
}
}
......@@ -53,28 +53,31 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
bams <- 0 to 3;
raw <- bool;
bcftools <- bool;
bcftools_singlesample <- bool;
freebayes <- bool
) yield Array(bams, raw, bcftools, bcftools_singlesample, freebayes)).toArray
bcftoolsSinglesample <- bool;
freebayes <- bool;
varscanCnsSinglesample <- bool
) yield Array(bams, raw, bcftools, bcftoolsSinglesample, freebayes, varscanCnsSinglesample)).toArray
}
@Test(dataProvider = "shivaVariantcallingOptions")
def testShivaVariantcalling(bams: Int,
raw: Boolean,
bcftools: Boolean,
bcftools_singlesample: Boolean,
freebayes: Boolean) = {
bcftoolsSinglesample: Boolean,
freebayes: Boolean,
varscanCnsSinglesample: Boolean) = {
val callers: ListBuffer[String] = ListBuffer()
if (raw) callers.append("raw")
if (bcftools) callers.append("bcftools")
if (bcftools_singlesample) callers.append("bcftools_singlesample")
if (bcftoolsSinglesample) callers.append("bcftools_singlesample")
if (freebayes) callers.append("freebayes")
if (varscanCnsSinglesample) callers.append("varscan_cns_singlesample")
val map = Map("variantcallers" -> callers.toList)
val pipeline = initPipeline(map)
pipeline.inputBams = (for (n <- 1 to bams) yield n.toString -> ShivaVariantcallingTest.inputTouch("bam_" + n + ".bam")).toMap
val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftools_singlesample && !freebayes)
val illegalArgumentException = pipeline.inputBams.isEmpty || (!raw && !bcftools && !bcftoolsSinglesample && !freebayes && !varscanCnsSinglesample)
if (illegalArgumentException) intercept[IllegalArgumentException] {
pipeline.script()
......@@ -83,7 +86,7 @@ class ShivaVariantcallingTest extends TestNGSuite with Matchers {
if (!illegalArgumentException) {
pipeline.script()
pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe 1 + (if (raw) 1 else 0)
pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe (1 + (if (raw) 1 else 0) + (if (varscanCnsSinglesample) 1 else 0))
//pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0)
//FIXME: Can not check for bcftools because of piping
pipeline.functions.count(_.isInstanceOf[Freebayes]) shouldBe (if (freebayes) 1 else 0)
......@@ -130,6 +133,7 @@ object ShivaVariantcallingTest {
"freebayes" -> Map("exe" -> "test"),
"md5sum" -> Map("exe" -> "test"),
"bgzip" -> Map("exe" -> "test"),
"tabix" -> Map("exe" -> "test")
"tabix" -> Map("exe" -> "test"),
"varscan_jar" -> "test"
)
}
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment