Commit 14d8d45f authored by bow's avatar bow
Browse files

Merge branch 'develop' into feature-gentrap

Conflicts:
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala
	public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
parents 1c9c025e 118f09fd
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.bcftools
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
/**
 * Common base class for command line wrappers around the bcftools executable.
 */
abstract class Bcftools extends BiopetCommandLineFunction {
// Prepend "bcftools" to the sub-path inherited from the parent class
override def subPath = "bcftools" :: super.subPath
// Executable is read from the "exe" config key and falls back to "bcftools"
executable = config("exe", default = "bcftools")
// The version command is the bare executable, without any extra arguments
override def versionCommand = executable
// Captures whatever text follows "Version: "
override val versionRegex = """Version: (.*)""".r
// NOTE(review): both 0 and 1 are listed, presumably because bcftools exits non-zero when run without arguments - confirm
override val versionExitcode = List(0, 1)
}
\ No newline at end of file
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.bcftools
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
 * Wrapper for `bcftools call`. This extension is based on bcftools 1.1-134.
 *
 * The command is assembled in stages so that it can be used stand-alone
 * ([[cmdLine]]), as the reading end of a pipe ([[cmdPipeInput]]) or as the
 * writing end of a pipe ([[cmdPipe]]).
 *
 * @param root Configurable passed in by the code constructing this extension
 */
class BcftoolsCall(val root: Configurable) extends Bcftools {
  @Input(doc = "Input File")
  var input: File = _

  @Output(doc = "output File")
  var output: File = _

  /** Value for the `-O` option; the option is left out while this is null */
  var O: String = null

  /** Adds `-v` to the command line when true */
  var v: Boolean = config("v", default = true)

  /** Adds `-c` to the command line when true */
  var c: Boolean = config("c", default = false)

  /** Adds `-m` to the command line when true */
  var m: Boolean = config("m", default = false)

  /** Checks that exactly one of the `c` and `m` switches is enabled */
  override def beforeGraph: Unit = {
    require(c != m, "Exactly one of the options 'c' and 'm' must be enabled for bcftools call")
  }

  /** Executable, sub-command and all options, without any input or output */
  def cmdBase = required(executable) +
    required("call") +
    optional("-O", O) +
    conditional(v, "-v") +
    conditional(c, "-c") +
    conditional(m, "-m")

  /** Command that reads its input from stdin */
  def cmdPipeInput = cmdBase + "-"

  /**
   * Command that reads from [[input]] and writes to stdout.
   * The input goes through required(...) so the path is formatted and escaped
   * in the same way as the output file in [[cmdLine]].
   */
  def cmdPipe = cmdBase + required(input)

  /** Full command: reads from [[input]] and redirects stdout to [[output]] */
  def cmdLine = cmdPipe + " > " + required(output)
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
/**
 * Created by pjvan_thof on 2/26/15.
 *
 * Wrapper for the GATK analysis type "CombineVariants".
 *
 * Input files are registered either by assigning [[inputFiles]] directly or
 * through [[addInput]], which also attaches a name to the file. Named files
 * are passed as `-V:name file`, unnamed files as `-V file`.
 *
 * @param root Configurable passed in by the code constructing this extension
 */
class CombineVariants(val root: Configurable) extends Gatk {
  val analysisType = "CombineVariants"

  @Input(doc = "", required = true)
  var inputFiles: List[File] = Nil

  @Output(doc = "", required = true)
  var outputFile: File = null

  var setKey: String = null
  var rodPriorityList: String = null

  /** Value for `--minimumN`, read from the "minimumN" config key (default 1) */
  var minimumN: Int = config("minimumN", default = 1)

  /** Must be one of UNIQUIFY, PRIORITIZE, UNSORTED or REQUIRE_UNIQUE when set */
  var genotypeMergeOptions: Option[String] = config("genotypeMergeOptions")

  var excludeNonVariants: Boolean = false

  /** Names attached to input files through [[addInput]] */
  var inputMap: Map[File, String] = Map()

  /**
   * Registers an input file together with the name used to tag it.
   * @param file Input file, appended to [[inputFiles]]
   * @param name Name emitted as `-V:name` for this file
   */
  def addInput(file: File, name: String): Unit = {
    inputFiles :+= file
    inputMap += file -> name
  }

  /**
   * Validates [[genotypeMergeOptions]].
   * @throws IllegalArgumentException when the configured value is not one of the accepted options
   */
  override def beforeGraph: Unit = {
    genotypeMergeOptions match {
      case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None =>
      case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions")
    }
  }

  /** Command line of the parent class extended with the CombineVariants arguments */
  override def commandLine = super.commandLine +
    inputFiles.map { file =>
      inputMap.get(file) match {
        case Some(name) => required("-V:" + name, file)
        case _          => required("-V", file)
      }
    }.mkString +
    required("-o", outputFile) +
    optional("--setKey", setKey) +
    optional("--rod_priority_list", rodPriorityList) +
    // Previously read from the config but never emitted, so the setting had no effect
    optional("--minimumN", minimumN) +
    optional("-genotypeMergeOptions", genotypeMergeOptions) +
    conditional(excludeNonVariants, "--excludeNonVariants")
}
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
 * Created by pjvan_thof on 2/26/15.
 *
 * Base class for GATK wrappers. It contributes the arguments shared by the
 * analysis types: `-T`, `-R`, `-K`, `-L`, `-XL` and `-ped`. Subclasses supply
 * [[analysisType]] and append their own arguments to [[commandLine]].
 */
abstract class Gatk extends BiopetJavaCommandLineFunction {
  // Prepend "gatk" to the sub-path inherited from the parent class
  override def subPath = "gatk" :: super.subPath

  jarFile = config("gatk_jar")

  /** Value passed to `-T`; defined by each concrete wrapper */
  val analysisType: String

  override val defaultVmem = "5G"

  @Input(required = true)
  var reference: File = config("reference")

  @Input(required = false)
  var gatkKey: Option[File] = config("gatk_key")

  /** Interval files, each passed with `-L` */
  @Input(required = false)
  var intervals: List[File] = config("intervals", default = Nil)

  /** Interval files to exclude, each passed with `-XL` */
  @Input(required = false)
  var excludeIntervals: List[File] = config("exclude_intervals", default = Nil)

  @Input(required = false)
  var pedigree: List[File] = config("pedigree", default = Nil)

  /** Command line of the parent class extended with the shared GATK arguments */
  override def commandLine = super.commandLine +
    required("-T", analysisType) +
    required("-R", reference) +
    optional("-K", gatkKey) +
    // Intervals belong to -L; -I is the GATK flag for input BAM files
    repeat("-L", intervals) +
    repeat("-XL", excludeIntervals) +
    repeat("-ped", pedigree)
}
\ No newline at end of file
......@@ -51,7 +51,7 @@ class SamToFastq(val root: Configurable) extends Picard {
var includeNonPjReads: Boolean = config("includeNonPjReads", default = false)
@Argument(doc = "clippingAtribute", required = false)
var clippingAtribute: String = config("clippingAtribute")
var clippingAtribute: Option[String] = config("clippingAtribute")
@Argument(doc = "clippingAction", required = false)
var clippingAction: Option[String] = config("clippingAction")
......
......@@ -22,7 +22,7 @@ import java.io.File
/** Extension for samtools mpileup */
class SamtoolsMpileup(val root: Configurable) extends Samtools {
@Input(doc = "Bam File")
var input: File = _
var input: List[File] = Nil
@Output(doc = "output File")
var output: File = _
......@@ -34,6 +34,7 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools {
var intervalBed: Option[File] = config("interval_bed")
var disableBaq: Boolean = config("disable_baq", default = false)
var u: Boolean = config("u", default = false)
var minMapQuality: Option[Int] = config("min_map_quality")
var minBaseQuality: Option[Int] = config("min_base_quality")
var depth: Option[Int] = config("depth")
......@@ -47,9 +48,10 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools {
optional("-Q", minBaseQuality) +
optional("-d", depth) +
conditional(outputMappingQuality, "-s") +
conditional(disableBaq, "-B")
conditional(disableBaq, "-B") +
conditional(u, "-u")
def cmdPipeInput = cmdBase + "-"
def cmdPipe = cmdBase + required(input)
def cmdPipe = cmdBase + repeat(input)
/** Returns command to execute */
def cmdLine = cmdPipe + " > " + required(output)
......@@ -58,7 +60,7 @@ class SamtoolsMpileup(val root: Configurable) extends Samtools {
object SamtoolsMpileup {
def apply(root: Configurable, input: File, output: File): SamtoolsMpileup = {
val mpileup = new SamtoolsMpileup(root)
mpileup.input = input
mpileup.input = List(input)
mpileup.output = output
return mpileup
}
......
......@@ -268,7 +268,7 @@ object BastyGenerateFasta extends ToolCommand {
val genotype = vcfRecord.getGenotype(cmdArgs.sampleName)
if (genotype == null) return fillAllele("", maxSize)
val AD = genotype.getAD
val AD = if (genotype.hasAD) genotype.getAD else Array.fill(vcfRecord.getAlleles.size())(cmdArgs.minAD)
if (AD == null) return fillAllele("", maxSize)
val maxADid = AD.zipWithIndex.maxBy(_._1)._2
if (AD(maxADid) < cmdArgs.minAD) return fillAllele("", maxSize)
......
......@@ -50,7 +50,8 @@ class FastqSync(val root: Configurable) extends BiopetJavaCommandLineFunction wi
@Output(doc = "Sync statistics", required = true)
var outputStats: File = null
override val defaultVmem = "5G"
override val defaultVmem = "4G"
memoryLimit = Some(1.7)
// executed command line
override def commandLine =
......
......@@ -17,6 +17,7 @@ package nl.lumc.sasc.biopet.tools
import java.io.File
import java.io.PrintWriter
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
import nl.lumc.sasc.biopet.core.ToolCommand
import nl.lumc.sasc.biopet.core.config.Configurable
......@@ -59,10 +60,20 @@ class MpileupToVcf(val root: Configurable) extends BiopetJavaCommandLineFunction
val samtoolsMpileup = new SamtoolsMpileup(this)
}
/**
 * Fills in `sample` from the header of `inputBam` when no sample was set.
 *
 * The sample of the first read group is used. Nothing happens when a sample
 * is already set, or when `inputBam` is unset or does not exist.
 *
 * @throws NoSuchElementException when the header contains no read groups
 */
override def beforeCmd: Unit = {
  // inputBam is only consumed when inputMpileup is null (see commandLine),
  // so it can legitimately be unset here
  if (sample == null && inputBam != null && inputBam.exists()) {
    val inputSam = SamReaderFactory.makeDefault.open(inputBam)
    try {
      val readGroups = inputSam.getFileHeader.getReadGroups
      val samples = readGroups.map(readGroup => readGroup.getSample).distinct
      sample = samples.headOption.getOrElse(
        throw new NoSuchElementException("No read groups found in the header of " + inputBam))
    } finally {
      // Release the reader even when reading the header fails
      inputSam.close()
    }
  }
}
override def commandLine = {
(if (inputMpileup == null) {
val samtoolsMpileup = new SamtoolsMpileup(this)
samtoolsMpileup.input = inputBam
samtoolsMpileup.input = List(inputBam)
samtoolsMpileup.cmdPipe + " | "
} else "") +
super.commandLine +
......
......@@ -313,6 +313,16 @@ object ConfigUtils extends Logging {
any2list(any).map(_.toString)
}
/**
 * Turns an arbitrary value into a list of files.
 * @param any Value to convert
 * @return null when the input is null, otherwise one File per element of
 *         any2list(any), built from the string form of that element
 */
def any2fileList(any: Any): List[File] = any match {
  case null  => null
  case value => any2list(value).map(element => new File(element.toString))
}
/**
* Convert Any to Map[String, Any]
* @param any Input Any value
......@@ -505,6 +515,16 @@ object ConfigUtils extends Logging {
else Nil
}
/**
 * Implicit conversion from a ConfigValue to a list of files.
 * @param value ConfigValue to convert
 * @return Files from any2fileList when requiredValue accepts the value, Nil otherwise
 */
implicit def configValue2fileList(value: ConfigValue): List[File] = {
  val accepted = requiredValue(value)
  if (!accepted) Nil
  else any2fileList(value.value)
}
/**
* Convert ConfigValue to Set[String]
* @param value Input ConfigValue
......
......@@ -55,23 +55,24 @@ class VcfStatsTest extends TestNGSuite with Matchers {
s1.sampleToSample("s1").alleleOverlap = 1
s2.sampleToSample("s2").alleleOverlap = 2
s1.genotypeStats += "1" -> mutable.Map(1 -> 1)
s2.genotypeStats += "2" -> mutable.Map(2 -> 2)
val bla1 = s1.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]()) += "1" -> mutable.Map(1 -> 1)
s1.genotypeStats += "chr" -> bla1
val bla2 = s2.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]()) += "2" -> mutable.Map(2 -> 2)
s2.genotypeStats += "chr" -> bla2
val ss1 = SampleToSampleStats()
val ss2 = SampleToSampleStats()
s1 += s2
s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 2))
s1.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]()) shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 2))
ss1.alleleOverlap = 1
ss2.alleleOverlap = 2
s1.sampleToSample shouldBe mutable.Map("s1" -> ss1, "s2" -> ss2)
s1 += s2
s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 4))
s1.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]()) shouldBe mutable.Map("1" -> mutable.Map(1 -> 1), "2" -> mutable.Map(2 -> 4))
s1 += s1
s1.genotypeStats shouldBe mutable.Map("1" -> mutable.Map(1 -> 2), "2" -> mutable.Map(2 -> 8))
s1.genotypeStats.getOrElse("chr", mutable.Map[String, mutable.Map[Any, Int]]()) shouldBe mutable.Map("1" -> mutable.Map(1 -> 2), "2" -> mutable.Map(2 -> 8))
}
}
......@@ -95,6 +95,16 @@
<artifactId>Toucan</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Shiva</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Basty</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
......
......@@ -16,7 +16,7 @@
package nl.lumc.sasc.biopet.core
object BiopetExecutablePublic extends BiopetExecutable {
def pipelines: List[MainCommand] = List(
def protectedPipelines: List[MainCommand] = List(
nl.lumc.sasc.biopet.pipelines.flexiprep.Flexiprep,
nl.lumc.sasc.biopet.pipelines.mapping.Mapping,
nl.lumc.sasc.biopet.pipelines.gentrap.Gentrap,
......@@ -29,6 +29,12 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.toucan.Toucan
)
def pipelines: List[MainCommand] = List(
nl.lumc.sasc.biopet.pipelines.shiva.Shiva,
nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcalling,
nl.lumc.sasc.biopet.pipelines.basty.Basty
) ::: protectedPipelines
def tools: List[MainCommand] = List(
nl.lumc.sasc.biopet.tools.MergeTables,
nl.lumc.sasc.biopet.tools.WipeReads,
......
......@@ -24,7 +24,8 @@ import org.broadinstitute.gatk.queue.QScript
import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, PipelineCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.{ Ln, Star, Stampy, Bowtie }
import nl.lumc.sasc.biopet.core.{ SampleLibraryTag, BiopetQScript, PipelineCommand }
import nl.lumc.sasc.biopet.extensions._
import nl.lumc.sasc.biopet.extensions.bwa.{ BwaSamse, BwaSampe, BwaAln, BwaMem }
import nl.lumc.sasc.biopet.extensions.{ Gsnap, Tophat }
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
......@@ -167,9 +168,14 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
chunks += (chunkDir -> (removeGz(chunkDir + input_R1.getName),
if (paired) removeGz(chunkDir + input_R2.get.getName) else ""))
}
else chunks += (outputDir -> (
flexiprep.extractIfNeeded(input_R1, flexiprep.outputDir),
if (paired) flexiprep.extractIfNeeded(input_R2.get, flexiprep.outputDir) else "")
else if (skipFlexiprep) {
chunks += (outputDir -> (
extractIfNeeded(input_R1, flexiprep.outputDir),
if (paired) extractIfNeeded(input_R2.get, outputDir) else "")
)
} else chunks += (outputDir -> (
flexiprep.outputFiles("fastq_input_R1"),
if (paired) flexiprep.outputFiles("fastq_input_R2") else "")
)
if (chunking) {
......@@ -495,6 +501,32 @@ class Mapping(val root: Configurable) extends QScript with SummaryQScript with S
RG.substring(0, RG.lastIndexOf("\\t"))
}
//FIXME: This is code duplication from flexiprep, need general class to pass jobs inside a util function
/**
 * Adds a decompression job for a compressed file and returns the file that job produces.
 *
 * Names ending in ".gz" or ".gzip" get a Zcat job, names ending in ".bz2" get
 * a Pbzip2 job; both jobs are flagged as intermediate. Any other file, as well
 * as null, is handed back untouched and no job is added.
 *
 * @param file File to inspect, may be null
 * @param runDir Directory in which the extracted file is placed
 * @return The extracted file, or the input itself when nothing has to be extracted
 */
def extractIfNeeded(file: File, runDir: File): File = {
  if (file == null) file
  else {
    val name = file.getName()
    if (name.endsWith(".gz") || name.endsWith(".gzip")) {
      // Strip whichever of the two gzip suffixes the name actually carries
      val suffix = if (name.endsWith(".gzip")) ".gzip" else ".gz"
      val extracted: File = swapExt(runDir, file, suffix, "")
      val zcatJob = Zcat(this, file, extracted)
      zcatJob.isIntermediate = true
      add(zcatJob)
      extracted
    } else if (name.endsWith(".bz2")) {
      val extracted = swapExt(runDir, file, ".bz2", "")
      val bzipJob = Pbzip2(this, file, extracted)
      bzipJob.isIntermediate = true
      add(bzipJob)
      extracted
    } else file
  }
}
}
// Companion object of the Mapping pipeline; it declares nothing itself, all members come from PipelineCommand
object Mapping extends PipelineCommand
\ No newline at end of file
......@@ -37,18 +37,23 @@ class MappingTest extends TestNGSuite with Matchers {
val chunks = Array(1, 5, 10, 100)
val skipMarkDuplicates = Array(true, false)
val skipFlexipreps = Array(true, false)
val zipped = Array(true, false)
for (
aligner <- aligners;
pair <- paired;
chunk <- chunks;
skipMarkDuplicate <- skipMarkDuplicates;
skipFlexiprep <- skipFlexipreps
) yield Array(aligner, pair, chunk, skipMarkDuplicate, skipFlexiprep)
skipFlexiprep <- skipFlexipreps;
zipped <- zipped
) yield Array(aligner, pair, chunk, skipMarkDuplicate, skipFlexiprep, zipped)
}
@Test(dataProvider = "mappingOptions")
def testMapping(aligner: String, paired: Boolean, chunks: Int, skipMarkDuplicate: Boolean, skipFlexiprep: Boolean) = {
def testMapping(aligner: String, paired: Boolean, chunks: Int,
skipMarkDuplicate: Boolean,
skipFlexiprep: Boolean,
zipped: Boolean) = {
val map = ConfigUtils.mergeMaps(Map("output_dir" -> MappingTest.outputDir,
"aligner" -> aligner,
"number_chunks" -> chunks,
......@@ -57,15 +62,20 @@ class MappingTest extends TestNGSuite with Matchers {
), Map(MappingTest.executables.toSeq: _*))
val mapping: Mapping = initPipeline(map)
if (zipped) {
mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq.gz")
if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq.gz"))
} else {
mapping.input_R1 = new File(mapping.outputDir, "bla_R1.fq")
if (paired) mapping.input_R2 = Some(new File(mapping.outputDir, "bla_R2.fq"))
}
mapping.sampleId = Some("1")
mapping.libId = Some("1")
mapping.script()
//Flexiprep
mapping.functions.count(_.isInstanceOf[Fastqc]) shouldBe (if (skipFlexiprep) 0 else if (paired) 4 else 2)
mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe 0
mapping.functions.count(_.isInstanceOf[Zcat]) shouldBe (if (!zipped || (chunks > 1 && skipFlexiprep)) 0 else if (paired) 2 else 1)
mapping.functions.count(_.isInstanceOf[Seqstat]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 4 else 2) * chunks)
mapping.functions.count(_.isInstanceOf[SeqtkSeq]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks)
mapping.functions.count(_.isInstanceOf[Cutadapt]) shouldBe ((if (skipFlexiprep) 0 else if (paired) 2 else 1) * chunks)
......
......@@ -37,6 +37,8 @@
<module>bam2wig</module>
<module>carp</module>
<module>toucan</module>
<module>shiva</module>
<module>basty</module>
</modules>
<properties>
......
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven descriptor of the Shiva module; version and group are inherited from the Biopet parent -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<parent>
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.3.0-DEV</version>
</parent>
<modelVersion>4.0.0</modelVersion>
<inceptionYear>2015</inceptionYear>
<artifactId>Shiva</artifactId>
<dependencies>
<!-- Artifacts from the same group, pinned to this module's own version -->
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetFramework</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Mapping</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Dependencies below are test scoped -->
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.8</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.11</artifactId>
<version>2.2.1</version>
<scope>test</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.shiva
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.core.PipelineCommand
import org.broadinstitute.gatk.queue.QScript