Commit 14d8d45f authored by bow
Browse files

Merge branch 'develop' into feature-gentrap

Conflicts:
	public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/samtools/SamtoolsMpileup.scala
	public/mapping/src/main/scala/nl/lumc/sasc/biopet/pipelines/mapping/Mapping.scala
parents 1c9c025e 118f09fd
...@@ -8,9 +8,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk ...@@ -8,9 +8,7 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.BiopetQScript import nl.lumc.sasc.biopet.core.BiopetQScript
import nl.lumc.sasc.biopet.core.PipelineCommand import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.ApplyRecalibration import nl.lumc.sasc.biopet.extensions.gatk.broad.{ ApplyRecalibration, VariantAnnotator, VariantRecalibrator }
import nl.lumc.sasc.biopet.extensions.gatk.VariantAnnotator
import nl.lumc.sasc.biopet.extensions.gatk.VariantRecalibrator
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
class GatkVariantRecalibration(val root: Configurable) extends QScript with BiopetQScript { class GatkVariantRecalibration(val root: Configurable) extends QScript with BiopetQScript {
......
...@@ -8,9 +8,9 @@ package nl.lumc.sasc.biopet.pipelines.gatk ...@@ -8,9 +8,9 @@ package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand } import nl.lumc.sasc.biopet.core.{ BiopetQScript, PipelineCommand }
import java.io.File import java.io.File
import nl.lumc.sasc.biopet.extensions.Ln import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.gatk.broad._
import nl.lumc.sasc.biopet.tools.{ VcfStats, MpileupToVcf, VcfFilter, MergeAlleles } import nl.lumc.sasc.biopet.tools.{ VcfStats, MpileupToVcf, VcfFilter, MergeAlleles }
import nl.lumc.sasc.biopet.core.config.Configurable import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.{ AnalyzeCovariates, BaseRecalibrator, GenotypeGVCFs, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, SelectVariants, CombineVariants, UnifiedGenotyper }
import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates import nl.lumc.sasc.biopet.extensions.picard.MarkDuplicates
import nl.lumc.sasc.biopet.utils.ConfigUtils import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QScript import org.broadinstitute.gatk.queue.QScript
......
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.broad._
import nl.lumc.sasc.biopet.pipelines.shiva.{ ShivaVariantcallingTrait, ShivaTrait }
import org.broadinstitute.gatk.queue.QScript
/**
 * Shiva pipeline with the GATK steps added on top of [[ShivaTrait]]: the GATK-enabled variant
 * calling module plus optional indel realignment and base recalibration during preprocessing.
 *
 * Created by pjvan_thof on 2/26/15.
 *
 * @param root parent configurable; `null` when created through the no-argument constructor
 */
class Shiva(val root: Configurable) extends QScript with ShivaTrait {
  qscript =>

  def this() = this(null)

  /**
   * Make variantcalling submodule, this with the gatk modes in there.
   *
   * @param multisample when true the module gets the "multisample" name prefix and an extra
   *                    "multisample" element at the end of its config path
   */
  override def makeVariantcalling(multisample: Boolean = false): ShivaVariantcallingTrait = {
    if (multisample) new ShivaVariantcalling(qscript) {
      override def namePrefix = "multisample"
      override def configName = "shivavariantcalling"
      override def configPath: List[String] = super.configPath ::: "multisample" :: Nil
    }
    else new ShivaVariantcalling(qscript) {
      override def configName = "shivavariantcalling"
    }
  }

  /** Makes a sample */
  override def makeSample(id: String) = new this.Sample(id)

  /** Class will generate sample jobs */
  class Sample(sampleId: String) extends super.Sample(sampleId) {

    /** Makes a library */
    override def makeLibrary(id: String) = new this.Library(id)

    /** Class will generate library jobs */
    class Library(libId: String) extends super.Library(libId) {
      /** Config switch "use_indel_realigner" (default true) */
      val useIndelRealigner: Boolean = config("use_indel_realigner", default = true)

      /** Config switch "use_base_recalibration" (default true) */
      val useBaseRecalibration: Boolean = config("use_base_recalibration", default = true)

      /** Return true when base recalibration is enabled and a BaseRecalibrator has known sites */
      protected def doneBaseRecalibrator: Boolean = {
        val br = new BaseRecalibrator(qscript)
        useBaseRecalibration && !br.knownSites.isEmpty
      }

      /**
       * Adds the preprocess steps: GATK indel realignment and base recalibration.
       *
       * @param input bam file to preprocess
       * @return None when neither step will run, otherwise the output of the last step added
       */
      override def preProcess(input: File): Option[File] = {
        val baseRecalDone = doneBaseRecalibrator
        if (!useIndelRealigner && !baseRecalDone) None
        else {
          val multipleLibraries = libraries.size > 1
          // The realigned bam is only a temporary product when another step still follows it
          val realigned =
            if (useIndelRealigner) addIndelRealign(input, libDir, baseRecalDone || multipleLibraries)
            else input
          // addBaseRecalibrator hands back its input untouched when no known sites are configured
          if (useBaseRecalibration) Some(addBaseRecalibrator(realigned, libDir, multipleLibraries))
          else Some(realigned)
        }
      }
    }

    /**
     * Adds the double preprocess steps, followed by GATK indel realignment when enabled.
     *
     * With more than one input the result of the parent implementation is only flagged as
     * intermediate when indel realignment really follows it. Previously it was always flagged,
     * so with "use_indel_realigner" switched off the file returned from here (the final
     * sample-level bam) was itself marked as an intermediate file.
     *
     * @param input bam files of this sample
     * @param isIntermediate accepted for signature compatibility; like in the original code it
     *                       is not forwarded
     */
    override protected def addDoublePreProcess(input: List[File], isIntermediate: Boolean = false): Option[File] = {
      if (input.size <= 1) super.addDoublePreProcess(input)
      else {
        val realign = config("use_indel_realigner", default = true).asBoolean
        super.addDoublePreProcess(input, realign).map { merged =>
          if (realign) addIndelRealign(merged, sampleDir, isIntermediate = false)
          else merged
        }
      }
    }
  }

  /**
   * Adds indel realignment jobs.
   *
   * @param inputBam bam file to realign
   * @param dir directory passed on to both GATK jobs
   * @param isIntermediate value for the isIntermediate flag of the IndelRealigner job
   * @return output file of the IndelRealigner job
   */
  def addIndelRealign(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val targetCreator = RealignerTargetCreator(this, inputBam, dir)
    // The target intervals are only needed by the realigner job below
    targetCreator.isIntermediate = true
    add(targetCreator)

    val realigner = IndelRealigner(this, inputBam, targetCreator.out, dir)
    realigner.isIntermediate = isIntermediate
    add(realigner)
    realigner.o
  }

  /**
   * Adds base recalibration jobs.
   *
   * When the BaseRecalibrator has no known sites a warning is logged, no job is added and the
   * input bam is returned as is. With config value "use_analyze_covariates" (default false) a
   * second BaseRecalibrator run and an AnalyzeCovariates job are added as well.
   *
   * @param inputBam bam file to recalibrate
   * @param dir directory for the output files
   * @param isIntermediate value for the isIntermediate flag of the PrintReads job
   * @return output of PrintReads, or `inputBam` when recalibration is skipped
   */
  def addBaseRecalibrator(inputBam: File, dir: File, isIntermediate: Boolean): File = {
    val baseRecalibrator = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal"))

    if (baseRecalibrator.knownSites.isEmpty) {
      logger.warn("No Known site found, skipping base recalibration, file: " + inputBam)
      inputBam
    } else {
      add(baseRecalibrator)

      if (config("use_analyze_covariates", default = false).asBoolean) {
        val baseRecalibratorAfter = BaseRecalibrator(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.after"))
        baseRecalibratorAfter.BQSR = baseRecalibrator.o
        add(baseRecalibratorAfter)

        add(AnalyzeCovariates(this, baseRecalibrator.o, baseRecalibratorAfter.o, swapExt(dir, inputBam, ".bam", ".baserecal.pdf")))
      }

      val printReads = PrintReads(this, inputBam, swapExt(dir, inputBam, ".bam", ".baserecal.bam"))
      printReads.BQSR = baseRecalibrator.o
      printReads.isIntermediate = isIntermediate
      add(printReads)
      printReads.o
    }
  }
}
/** Companion object; extending PipelineCommand gives this pipeline its default main method */
object Shiva extends PipelineCommand
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core.PipelineCommand
import nl.lumc.sasc.biopet.core.config.Configurable
import nl.lumc.sasc.biopet.extensions.gatk.broad.GenotypeGVCFs
import nl.lumc.sasc.biopet.pipelines.shiva.ShivaVariantcallingTrait
import org.broadinstitute.gatk.queue.QScript
/**
 * Variant calling module that puts the GATK based callers in front of the callers already
 * offered by [[ShivaVariantcallingTrait]].
 *
 * Created by pjvan_thof on 2/26/15.
 */
class ShivaVariantcalling(val root: Configurable) extends QScript with ShivaVariantcallingTrait {
  qscript =>

  def this() = this(null)

  /** All callers defined in this class, followed by the callers of the parent trait */
  override def callersList = {
    List(
      new HaplotypeCallerGvcf,
      new HaplotypeCallerAllele,
      new UnifiedGenotyperAllele,
      new UnifiedGenotyper,
      new HaplotypeCaller
    ) ::: super.callersList
  }

  /** HaplotypeCaller in its default mode, one job for all input bams */
  class HaplotypeCaller extends Variantcaller {
    val name = "haplotypecaller"
    protected val defaultPrio = 1

    def outputFile = new File(outputDir, namePrefix + ".haplotypecaller.vcf.gz")

    def addJobs(): Unit = {
      val caller = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript)
      caller.input_file = inputBams
      caller.out = outputFile
      add(caller)
    }
  }

  /** UnifiedGenotyper in its default mode, one job for all input bams */
  class UnifiedGenotyper extends Variantcaller {
    val name = "unifiedgenotyper"
    protected val defaultPrio = 20

    def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper.vcf.gz")

    def addJobs(): Unit = {
      val genotyper = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript)
      genotyper.input_file = inputBams
      genotyper.out = outputFile
      add(genotyper)
    }
  }

  /** HaplotypeCaller genotyping the alleles given by config value "input_alleles" */
  class HaplotypeCallerAllele extends Variantcaller {
    val name = "haplotypecaller_allele"
    protected val defaultPrio = 5

    def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_allele.vcf.gz")

    def addJobs(): Unit = {
      val caller = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript)
      caller.input_file = inputBams
      caller.out = outputFile
      caller.alleles = config("input_alleles")
      caller.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
      add(caller)
    }
  }

  /** UnifiedGenotyper genotyping the alleles given by config value "input_alleles" */
  class UnifiedGenotyperAllele extends Variantcaller {
    val name = "unifiedgenotyper_allele"
    protected val defaultPrio = 9

    def outputFile = new File(outputDir, namePrefix + ".unifiedgenotyper_allele.vcf.gz")

    def addJobs(): Unit = {
      val genotyper = new nl.lumc.sasc.biopet.extensions.gatk.broad.UnifiedGenotyper(qscript)
      genotyper.input_file = inputBams
      genotyper.out = outputFile
      genotyper.alleles = config("input_alleles")
      genotyper.genotyping_mode = org.broadinstitute.gatk.tools.walkers.genotyper.GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES
      add(genotyper)
    }
  }

  /** HaplotypeCaller in gvcf mode: one job per input bam, combined by a single GenotypeGVCFs job */
  class HaplotypeCallerGvcf extends Variantcaller {
    val name = "haplotypecaller_gvcf"
    protected val defaultPrio = 5

    def outputFile = new File(outputDir, namePrefix + ".haplotypecaller_gvcf.vcf.gz")

    def addJobs(): Unit = {
      // Each bam gets its own gvcf, named after the bam file
      val gvcfFiles = inputBams.map { bam =>
        val caller = new nl.lumc.sasc.biopet.extensions.gatk.broad.HaplotypeCaller(qscript)
        caller.input_file = List(bam)
        caller.out = new File(outputDir, bam.getName.stripSuffix(".bam") + ".gvcf.vcf.gz")
        caller.useGvcf()
        add(caller)
        caller.out
      }

      add(GenotypeGVCFs(qscript, gvcfFiles, outputFile))
    }
  }
}
/** Companion object; extending PipelineCommand adds the default main method to this pipeline */
object ShivaVariantcalling extends PipelineCommand
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.gatk
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.config.Config
import nl.lumc.sasc.biopet.extensions.bwa.BwaMem
import nl.lumc.sasc.biopet.extensions.gatk.broad._
import nl.lumc.sasc.biopet.extensions.picard.{ MarkDuplicates, SortSam }
import nl.lumc.sasc.biopet.tools.VcfStats
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ Test, DataProvider }
/**
 * Runs the Shiva pipeline script for every combination of the test options and checks how many
 * jobs of each type were added.
 *
 * Created by pjvan_thof on 3/2/15.
 */
class ShivaTest extends TestNGSuite with Matchers {

  /** Creates a Shiva pipeline whose global config is `map` merged with the default test config */
  def initPipeline(map: Map[String, Any]): Shiva = {
    new Shiva() {
      override def configName = "shiva"
      override def globalConfig = new Config(ConfigUtils.mergeMaps(map, ShivaTest.config))
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

  /** Every true/false combination of the ten options; the leading "" fills the unused `f` argument */
  @DataProvider(name = "shivaOptions")
  def shivaOptions = {
    val bool = Array(true, false)

    for (
      s1 <- bool; s2 <- bool; s3 <- bool; multi <- bool; single <- bool;
      library <- bool; dbsnp <- bool; covariates <- bool; realign <- bool; baseRecalibration <- bool
    ) yield Array("", s1, s2, s3, multi, single, library, dbsnp, covariates, realign, baseRecalibration)
  }

  /**
   * Builds the config for one option combination, runs the pipeline script and compares the
   * number of added jobs per type with the expected number. Without any sample the script is
   * expected to throw an IllegalArgumentException.
   */
  @Test(dataProvider = "shivaOptions")
  def testShiva(f: String, sample1: Boolean, sample2: Boolean, sample3: Boolean,
                multi: Boolean, single: Boolean, library: Boolean, dbsnp: Boolean,
                covariates: Boolean, realign: Boolean, baseRecalibration: Boolean): Unit = {
    // Optional config parts, merged on top of the default config in this order
    val optionalParts: List[(Boolean, Map[String, Any])] = List(
      (sample1, ShivaTest.sample1),
      (sample2, ShivaTest.sample2),
      (sample3, ShivaTest.sample3),
      (dbsnp, Map("dbsnp" -> "test"))
    )
    val defaults: Map[String, Any] = ShivaTest.config
    val withSamples = optionalParts.foldLeft(defaults) {
      case (merged, (enabled, part)) => if (enabled) ConfigUtils.mergeMaps(part, merged) else merged
    }
    val map = ConfigUtils.mergeMaps(Map("multisample_sample_variantcalling" -> multi,
      "single_sample_variantcalling" -> single,
      "library_variantcalling" -> library,
      "use_analyze_covariates" -> covariates,
      "use_indel_realigner" -> realign,
      "use_base_recalibration" -> baseRecalibration), withSamples)

    if (!sample1 && !sample2 && !sample3) { // When no samples
      intercept[IllegalArgumentException] {
        initPipeline(map).script()
      }
    } else {
      val pipeline = initPipeline(map)
      pipeline.script()

      // sample3 is the only sample with two libraries
      val numberLibs = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 2 else 0)
      val numberSamples = (if (sample1) 1 else 0) + (if (sample2) 1 else 0) + (if (sample3) 1 else 0)

      // One job per library plus one extra job for sample3
      val libsPlusMerged = numberLibs + (if (sample3) 1 else 0)
      val realignJobs = if (realign) libsPlusMerged else 0
      // Base recalibration is only counted when enabled and dbsnp is configured
      val recalibratedLibs = if (dbsnp && baseRecalibration) numberLibs else 0

      pipeline.functions.count(_.isInstanceOf[BwaMem]) shouldBe numberLibs
      pipeline.functions.count(_.isInstanceOf[SortSam]) shouldBe numberLibs
      pipeline.functions.count(_.isInstanceOf[MarkDuplicates]) shouldBe libsPlusMerged

      // Gatk preprocess
      pipeline.functions.count(_.isInstanceOf[IndelRealigner]) shouldBe realignJobs
      pipeline.functions.count(_.isInstanceOf[RealignerTargetCreator]) shouldBe realignJobs
      pipeline.functions.count(_.isInstanceOf[BaseRecalibrator]) shouldBe recalibratedLibs * (if (covariates) 2 else 1)
      pipeline.functions.count(_.isInstanceOf[AnalyzeCovariates]) shouldBe (if (covariates) recalibratedLibs else 0)
      pipeline.functions.count(_.isInstanceOf[PrintReads]) shouldBe recalibratedLibs

      pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (if (multi) 2 else 0) +
        (if (single) numberSamples * 2 else 0) + (if (library) numberLibs * 2 else 0)
    }
  }

  /** Removes the temporary output directory created for this test class */
  @org.testng.annotations.AfterClass
  def removeTempOutputDir(): Unit = {
    org.apache.commons.io.FileUtils.deleteDirectory(ShivaTest.outputDir)
  }
}
/** Shared fixtures for [[ShivaTest]]: temporary output directory, default config and sample configs */
object ShivaTest {
  /** Temporary directory used as "output_dir" in the default config */
  val outputDir = Files.createTempDir()

  /**
   * Default config for the tests; every value is a placeholder.
   * Each key is listed once: the original literal repeated "fastqc" and "samtools" with the
   * same value, where the later entry silently replaced the earlier one.
   */
  val config = Map(
    "name_prefix" -> "test",
    "output_dir" -> outputDir,
    "reference" -> "test",
    "gatk_jar" -> "test",
    "samtools" -> Map("exe" -> "test"),
    "bcftools" -> Map("exe" -> "test"),
    "fastqc" -> Map("exe" -> "test"),
    "input_alleles" -> "test",
    "variantcallers" -> "raw",
    "seqtk" -> Map("exe" -> "test"),
    "sickle" -> Map("exe" -> "test"),
    "cutadapt" -> Map("exe" -> "test"),
    "bwa" -> Map("exe" -> "test"),
    "macs2" -> Map("exe" -> "test"),
    "igvtools" -> Map("exe" -> "test"),
    "wigtobigwig" -> Map("exe" -> "test")
  )

  /** Sample with a single paired-end library */
  val sample1 = Map(
    "samples" -> Map("sample1" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> "1_1_R1.fq",
        "R2" -> "1_1_R2.fq"
      )
    )
    )))

  /** Sample with a single paired-end library */
  val sample2 = Map(
    "samples" -> Map("sample2" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> "2_1_R1.fq",
        "R2" -> "2_1_R2.fq"
      )
    )
    )))

  /** Sample with two paired-end libraries */
  val sample3 = Map(
    "samples" -> Map("sample3" -> Map("libraries" -> Map(
      "lib1" -> Map(
        "R1" -> "3_1_R1.fq",
        "R2" -> "3_1_R2.fq"
      ),
      "lib2" -> Map(
        "R1" -> "3_2_R1.fq",
        "R2" -> "3_2_R2.fq"
      )
    )
    )))
}
\ No newline at end of file
package nl.lumc.sasc.biopet.pipelines.gatk
import java.io.File
import com.google.common.io.Files
import nl.lumc.sasc.biopet.core.config.Config
import nl.lumc.sasc.biopet.extensions.gatk.CombineVariants
import nl.lumc.sasc.biopet.extensions.gatk.broad.{ UnifiedGenotyper, HaplotypeCaller }
import nl.lumc.sasc.biopet.tools.{ VcfStats, MpileupToVcf, VcfFilter }
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.apache.commons.io.FileUtils
import org.broadinstitute.gatk.queue.QSettings
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.{ DataProvider, Test, AfterClass }
import scala.collection.mutable.ListBuffer
/**
 * Runs the ShivaVariantcalling script for every combination of callers and 0 to 2 input bams
 * and checks how many jobs of each type were added.
 *
 * Created by pjvan_thof on 3/2/15.
 */
class ShivaVariantcallingTest extends TestNGSuite with Matchers {

  /** Creates a pipeline whose global config is `map` merged with the default test config */
  def initPipeline(map: Map[String, Any]): ShivaVariantcalling = {
    new ShivaVariantcalling() {
      override def configName = "shivavariantcalling"
      override def globalConfig = new Config(ConfigUtils.mergeMaps(map, ShivaVariantcallingTest.config))
      qSettings = new QSettings
      qSettings.runName = "test"
    }
  }

  /** Number of bams (0 to 2) combined with every on/off combination of the seven callers */
  @DataProvider(name = "shivaVariantcallingOptions")
  def shivaVariantcallingOptions = {
    val bool = Array(true, false)

    (for (
      bams <- 0 to 2;
      raw <- bool;
      bcftools <- bool;
      haplotypeCallerGvcf <- bool;
      haplotypeCallerAllele <- bool;
      unifiedGenotyperAllele <- bool;
      unifiedGenotyper <- bool;
      haplotypeCaller <- bool
    ) yield Array[Any](bams, raw, bcftools, unifiedGenotyper, haplotypeCaller, haplotypeCallerGvcf, haplotypeCallerAllele, unifiedGenotyperAllele)
    ).toArray
  }

  /**
   * Runs the script for one option combination. Without input bams or without any caller an
   * IllegalArgumentException is expected, otherwise the job counts are checked.
   *
   * The return type is explicitly Unit: TestNG skips test methods that return a value unless
   * told otherwise, and an inferred type here depends on what the matchers return.
   */
  @Test(dataProvider = "shivaVariantcallingOptions")
  def testShivaVariantcalling(bams: Int,
                              raw: Boolean,
                              bcftools: Boolean,
                              unifiedGenotyper: Boolean,
                              haplotypeCaller: Boolean,
                              haplotypeCallerGvcf: Boolean,
                              haplotypeCallerAllele: Boolean,
                              unifiedGenotyperAllele: Boolean): Unit = {
    // Names of the enabled callers, in the order they are handed to the config
    val callers: List[String] = List(
      (raw, "raw"),
      (bcftools, "bcftools"),
      (unifiedGenotyper, "unifiedgenotyper"),
      (haplotypeCallerGvcf, "haplotypecaller_gvcf"),
      (haplotypeCallerAllele, "haplotypecaller_allele"),
      (unifiedGenotyperAllele, "unifiedgenotyper_allele"),
      (haplotypeCaller, "haplotypecaller")
    ).collect { case (true, callerName) => callerName }

    val map = Map("variantcallers" -> callers)
    val pipeline = initPipeline(map)

    pipeline.inputBams = (for (n <- 1 to bams) yield new File("bam_" + n + ".bam")).toList

    // No caller enabled is the same as an empty list of caller names
    val illegalArgumentException = pipeline.inputBams.isEmpty || callers.isEmpty

    if (illegalArgumentException) {
      intercept[IllegalArgumentException] {
        pipeline.script()
      }
    } else {
      pipeline.script()

      pipeline.functions.count(_.isInstanceOf[CombineVariants]) shouldBe 1 + (if (raw) 1 else 0)
      //pipeline.functions.count(_.isInstanceOf[Bcftools]) shouldBe (if (bcftools) 1 else 0)
      //FIXME: Can not check for bcftools because of piping
      pipeline.functions.count(_.isInstanceOf[MpileupToVcf]) shouldBe (if (raw) bams else 0)
      pipeline.functions.count(_.isInstanceOf[VcfFilter]) shouldBe (if (raw) bams else 0)
      pipeline.functions.count(_.isInstanceOf[HaplotypeCaller]) shouldBe (if (haplotypeCaller) 1 else 0) +
        (if (haplotypeCallerAllele) 1 else 0) + (if (haplotypeCallerGvcf) bams else 0)
      pipeline.functions.count(_.isInstanceOf[UnifiedGenotyper]) shouldBe (if (unifiedGenotyper) 1 else 0) +
        (if (unifiedGenotyperAllele) 1 else 0)
      pipeline.functions.count(_.isInstanceOf[VcfStats]) shouldBe (1 + callers.size)
    }
  }

  /** Removes the temporary output directory created for this test class */
  @AfterClass def removeTempOutputDir(): Unit = {
    FileUtils.deleteDirectory(ShivaVariantcallingTest.outputDir)
  }
}
/** Shared fixtures for [[ShivaVariantcallingTest]] */
object ShivaVariantcallingTest {
  /** Temporary directory used as "output_dir" in the default config */
  val outputDir = Files.createTempDir()

  /** Placeholder executable entry, the same for every tool */
  private val placeholderExe = Map("exe" -> "test")

  /** Default config for the tests; every value is a placeholder */
  val config = Map(
    "name_prefix" -> "test",
    "output_dir" -> outputDir,
    "reference" -> "test",
    "gatk_jar" -> "test",
    "samtools" -> placeholderExe,
    "bcftools" -> placeholderExe,
    "input_alleles" -> "test"
  )
}
\ No newline at end of file
...@@ -6,13 +6,12 @@ ...@@ -6,13 +6,12 @@
package nl.lumc.sasc.biopet.core package nl.lumc.sasc.biopet.core
object BiopetExecutableProtected extends BiopetExecutable { object BiopetExecutableProtected extends BiopetExecutable {
def pipelines: List[MainCommand] = BiopetExecutablePublic.pipelines ::: List( def pipelines: List[MainCommand] = BiopetExecutablePublic.protectedPipelines ::: List(
nl.lumc.sasc.biopet.pipelines.gatk.GatkBenchmarkGenotyping,
nl.lumc.sasc.biopet.pipelines.gatk.GatkGenotyping,
nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantcalling, nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantcalling,
nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline, nl.lumc.sasc.biopet.pipelines.gatk.GatkPipeline,
nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantRecalibration, nl.lumc.sasc.biopet.pipelines.gatk.Shiva,
nl.lumc.sasc.biopet.pipelines.basty.Basty) nl.lumc.sasc.biopet.pipelines.gatk.ShivaVariantcalling,
nl.lumc.sasc.biopet.pipelines.gatk.Basty)
def tools = BiopetExecutablePublic.tools def tools = BiopetExe