Commit c19f372f authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Added variantcalling pipeline based on bam files

parent bb32c1aa
/target/
\ No newline at end of file
{
"reference" : "/data/DIV5/SASC/common/gatk_bundle_2.8/hg19/ucsc.hg19.fasta",
"dbsnp": "bla",
"haplotypecaller": {
"stand_call_conf": 20,
"stand_emit_conf": 20
}
}
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>nl.lumc.sasc</groupId>
<artifactId>GatkVariantcalling</artifactId>
<version>0.1.3</version>
<packaging>jar</packaging>
<name>GatkVariantcalling</name>
<url>http://maven.apache.org</url>
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.1.3</version>
<relativePath>../</relativePath>
</parent>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<sting.unpack.phase>prepare-package</sting.unpack.phase>
<sting.shade.phase>package</sting.shade.phase>
<app.main.class>nl.lumc.sasc.biopet.pipelines.gatk.GatkVariantcalling</app.main.class>
</properties>
<dependencies>
<!-- <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>-->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>2.11.0</version>
</dependency>
<!-- <dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.9.2</artifactId>
<version>2.0.M4</version>
<scope>test</scope>
</dependency>-->
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet-Framework</artifactId>
<version>0.1.3</version>
</dependency>
<dependency>
<groupId>org.broadinstitute.sting</groupId>
<artifactId>queue-package</artifactId>
<version>3.1</version>
</dependency>
</dependencies>
<build>
<resources>
<resource>
<directory>scripts</directory>
<includes>
<include>**/*</include>
</includes>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<id>scala-compile</id>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<args>
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<!-- <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.7.2</version>
<executions>
<execution>
<id>default-test</id>
Disable the default-test by putting it in phase none
<phase>none</phase>
</execution>
</executions>
</plugin>-->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<configuration>
<finalName>${project.artifactId}-${project.version}</finalName>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<manifestEntries>
<Main-Class>${app.main.class}</Main-Class>
<X-Compile-Source-JDK>${maven.compile.source}</X-Compile-Source-JDK>
<X-Compile-Target-JDK>${maven.compile.target}</X-Compile-Target-JDK>
</manifestEntries>
</transformer>
</transformers>
<artifactSet>
<excludes>
<exclude>org.broadinstitute.sting:queue-package</exclude>
<exclude>junit:junit</exclude>
<exclude>org.scala-lang:scala-compiler</exclude>
<exclude>org.scala-lang:scala-library</exclude>
<exclude>org.scala-lang:scala-reflect</exclude>
<exclude>org.scala-lang.modules:*</exclude>
</excludes>
</artifactSet>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.config._
import nl.lumc.sasc.biopet.function._
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk.{BaseRecalibrator, CommandLineGATK, HaplotypeCaller, IndelRealigner, PrintReads, RealignerTargetCreator, GenotypeGVCFs}
import org.broadinstitute.sting.queue.function._
import org.broadinstitute.sting.commandline._
import org.broadinstitute.sting.utils.variant.GATKVCFIndexType
class GatkVariantcalling(val root:Configurable) extends QScript with BiopetQScript {
def this() = this(null)
@Input(doc="Bam files (should be deduped bams)", shortName="BAM")
var inputBams: List[File] = Nil
@Argument(doc="Reference", shortName="R", required=false)
var reference: File = _
@Argument(doc="Dbsnp", shortName="dbsnp", required=false)
var dbsnp: File = _
@Argument(doc="OutputName", required=false)
var outputName: String = "hc"
@Output(doc="OutputFile", required=false)
var outputFile: File = _
var gvcfMode = true
var singleGenotyping = false
def init() {
if (gvcfMode) gvcfMode = config("gvcfmode", true)
if (!singleGenotyping) singleGenotyping = config("singlegenotyping", false)
if (reference == null) reference = config("reference")
if (dbsnp == null && configContains("dbsnp")) dbsnp = config("dbsnp")
if (outputFile == null) outputFile = outputDir + outputName + (if (gvcfMode) ".gvcf.vcf" else ".vcf")
}
def biopetScript() {
var bamFiles: List[File] = Nil
for (inputBam <- inputBams) {
var bamFile = if (dbsnp != null) addIndelRealign(inputBam, outputDir) else inputBam
bamFiles :+= addBaseRecalibrator(bamFile, outputDir)
}
addHaplotypeCaller(bamFiles, outputFile)
if (gvcfMode && singleGenotyping) addGenotypeGVCFs(List(outputFile), outputDir)
}
trait gatkArguments extends CommandLineGATK {
this.reference_sequence = reference
this.memoryLimit = 2
this.jobResourceRequests :+= "h_vmem=4G"
}
def addIndelRealign(inputBam:File, dir:String): File = {
val realignerTargetCreator = new RealignerTargetCreator with gatkArguments {
this.I :+= inputBam
this.o = swapExt(dir,inputBam,".bam",".realign.intervals")
this.jobResourceRequests :+= "h_vmem=5G"
if (configContains("scattercount", "realignertargetcreator")) this.scatterCount = config("scattercount", 1, "realignertargetcreator")
}
realignerTargetCreator.isIntermediate = true
add(realignerTargetCreator)
val indelRealigner = new IndelRealigner with gatkArguments {
this.I :+= inputBam
this.targetIntervals = realignerTargetCreator.o
this.o = swapExt(dir,inputBam,".bam",".realign.bam")
if (configContains("scattercount", "indelrealigner")) this.scatterCount = config("scattercount", 1, "indelrealigner")
}
indelRealigner.isIntermediate = true
add(indelRealigner)
return indelRealigner.o
}
def addBaseRecalibrator(inputBam:File, dir:String): File = {
val baseRecalibrator = new BaseRecalibrator with gatkArguments {
this.I :+= inputBam
this.o = swapExt(dir,inputBam,".bam",".baserecal")
if (dbsnp != null) this.knownSites :+= dbsnp
if (configContains("scattercount", "baserecalibrator")) this.scatterCount = config("scattercount", 1, "baserecalibrator")
this.nct = config("threads", 2, "baserecalibrator")
}
baseRecalibrator.isIntermediate = true
add(baseRecalibrator)
val printReads = new PrintReads with gatkArguments {
this.I :+= inputBam
this.o = swapExt(dir,inputBam,".bam",".baserecal.bam")
this.BQSR = baseRecalibrator.o
if (configContains("scattercount", "printreads")) this.scatterCount = config("scattercount", 1, "printreads")
}
printReads.isIntermediate = true
add(printReads)
return printReads.o
}
def addHaplotypeCaller(bamfiles:List[File], outputfile:File): File = {
val haplotypeCaller = new HaplotypeCaller with gatkArguments {
if (configContains("scattercount", "haplotypecaller")) this.scatterCount = config("scattercount", 1, "haplotypecaller")
this.input_file = bamfiles
this.out = outputfile
if (configContains("dbsnp")) this.dbsnp = config("dbsnp")
this.nct = config("threads", 3, "haplotypecaller")
this.memoryLimit = this.nct * 2
// GVCF options
if (gvcfMode) {
this.emitRefConfidence = org.broadinstitute.sting.gatk.walkers.haplotypecaller.HaplotypeCaller.ReferenceConfidenceMode.GVCF
this.variant_index_type = GATKVCFIndexType.LINEAR
this.variant_index_parameter = 128000
}
val inputType:String = config("inputtype", "dna")
if (inputType == "rna") {
this.dontUseSoftClippedBases = config("dontusesoftclippedbases", true, "haplotypecaller")
this.recoverDanglingHeads = config("recoverdanglingheads", true, "haplotypecaller")
this.stand_call_conf = config("stand_call_conf", 20, "haplotypecaller")
this.stand_emit_conf = config("stand_emit_conf", 20, "haplotypecaller")
} else {
this.dontUseSoftClippedBases = config("dontusesoftclippedbases", false, "haplotypecaller")
this.recoverDanglingHeads = config("recoverdanglingheads", false, "haplotypecaller")
this.stand_call_conf = config("stand_call_conf", 30, "haplotypecaller")
this.stand_emit_conf = config("stand_emit_conf", 10, "haplotypecaller")
}
}
add(haplotypeCaller)
return haplotypeCaller.out
}
def addGenotypeGVCFs(gvcfFiles: List[File], dir:String): File = {
val genotypeGVCFs = new GenotypeGVCFs() with gatkArguments {
this.variant = gvcfFiles
this.annotation ++= Seq("FisherStrand", "QualByDepth", "ChromosomeCounts")
if (configContains("dbsnp")) this.dbsnp = config("dbsnp")
if (configContains("scattercount", "genotypegvcfs")) this.scatterCount = config("scattercount", 1, "genotypegvcfs")
this.out = outputDir + outputName + ".vcf"
}
add(genotypeGVCFs)
return genotypeGVCFs.out
}
}
object GatkVariantcalling extends PipelineCommand {
override val pipeline = "/nl/lumc/sasc/biopet/pipelines/gatk/GatkVariantcalling.class"
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment