Commit ea95c71d authored by Peter van 't Hof
Browse files

Added a benchmark pipeline for gvcf

parent f49e6275
/target/
\ No newline at end of file
{
"gvcffiles": ["test4.vcf", "test5.vcf"],
"reference" : "/data/DIV5/SASC/common/gatk_bundle_2.8/hg19/ucsc.hg19.fasta",
"dbsnp": "bla",
"haplotypecaller": {
"stand_call_conf": 20,
"stand_emit_conf": 20
}
}
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>nl.lumc.sasc</groupId>
<artifactId>GatkBenchmarkGenotyping</artifactId>
<version>0.1.3</version>
<packaging>jar</packaging>
<name>GatkBenchmarkGenotyping</name>
<url>http://maven.apache.org</url>
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.1.3</version>
<relativePath>../</relativePath>
</parent>
<!-- Build properties for this module. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- The two sting.* phase properties are not referenced anywhere in this POM;
     presumably they are read by the parent Biopet POM or an inherited plugin
     configuration (TODO confirm). -->
<sting.unpack.phase>prepare-package</sting.unpack.phase>
<sting.shade.phase>package</sting.shade.phase>
<!-- Fully qualified main class; the maven-shade-plugin configuration in this POM
     writes it into the Main-Class manifest entry of the shaded jar. -->
<app.main.class>nl.lumc.sasc.biopet.pipelines.gatk.GatkBenchmarkGenotyping</app.main.class>
</properties>
<!-- Module dependencies. The junit and scalatest entries are commented out,
     so no test libraries are declared by this module itself. -->
<dependencies>
<!-- <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>-->
<!-- Scala compiler artifact; excluded from the shaded jar by the shade plugin configuration. -->
<dependency>
<groupId>org.scala-lang</groupId>
<artifactId>scala-compiler</artifactId>
<version>2.11.0</version>
</dependency>
<!-- <dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.9.2</artifactId>
<version>2.0.M4</version>
<scope>test</scope>
</dependency>-->
<!-- Sibling Biopet artifacts, pinned to the same 0.1.3 version as this module and its parent.
     GatkGenotyping presumably provides the genotyping sub-pipeline this benchmark wraps (TODO confirm). -->
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>GatkGenotyping</artifactId>
<version>0.1.3</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet-Framework</artifactId>
<version>0.1.3</version>
</dependency>
<!-- GATK Queue package; needed to compile, but excluded from the shaded jar by the shade plugin configuration. -->
<dependency>
<groupId>org.broadinstitute.gatk</groupId>
<artifactId>queue-package</artifactId>
<version>3.2-2</version>
</dependency>
</dependencies>
<build>
<!-- Treat every file under the scripts directory as a build resource. -->
<resources>
<resource>
<directory>scripts</directory>
<includes>
<include>**/*</include>
</includes>
</resource>
</resources>
<plugins>
<!-- Scala compilation: one execution that runs both the compile and testCompile goals. -->
<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<id>scala-compile</id>
<goals>
<goal>compile</goal>
<goal>testCompile</goal>
</goals>
<configuration>
<args>
<!-- Passes the -dependencyfile option to the compiler, pointing at a file inside the build directory. -->
<arg>-dependencyfile</arg>
<arg>${project.build.directory}/.scala_dependencies</arg>
</args>
</configuration>
</execution>
</executions>
</plugin>
<!-- <plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.7.2</version>
<executions>
<execution>
<id>default-test</id>
Disable the default-test by putting it in phase none
<phase>none</phase>
</execution>
</executions>
</plugin>-->
<!-- Builds the shaded jar during the package phase. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.3</version>
<configuration>
<finalName>${project.artifactId}-${project.version}</finalName>
<transformers>
<!-- Adds manifest entries; Main-Class comes from the app.main.class property. -->
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<manifestEntries>
<Main-Class>${app.main.class}</Main-Class>
<!-- maven.compile.source / maven.compile.target are not defined in this POM;
     presumably inherited from the parent POM (TODO confirm). -->
<X-Compile-Source-JDK>${maven.compile.source}</X-Compile-Source-JDK>
<X-Compile-Target-JDK>${maven.compile.target}</X-Compile-Target-JDK>
</manifestEntries>
</transformer>
</transformers>
<artifactSet>
<!-- Artifacts left out of the shaded jar: Queue, junit and the Scala runtime artifacts.
     Presumably these are expected on the classpath at run time (TODO confirm). -->
<excludes>
<exclude>org.broadinstitute.gatk:queue-package</exclude>
<exclude>junit:junit</exclude>
<exclude>org.scala-lang:scala-compiler</exclude>
<exclude>org.scala-lang:scala-library</exclude>
<exclude>org.scala-lang:scala-reflect</exclude>
<exclude>org.scala-lang.modules:*</exclude>
</excludes>
</artifactSet>
</configuration>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
package nl.lumc.sasc.biopet.pipelines.gatk
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.config._
import nl.lumc.sasc.biopet.function._
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.queue.extensions.gatk.{CommandLineGATK, GenotypeGVCFs, SelectVariants}
import org.broadinstitute.gatk.queue.function._
import org.broadinstitute.gatk.utils.commandline.{Input, Output, Argument}
import scala.util.Random
/**
 * Benchmark pipeline that genotypes one sample gVCF together with a growing,
 * randomly ordered pool of additional gVCF files.
 *
 * A `GatkGenotyping` sub-pipeline is scheduled for the sample on its own and
 * then once more after every additional gVCF is added to the pool. The run
 * with N pooled files writes to `<outputDir>samples_N/`.
 *
 * @param root the root configurable; the no-argument constructor passes `null`
 */
class GatkBenchmarkGenotyping(val root: Configurable) extends QScript with BiopetQScript {
  /** No-argument constructor; delegates with a `null` root. */
  def this() = this(null)

  /** gVCF of the benchmarked sample; it is part of the input of every scheduled run. */
  @Input(doc="Sample gvcf file")
  var sampleGvcf: File = _

  /** Sample name handed to every `GatkGenotyping` sub-pipeline. */
  @Argument(doc="SampleName", required=true)
  var sampleName: String = _

  /** Additional gVCF files; `init` also prepends the entries of the "gvcffiles" config value. */
  @Input(doc="Gvcf files", shortName="I", required=false)
  var gvcfFiles: List[File] = Nil

  /** Reference sequence; taken from the "reference" config value when not set on the command line. */
  @Argument(doc="Reference", shortName="R", required=false)
  var reference: File = _

  /**
   * dbSNP file; taken from the "dbsnp" config value when present and not set
   * on the command line. It is not read anywhere else in this class.
   */
  @Argument(doc="Dbsnp", shortName="dbsnp", required=false)
  var dbsnp: File = _

  /**
   * Fills unset arguments from the configuration and normalises `outputDir`
   * so that it ends with a slash.
   *
   * @throws IllegalStateException when `outputDir` has not been set
   */
  def init(): Unit = {
    if (configContains("gvcffiles")) for (file <- config("gvcffiles").getList) {
      gvcfFiles ::= file.toString
    }
    if (reference == null) reference = config("reference")
    if (dbsnp == null && configContains("dbsnp")) dbsnp = config("dbsnp")
    if (outputDir == null) throw new IllegalStateException("Missing Output directory on gatk module")
    else if (!outputDir.endsWith("/")) outputDir += "/"
  }

  /**
   * Schedules one genotyping run per pool size: first the sample alone (empty
   * pool), then one run after each gVCF is added to the pool in random order.
   */
  def biopetScript(): Unit = {
    // `distinct` keeps the historical behaviour of using every distinct gVCF exactly once;
    // previously this was a side effect of removing picked files by value.
    val randomOrder = Random.shuffle(gvcfFiles.distinct)
    // scanLeft yields Nil first (the baseline run) and then every successively larger pool,
    // each new file being prepended to the previous pool.
    randomOrder
      .scanLeft(List.empty[File])((pool, gvcf) => gvcf :: pool)
      .foreach(addGenotypingPipeline)
  }

  /**
   * Common settings for GATK command-line functions: the pipeline's reference
   * sequence, a memory limit of 2 and an "h_vmem=4G" job resource request.
   * Not mixed in anywhere inside this class.
   */
  trait gatkArguments extends CommandLineGATK {
    this.reference_sequence = reference
    this.memoryLimit = 2 // NOTE(review): unit is presumably gigabytes; confirm against Queue
    this.jobResourceRequests :+= "h_vmem=4G"
  }

  /**
   * Configures a `GatkGenotyping` sub-pipeline for the sample gVCF plus the
   * given pool and adds all of its functions to this script.
   *
   * @param gvcfPool gVCF files genotyped together with the sample; its size
   *                 determines the `samples_<size>/` output sub-directory
   */
  def addGenotypingPipeline(gvcfPool: List[File]): Unit = {
    val gatkGenotyping = new GatkGenotyping(this)
    gatkGenotyping.inputGvcfs = sampleGvcf :: gvcfPool
    gatkGenotyping.samples :+= sampleName
    gatkGenotyping.outputDir = s"${outputDir}samples_${gvcfPool.size}/"
    gatkGenotyping.init
    gatkGenotyping.biopetScript
    addAll(gatkGenotyping.functions)
  }
}
/**
 * Companion object that exposes the benchmark pipeline through `PipelineCommand`.
 *
 * It only supplies the resource-style path of the compiled pipeline class.
 * NOTE(review): how `PipelineCommand` uses `pipeline` is not visible in this file.
 */
object GatkBenchmarkGenotyping extends PipelineCommand {
  override val pipeline: String =
    "/nl/lumc/sasc/biopet/pipelines/gatk/GatkBenchmarkGenotyping.class"
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment