Commit cb7a051c authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'release-0.7.0' into 'master'

Release 0.7.0



See merge request !449
parents 23c1ccea bf548a42
*.bam binary
*.bam.bai binary
# Project-related
dependency-reduced-pom.xml
git.properties
# gedit
*~
# Vim
*.swp
# IntelliJ
.idea/workspace.xml
/target/
/public/target/
/protected/target/
<?xml version="1.0" encoding="UTF-8"?>
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Maven module for the Bam2Wig artifact, packaged as a jar. -->
<artifactId>Bam2Wig</artifactId>
<packaging>jar</packaging>
<!-- groupId and version are not declared in this file; only the parent block below carries them. -->
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.6.0</version>
<!-- The parent POM is looked up one directory above this module. -->
<relativePath>../</relativePath>
</parent>
<inceptionYear>2014</inceptionYear>
<name>Bam2Wig</name>
<!-- Both Biopet dependencies are pinned to this module's own version via ${project.version}. -->
<dependencies>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetCore</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetExtensions</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.{ Version, BiopetCommandLineFunction, Reference }
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
 * Extension for STAR.
 *
 * The job runs in one of two modes, selected through `runmode`:
 *  - `null` (the default): align `R1` (and `R2` when set) against the index in `genomeDir`.
 *  - `"genomeGenerate"`: build a genome index from `reference` inside `outputDir`.
 */
class Star(val root: Configurable) extends BiopetCommandLineFunction with Reference with Version {
  @Input(doc = "The reference file for the bam files.", required = false)
  var reference: File = null

  @Input(doc = "Fastq file R1", required = false)
  var R1: File = _

  @Input(doc = "Fastq file R2", required = false)
  var R2: File = _

  @Output(doc = "Output SAM file", required = false)
  var outputSam: File = _

  @Output(doc = "Output tab file", required = false)
  var outputTab: File = _

  @Input(doc = "sjdbFileChrStartEnd file", required = false)
  var sjdbFileChrStartEnd: File = _

  @Output(doc = "Output genome file", required = false)
  var outputGenome: File = _

  @Output(doc = "Output SA file", required = false)
  var outputSA: File = _

  @Output(doc = "Output SAindex file", required = false)
  var outputSAindex: File = _

  executable = config("exe", "STAR")

  def versionCommand = executable + " --version"
  def versionRegex = """(.*)""".r

  @Argument(doc = "Output Directory")
  var outputDir: File = _

  /**
   * Directory holding the STAR index. In alignment mode a value assigned by the caller is kept;
   * only when it is still null does beforeGraph() fall back to the configured/default location.
   */
  var genomeDir: File = null

  /** `null` for alignment, `"genomeGenerate"` for building an index. */
  var runmode: String = _

  var sjdbOverhang: Option[Int] = None

  var outFileNamePrefix: String = _

  // NOTE(review): read from config but not referenced by cmdLine, which passes `threads` to --runThreadN.
  var runThreadN: Option[Int] = config("runThreadN")

  override def defaultCoreMemory = 6.0
  override def defaultThreads = 8

  /**
   * Sets output files for the graph.
   *
   * Resolves the reference when unset, normalises `outFileNamePrefix` so it ends with a dot,
   * and derives the mode-specific output files from `outputDir` and the prefix.
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    if (reference == null) reference = referenceFasta()
    if (outFileNamePrefix != null && !outFileNamePrefix.endsWith(".")) outFileNamePrefix += "."
    val prefix =
      if (outFileNamePrefix != null) outputDir + File.separator + outFileNamePrefix
      else outputDir + File.separator
    if (runmode == null) {
      outputSam = new File(prefix + "Aligned.out.sam")
      outputTab = new File(prefix + "SJ.out.tab")
      // Do not clobber an index directory chosen by the caller (e.g. the re-index directory of a
      // 2-pass run): beforeGraph() may run again after that assignment.
      if (genomeDir == null) genomeDir = config("genomeDir", new File(reference.getAbsoluteFile.getParent, "star"))
    } else if (runmode == "genomeGenerate") {
      // When generating an index, the index is written to the output directory itself.
      genomeDir = outputDir
      outputGenome = new File(prefix + "Genome")
      outputSA = new File(prefix + "SA")
      outputSAindex = new File(prefix + "SAindex")
      sjdbOverhang = config("sjdboverhang")
    }
  }

  /**
   * Returns command to execute: changes into `outputDir`, then runs STAR with either the
   * index-generation or the alignment arguments, followed by the arguments common to both modes.
   */
  def cmdLine = {
    val invocation = required("cd", outputDir) + " && " + required(executable)
    val modeArgs =
      if (runmode == "genomeGenerate") // Create index
        required("--runMode", runmode) + required("--genomeFastaFiles", reference)
      else // Aligner
        required("--readFilesIn", R1) + optional(R2)
    invocation + modeArgs +
      required("--genomeDir", genomeDir) +
      optional("--sjdbFileChrStartEnd", sjdbFileChrStartEnd) +
      optional("--runThreadN", threads) +
      optional("--outFileNamePrefix", outFileNamePrefix) +
      optional("--sjdbOverhang", sjdbOverhang)
  }
}
object Star {
  /**
   * Create default star
   * @param configurable root object
   * @param R1 R1 fastq file
   * @param R2 R2 fastq file
   * @param outputDir Outputdir for Star
   * @param isIntermediate When set true jobs are flagged as intermediate
   * @param deps Deps to add to wait on run
   * @return Return Star, with beforeGraph() already called once
   */
  def apply(configurable: Configurable, R1: File, R2: Option[File], outputDir: File, isIntermediate: Boolean = false, deps: List[File] = Nil): Star = {
    val star = new Star(configurable)
    star.R1 = R1
    R2.foreach(r2 => star.R2 = r2)
    star.outputDir = outputDir
    star.isIntermediate = isIntermediate
    star.deps = deps
    star.beforeGraph()
    star
  }

  /**
   * returns Star with 2pass star method
   *
   * Three jobs are built: a first alignment in `aln-pass1`, an index regeneration in `re-index`
   * that consumes the first pass' splice junction table, and a second alignment in `aln-pass2`
   * that uses the regenerated index.
   *
   * @param configurable root object
   * @param R1 R1 fastq file
   * @param R2 R2 fastq file
   * @param outputDir Outputdir for Star
   * @param isIntermediate When set true jobs are flagged as intermediate
   * @param deps Deps to add to wait on run
   * @return The SAM output of the second pass, and the three jobs in the order pass 1, re-index, pass 2
   */
  def _2pass(configurable: Configurable,
             R1: File,
             R2: Option[File],
             outputDir: File,
             isIntermediate: Boolean = false,
             deps: List[File] = Nil): (File, List[Star]) = {
    val starCommandPass1 = Star(configurable, R1, R2, new File(outputDir, "aln-pass1"))
    starCommandPass1.isIntermediate = isIntermediate
    starCommandPass1.deps = deps
    starCommandPass1.beforeGraph()

    val starCommandReindex = new Star(configurable)
    // The junction table is an @Input of the re-index job, which orders it after pass 1.
    starCommandReindex.sjdbFileChrStartEnd = starCommandPass1.outputTab
    starCommandReindex.outputDir = new File(outputDir, "re-index")
    starCommandReindex.runmode = "genomeGenerate"
    starCommandReindex.isIntermediate = isIntermediate
    starCommandReindex.beforeGraph()

    val starCommandPass2 = Star(configurable, R1, R2, new File(outputDir, "aln-pass2"))
    starCommandPass2.genomeDir = starCommandReindex.outputDir
    starCommandPass2.isIntermediate = isIntermediate
    // genomeDir is not an @Input, so pass 2 must depend on the re-index output files explicitly;
    // otherwise nothing orders it after the index generation.
    starCommandPass2.deps = deps ::: List(
      starCommandReindex.outputGenome,
      starCommandReindex.outputSA,
      starCommandReindex.outputSAindex)
    starCommandPass2.beforeGraph()

    (starCommandPass2.outputSam, List(starCommandPass1, starCommandReindex, starCommandPass2))
  }
}
\ No newline at end of file
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
/**
 * Extension for CombineVariants from GATK
 *
 * Inputs are passed as `-V` arguments; an input registered through [[addInput]] is passed as
 * `-V:name` instead so that GATK sees it under that name.
 *
 * Created by pjvan_thof on 2/26/15.
 */
class CombineVariants(val root: Configurable) extends Gatk {
  val analysisType = "CombineVariants"

  @Input(doc = "", required = true)
  var inputFiles: List[File] = Nil

  @Output(doc = "", required = true)
  var outputFile: File = null

  var setKey: String = null
  var rodPriorityList: String = null
  var minimumN: Int = config("minimumN", default = 1)
  var genotypeMergeOptions: Option[String] = config("genotypeMergeOptions")
  var excludeNonVariants: Boolean = false

  /** Name to use for an input file on the command line; files without an entry are unnamed. */
  var inputMap: Map[File, String] = Map()

  /**
   * Registers an input file together with the name it gets on the command line.
   * @param file input variant file
   * @param name name emitted as `-V:name`
   */
  def addInput(file: File, name: String): Unit = {
    inputFiles :+= file
    inputMap += file -> name
  }

  /**
   * Registers the `.tbi` index of a `.vcf.gz` output as an extra output, validates
   * `genotypeMergeOptions`, and adds the `.tbi` index of every `vcf.gz` input to the deps.
   *
   * @throws IllegalArgumentException when `genotypeMergeOptions` holds an unsupported value
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    if (outputFile.getName.endsWith(".vcf.gz")) {
      val outputIndex = new File(outputFile.getAbsolutePath + ".tbi")
      // beforeGraph() can run more than once; do not register the same index output twice.
      if (!outputFiles.contains(outputIndex)) outputFiles :+= outputIndex
    }
    genotypeMergeOptions match {
      case Some("UNIQUIFY") | Some("PRIORITIZE") | Some("UNSORTED") | Some("REQUIRE_UNIQUE") | None =>
      case _ => throw new IllegalArgumentException("Wrong option for genotypeMergeOptions")
    }
    deps :::= inputFiles.filter(_.getName.endsWith("vcf.gz")).map(x => new File(x.getAbsolutePath + ".tbi"))
    deps = deps.distinct
  }

  /** Appends the CombineVariants specific arguments to the general GATK command line. */
  override def cmdLine = super.cmdLine +
    (for (file <- inputFiles) yield {
      inputMap.get(file) match {
        case Some(name) => required("-V:" + name, file)
        case _          => required("-V", file)
      }
    }).mkString +
    required("-o", outputFile) +
    optional("--setKey", setKey) +
    optional("--rod_priority_list", rodPriorityList) +
    // Previously the configured value was read but never handed to GATK.
    optional("--minimumN", minimumN) +
    optional("-genotypeMergeOptions", genotypeMergeOptions) +
    conditional(excludeNonVariants, "--excludeNonVariants")
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.gatk
import java.io.File
import nl.lumc.sasc.biopet.core.{ Version, BiopetJavaCommandLineFunction, Reference }
import org.broadinstitute.gatk.utils.commandline.Input
/**
 * General extension for GATK module
 *
 * Holds the settings shared by the GATK walkers wrapped in this package (jar, reference,
 * key, intervals, pedigree) and renders them as the common part of the command line.
 * Concrete walkers supply `analysisType` and append their own arguments.
 *
 * Created by pjvan_thof on 2/26/15.
 */
abstract class Gatk extends BiopetJavaCommandLineFunction with Reference with Version {
  override def subPath = "gatk" :: super.subPath

  jarFile = config("gatk_jar")

  /** Value passed to `-T`. */
  val analysisType: String

  override def defaultCoreMemory = 3.0

  @Input(required = true)
  var reference: File = null

  @Input(required = false)
  var gatkKey: Option[File] = config("gatk_key")

  @Input(required = false)
  var intervals: List[File] = config("intervals", default = Nil)

  @Input(required = false)
  var excludeIntervals: List[File] = config("exclude_intervals", default = Nil)

  @Input(required = false)
  var pedigree: List[File] = config("pedigree", default = Nil)

  var et: Option[String] = config("et")

  def versionRegex = """(.*)""".r

  // Exit codes 0 and 1 are both accepted for the version command.
  override def versionExitcode = List(0, 1)

  def versionCommand = executable + " -jar " + jarFile + " -version"

  /** Prefixes the detected version, when there is one, with "Gatk ". */
  override def getVersion = super.getVersion.map(detected => "Gatk " + detected)

  override def dictRequired = true

  /** Falls back to the configured reference fasta when no reference was set explicitly. */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    reference match {
      case null => reference = referenceFasta()
      case _    =>
    }
  }

  /** Appends the arguments shared by all GATK walkers to the java command line. */
  override def cmdLine = {
    val sharedArguments = List(
      required("-T", analysisType),
      required("-R", reference),
      optional("-K", gatkKey),
      optional("-et", et),
      repeat("-L", intervals),
      repeat("-XL", excludeIntervals),
      repeat("-ped", pedigree))
    super.cmdLine + sharedArguments.mkString
  }
}
\ No newline at end of file
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
/**
* Due to the license issue with GATK, this part of Biopet can only be used inside the
* LUMC. Please refer to https://git.lumc.nl/biopet/biopet/wikis/home for instructions
* on how to use this protected part of biopet or contact us at sasc@lumc.nl
*/
package nl.lumc.sasc.biopet.extensions.gatk.broad
import nl.lumc.sasc.biopet.core._
import org.broadinstitute.gatk.engine.phonehome.GATKRunReport
import org.broadinstitute.gatk.queue.extensions.gatk.CommandLineGATK
/**
 * Shared settings for jobs that extend the GATK Queue `CommandLineGATK` wrapper.
 *
 * On construction the trait reads jar, reference, intervals, phone-home option, key and
 * pedigree from the config and assigns them to the corresponding fields.
 */
trait GatkGeneral extends CommandLineGATK with CommandLineResources with Reference with Version {
  var executable: String = config("java", default = "java", submodule = "java", freeVar = false)

  override def subPath = "gatk" :: super.subPath

  jarFile = config("gatk_jar")

  reference_sequence = referenceFasta()

  override def defaultCoreMemory = 4.0
  override def faiRequired = true
  override def dictRequired = true

  // Only override a field when the key is actually present in the config.
  if (config.contains("intervals")) intervals = config("intervals").asFileList
  if (config.contains("exclude_intervals")) excludeIntervals = config("exclude_intervals").asFileList

  // Translate the configured "et" string to the matching phone-home option; an absent value
  // leaves the field untouched, an unrecognised one is rejected.
  Option(config("et").value) match {
    case Some("NO_ET")  => et = GATKRunReport.PhoneHomeOption.NO_ET
    case Some("AWS")    => et = GATKRunReport.PhoneHomeOption.AWS
    case Some("STDOUT") => et = GATKRunReport.PhoneHomeOption.STDOUT
    case Some(x)        => throw new IllegalArgumentException(s"Unknown et option for gatk: $x")
    case _              =>
  }

  if (config.contains("gatk_key")) gatk_key = config("gatk_key")
  if (config.contains("pedigree")) pedigree = config("pedigree")

  def versionRegex = """(.*)""".r

  // Exit codes 0 and 1 are both accepted for the version command.
  override def versionExitcode = List(0, 1)

  // Use the configured java executable rather than a hard-coded "java", so the version is
  // queried with the same binary that getVersion resolves below.
  def versionCommand = executable + " -jar " + jarFile + " -version"

  /** Resolves the executable path first, then prefixes the detected version with "Gatk ". */
  override def getVersion = {
    BiopetCommandLineFunction.preProcessExecutable(executable).path.foreach(executable = _)
    super.getVersion.collect { case v => "Gatk " + v }
  }
}
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#