Commit 3fa85cef authored by Sander Bollen, committed by GitHub

Merge branch 'develop' into feature-tarmac-pipeline

parents cb03dd7b c99aa979
......@@ -21,10 +21,6 @@ node('local') {
}
}
stage('Report Tests') {
junit '*/target/surefire-reports/*.xml'
}
stage('Check git on changes') {
sh 'if [ $(git diff | wc -l) -eq 0 ]; then true; else echo "[ERROR] Git is not clean anymore after build"; git diff; echo "[ERROR] This might be caused by reformated code, if so run maven locally"; false; fi'
}
......
......@@ -39,7 +39,7 @@
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetToolsExtensions</artifactId>
<artifactId>BiopetExtensions</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
......
......@@ -42,6 +42,9 @@ class BamMetrics(val parent: Configurable)
@Input(doc = "Bam File", shortName = "BAM", required = true)
var inputBam: File = _
@Argument(required = false)
var paired: Boolean = true
override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))
/** returns files to store in summary */
......@@ -87,6 +90,9 @@ class BamMetrics(val parent: Configurable)
val multiMetrics = new CollectMultipleMetrics(this)
multiMetrics.input = inputBam
multiMetrics.outputName = new File(outputDir, inputBam.getName.stripSuffix(".bam"))
if (!paired)
multiMetrics.program = multiMetrics.program
.filter(_ != CollectMultipleMetrics.Programs.CollectInsertSizeMetrics)
add(multiMetrics)
addSummarizable(multiMetrics, "multi_metrics")
......@@ -212,6 +218,7 @@ object BamMetrics extends PipelineCommand {
def apply(root: Configurable,
bamFile: File,
outputDir: File,
paired: Boolean,
sampleId: Option[String] = None,
libId: Option[String] = None): BamMetrics = {
val bamMetrics = new BamMetrics(root)
......@@ -219,6 +226,7 @@ object BamMetrics extends PipelineCommand {
bamMetrics.libId = libId
bamMetrics.inputBam = bamFile
bamMetrics.outputDir = outputDir
bamMetrics.paired = paired
bamMetrics.init()
bamMetrics.biopetScript()
......
......@@ -39,7 +39,7 @@ class CoverageStats(val parent: Configurable) extends PythonCommandLineFunction
override def defaultCoreMemory = 9.0
def cmdLine =
def cmdLine: String =
getPythonCommand +
(if (inputAsStdin) " - " else required(input)) +
required("--plot", plot) +
......
......@@ -35,6 +35,11 @@
<artifactId>BiopetUtils</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetTools</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.broadinstitute.gatk</groupId>
<artifactId>gatk-queue</artifactId>
......
......@@ -44,7 +44,7 @@ trait BiopetCommandLineFunction extends CommandLineResources { biopetFunction =>
val preCommands: List[String] = config("pre_commands", default = Nil, freeVar = false)
private def changeScript(file: File): Unit = {
protected def changeScript(file: File): Unit = {
val lines = Source.fromFile(file).getLines().toList
val writer = new PrintWriter(file)
remoteCommand match {
......
......@@ -14,11 +14,14 @@
*/
package nl.lumc.sasc.biopet.core
import java.io.File
import java.io.{File, PrintWriter}
import nl.lumc.sasc.biopet.utils.Logging
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Output
import scala.io.Source
/**
* Created by pjvan_thof on 9/29/15.
*/
......@@ -30,13 +33,13 @@ class BiopetFifoPipe(val parent: Configurable,
val outputs: Map[BiopetCommandLineFunction, Seq[File]] = try {
commands.map(x => x -> x.outputs).toMap
} catch {
case e: NullPointerException => Map()
case _: NullPointerException => Map()
}
val inputs: Map[BiopetCommandLineFunction, Seq[File]] = try {
commands.map(x => x -> x.inputs).toMap
} catch {
case e: NullPointerException => Map()
case _: NullPointerException => Map()
}
for (cmdOutput <- commands;
......@@ -52,13 +55,13 @@ class BiopetFifoPipe(val parent: Configurable,
val outputs: Map[BiopetCommandLineFunction, Seq[File]] = try {
commands.map(x => x -> x.outputs).toMap
} catch {
case e: NullPointerException => Map()
case _: NullPointerException => Map()
}
val inputs: Map[BiopetCommandLineFunction, Seq[File]] = try {
commands.map(x => x -> x.inputs).toMap
} catch {
case e: NullPointerException => Map()
case _: NullPointerException => Map()
}
val fifoFiles = fifos
......@@ -83,14 +86,16 @@ class BiopetFifoPipe(val parent: Configurable,
}
}
def cmdLine = {
val fifosFiles = this.fifos
fifosFiles.filter(_.exists()).map(required("rm", _)).mkString("\n\n", " \n", " \n\n") +
fifosFiles.map(required("mkfifo", _)).mkString("\n\n", "\n", "\n\n") +
commands.map(_.commandLine).mkString("\n\n", " & \n", " & \n\n") +
BiopetFifoPipe.waitScript +
fifosFiles.map(required("rm", _)).mkString("\n\n", " \n", " \n\n") +
BiopetFifoPipe.endScript
def cmdLine: String = {
this.fifos.filter(_.exists()).map(required("rm", _)).mkString("", "\n", "\n") +
this.fifos.map(required("mkfifo", _)).mkString("\n") +
commands.map(_.commandLine).mkString("\n", " & \n", " & \n")
}
/** This will add the control code to the script for fifo pipes */
override protected def changeScript(file: File): Unit = {
super.changeScript(file)
BiopetFifoPipe.changeScript(file, fifos)
}
override def setResources(): Unit = {
......@@ -110,7 +115,27 @@ class BiopetFifoPipe(val parent: Configurable,
}
object BiopetFifoPipe {
val waitScript =
/**
 * Appends the fifo-pipe control code to an existing job script.
 *
 * The current content of `file` is read completely and written back unchanged, followed by
 * [[BiopetFifoPipe.waitScript]], one `rm` line per fifo and [[BiopetFifoPipe.endScript]].
 * When debug logging is enabled the resulting script is logged.
 *
 * @param file  script file that is rewritten in place
 * @param fifos fifo files for which an `rm` line is appended
 */
def changeScript(file: File, fifos: List[File]): Unit = {
  // Read everything up front: the same file is truncated by the PrintWriter below.
  val reader = Source.fromFile(file)
  val lines = try reader.getLines().toList
  finally reader.close()

  // Close (and thereby flush) the writer even when writing fails half-way.
  val writer = new PrintWriter(file)
  try {
    lines.foreach(writer.println)
    writer.println(BiopetFifoPipe.waitScript)
    writer.println(fifos.map("rm " + _).mkString(" \n"))
    writer.println(BiopetFifoPipe.endScript)
  } finally writer.close()

  if (Logging.logger.isDebugEnabled) {
    val debugReader = Source.fromFile(file)
    try Logging.logger.debug(
      s"Content of script $file:\n" + debugReader.getLines().mkString("\n"))
    finally debugReader.close()
  }
}
val waitScript: String =
"""
|
|allJobs=`jobs -p`
......@@ -156,7 +181,7 @@ object BiopetFifoPipe {
|
""".stripMargin
val endScript =
val endScript: String =
"""
|
|if [ "$FAIL" == "0" ];
......
......@@ -127,7 +127,7 @@ trait BiopetQScript extends Configurable with GatkLogging { qscript: QScript =>
logger.info("Checking input files")
inputFiles.par.foreach { i =>
if (!i.file.exists()) Logging.addError(s"Input file does not exist: ${i.file}")
if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
else if (!i.file.canRead) Logging.addError(s"Input file can not be read: ${i.file}")
if (!i.file.isAbsolute) Logging.addError(s"Input file should be an absolute path: ${i.file}")
}
......
......@@ -12,10 +12,11 @@
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.shiva
package nl.lumc.sasc.biopet.core.extensions
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.Input
......@@ -26,20 +27,32 @@ import scala.io.Source
*
* Created by pjvanthof on 16/08/15.
*/
class CheckValidateVcf extends InProcessFunction {
class CheckValidateVcf(val parent: Configurable) extends InProcessFunction with Configurable {
@Input(required = true)
var inputLogFile: File = _
val abortOnError: Boolean = config("abort_on_error", default = true)
var species: String = ""
var genomeName: String = ""
/** Scans the input log for lines starting with ERROR; halts the pipeline when abort_on_error is set, otherwise only logs a warning */
def run: Unit = {
def run(): Unit = {
val reader = Source.fromFile(inputLogFile)
reader.getLines().foreach { line =>
if (line.startsWith("ERROR")) {
logger.error("Corrupt vcf file found, aborting pipeline")
if (abortOnError) {
logger.error("Corrupt vcf file found, aborting pipeline")
// 130 Simulates a ctr-C
Runtime.getRuntime.halt(130)
// 130 Simulates a ctr-C
Runtime.getRuntime.halt(130)
} else {
logger.warn(
s"Corrupt vcf file found for $species-$genomeName, for details see $inputLogFile")
}
}
}
reader.close()
......
......@@ -19,6 +19,7 @@ import java.io.{File, FileOutputStream}
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
import nl.lumc.sasc.biopet.utils.Logging
import org.broadinstitute.gatk.utils.commandline.Input
import scala.collection.mutable
trait PythonCommandLineFunction extends BiopetCommandLineFunction {
@Input(doc = "Python script", required = false)
......@@ -34,10 +35,13 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
*/
def setPythonScript(script: String) {
pythonScript = new File(script).getAbsoluteFile
if (!pythonScript.exists()) {
if (!PythonCommandLineFunction.alreadyCopied.contains((this.getClass, script))) {
setPythonScript(script, "")
this.getClass
PythonCommandLineFunction.alreadyCopied += (this.getClass, script) -> pythonScript
} else {
pythonScriptName = script
pythonScript = PythonCommandLineFunction.alreadyCopied((this.getClass, script))
}
}
......@@ -48,14 +52,18 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
*/
def setPythonScript(script: String, subpackage: String) {
pythonScriptName = script
pythonScript = new File(".queue/tmp/" + subpackage + pythonScriptName).getAbsoluteFile
if (!pythonScript.getParentFile.exists) pythonScript.getParentFile.mkdirs
val is = getClass.getResourceAsStream(subpackage + pythonScriptName)
if (is != null) {
val os = new FileOutputStream(pythonScript)
org.apache.commons.io.IOUtils.copy(is, os)
os.close()
} else Logging.addError(s"Python script not found: $pythonScriptName")
if (new File(script).isAbsolute && new File(script).exists()) {
pythonScript = new File(script)
} else {
pythonScript = new File(".queue/tmp/" + subpackage + pythonScriptName).getAbsoluteFile
if (!pythonScript.getParentFile.exists) pythonScript.getParentFile.mkdirs
val is = getClass.getResourceAsStream(subpackage + pythonScriptName)
if (is != null) {
val os = new FileOutputStream(pythonScript)
org.apache.commons.io.IOUtils.copy(is, os)
os.close()
} else Logging.addError(s"Python script not found: $pythonScriptName")
}
}
/** return basic command to prefix the complete command with */
......@@ -63,3 +71,7 @@ trait PythonCommandLineFunction extends BiopetCommandLineFunction {
required(executable) + required(pythonScript)
}
}
/**
 * Companion object holding state shared by all [[PythonCommandLineFunction]] instances.
 */
object PythonCommandLineFunction {
// Lookup of python scripts that were already set up by setPythonScript(script), keyed by the
// (class, script name) pair and mapping to the resolved script file.
// NOTE(review): this is a plain mutable.Map with no visible synchronization - confirm that it
// is only accessed from a single thread.
private val alreadyCopied: mutable.Map[(Class[_], String), File] = mutable.Map()
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.Input
import scala.io.Source
/**
 * This class checks a validation log file (presumably the output of
 * [[nl.lumc.sasc.biopet.tools.ValidateAnnotation]] - confirm against the caller) and aborts the
 * pipeline when an error has been found, unless `abort_on_error` is set to false in the config.
 *
 * Created by pjvanthof on 16/08/15.
 */
class CheckValidateAnnotation(val parent: Configurable)
    extends InProcessFunction
    with Configurable {

  /** Log file that is scanned for lines starting with "ERROR" */
  @Input(required = true)
  var inputLogFile: File = _

  /** When true (the default) the JVM is halted on the first error line found */
  val abortOnError: Boolean = config("abort_on_error", default = true)

  /** Species name, only used in the warning message */
  var species: String = ""

  /** Genome name, only used in the warning message */
  var genomeName: String = ""

  /**
   * Scans [[inputLogFile]] line by line. For each line starting with "ERROR" the JVM is halted
   * with exit code 130 when [[abortOnError]] is set, otherwise a warning is logged and the scan
   * continues.
   */
  def run(): Unit = {
    val reader = Source.fromFile(inputLogFile)
    // try/finally makes sure the log file is closed when reading fails
    try {
      reader.getLines().foreach { line =>
        if (line.startsWith("ERROR")) {
          if (abortOnError) {
            logger.error("Corrupt annotations files found, aborting pipeline")
            // 130 Simulates a ctr-C
            Runtime.getRuntime.halt(130)
          } else {
            logger.warn(s"Corrupt annotations files found for $species-$genomeName")
            logger.warn(
              "**** You enabled a unsafe method by letting the pipeline continue with incorrect annotations files ****")
          }
        }
      }
    } finally reader.close()
  }
}
......@@ -20,9 +20,6 @@ import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
* @deprecated Use picard.util.BedToIntervalList instead
*/
class DownloadNcbiAssembly(val parent: Configurable) extends ToolCommandFunction {
def toolObject = nl.lumc.sasc.biopet.tools.DownloadNcbiAssembly
......
......@@ -74,7 +74,7 @@ class FastqSync(val parent: Configurable) extends ToolCommandFunction with Summa
val regex = new Regex(
"""Filtered (\d*) reads from first read file.
|Filtered (\d*) reads from second read file.
|Synced read files contain (\d*) reads.""".stripMargin,
|Synced files contain (\d*) reads.""".stripMargin,
"R1",
"R2",
"RL"
......
......@@ -18,8 +18,6 @@ import java.io.File
import htsjdk.samtools.SamReaderFactory
import nl.lumc.sasc.biopet.core.{Reference, ToolCommandFunction}
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsMpileup
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
......@@ -54,7 +52,6 @@ class MpileupToVcf(val parent: Configurable) extends ToolCommandFunction with Re
super.beforeGraph()
if (reference == null) reference = referenceFasta().getAbsolutePath
if (output.getName.endsWith(".vcf.gz")) outputIndex = new File(output.getAbsolutePath + ".tbi")
val samtoolsMpileup = new SamtoolsMpileup(this)
}
override def beforeCmd(): Unit = {
......
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.ToolCommandFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
/**
 * Queue extension for the biopet tool [[nl.lumc.sasc.biopet.tools.NcbiReportToContigMap]].
 *
 * Builds the command line for the tool from the fields below.
 */
class NcbiReportToContigMap(val parent: Configurable) extends ToolCommandFunction {
  def toolObject = nl.lumc.sasc.biopet.tools.NcbiReportToContigMap

  /** Passed to the tool as `-o` */
  @Output(doc = "Output contig map file", required = true)
  var contigMap: File = _

  /** Passed to the tool as `--report` */
  // NOTE(review): the name suggests this is an output file, but it carries no @Output
  // annotation - confirm whether it should have one.
  var outputReport: File = _

  /** Passed to the tool as `-a` */
  @Input(required = true)
  var assemblyReport: File = _

  /** Passed to the tool as `--nameHeader` */
  var nameHeader: String = _

  override def defaultCoreMemory = 4.0

  /** Creates the command to execute this extension */
  override def cmdLine: String =
    super.cmdLine +
      required("-a", assemblyReport) +
      required("--report", outputReport) +
      required("-o", contigMap) +
      required("--nameHeader", nameHeader)
}
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.{Reference, ToolCommandFunction}
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.Input
/**
 * Queue extension for the biopet tool [[nl.lumc.sasc.biopet.tools.ValidateAnnotation]].
 *
 * Builds the command line for the tool from the fields below.
 */
class ValidateAnnotation(val parent: Configurable) extends ToolCommandFunction with Reference {
  def toolObject = nl.lumc.sasc.biopet.tools.ValidateAnnotation

  /** Optional refflat file, passed to the tool as `-r` when defined */
  // Initialised to None instead of `_`: the default initialiser of an Option field is null.
  @Input(required = false)
  var refflatFile: Option[File] = None

  /** Gtf files, each passed to the tool with a separate `-g` */
  @Input(required = false)
  var gtfFile: List[File] = Nil

  /** Reference file passed as `-R`; filled from referenceFasta() in beforeGraph when not set */
  @Input(required = true)
  var reference: File = _

  /** When true the flag `--disableFail` is added to the command line */
  var disableFail: Boolean = false

  override def defaultCoreMemory = 16.0

  /** Falls back to the configured reference fasta when no reference was set explicitly */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    if (reference == null) reference = referenceFasta()
  }

  /** Creates the command to execute this extension */
  override def cmdLine: String =
    super.cmdLine +
      optional("-r", refflatFile) +
      repeat("-g", gtfFile) +
      required("-R", reference) +
      conditional(disableFail, "--disableFail")
}
......@@ -16,14 +16,12 @@ package nl.lumc.sasc.biopet.extensions.tools
import java.io.File
import nl.lumc.sasc.biopet.core.summary.{Summarizable}
import nl.lumc.sasc.biopet.core.summary.Summarizable
import nl.lumc.sasc.biopet.core.{Reference, ToolCommandFunction}
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.{ConfigUtils}
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.utils.commandline.{Input, Output}
import scala.io.Source
/**
* This tool will generate statistics from a vcf file
*
......@@ -41,10 +39,10 @@ class VcfStats(val parent: Configurable)
var input: File = _
@Input
protected var index: File = null
protected var index: File = _
@Output
protected var statsFile: File = null
protected var statsFile: File = _
override def defaultCoreMemory = 3.0
override def defaultThreads = 3
......@@ -59,6 +57,8 @@ class VcfStats(val parent: Configurable)
var intervals: Option[File] = None
override def beforeGraph(): Unit = {
super.beforeGraph()
if (intervals.isEmpty) intervals = config("intervals")
reference = referenceFasta()
index = new File(input.getAbsolutePath + ".tbi")
}
......@@ -71,7 +71,7 @@ class VcfStats(val parent: Configurable)
}
/** Creates command to execute extension */
override def cmdLine =
override def cmdLine: String =
super.cmdLine +
required("-I", input) +
required("-o", outputDir) +
......
......@@ -20,9 +20,10 @@ import nl.lumc.sasc.biopet.core.Version
import nl.lumc.sasc.biopet.core.extensions.RscriptCommandLineFunction
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline._