Commit cb7a051c authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Merge branch 'release-0.7.0' into 'master'

Release 0.7.0



See merge request !449
parents 23c1ccea bf548a42
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>Kopisu</artifactId>
<packaging>jar</packaging>
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.6.0</version>
<relativePath>../</relativePath>
</parent>
<inceptionYear>2015</inceptionYear>
<name>Kopisu</name>
<dependencies>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetCore</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetExtensions</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu
import java.io.File
import nl.lumc.sasc.biopet.utils.config._
import nl.lumc.sasc.biopet.core.{ PipelineCommand, _ }
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.conifer.{ ConiferAnalyze, ConiferCall, ConiferRPKM }
import org.broadinstitute.gatk.queue.QScript
/**
 * Kopisu - ConiferPipeline is a pipeline that can run standalone.
 *
 * Adds a ConiferRPKM job for the input bam file. Unless RPKM-only mode is enabled it also links
 * the control RPKM files next to the sample's RPKM file and adds ConiferAnalyze, ConiferCall and
 * ConiferSummary jobs.
 *
 * @param root parent configurable; `null` when the no-argument constructor is used
 */
class ConiferPipeline(val root: Configurable) extends QScript with BiopetQScript {

  /** No-argument constructor, passes `null` as root. */
  def this() = this(null)

  /** Input bamfile */
  @Input(doc = "Bamfile to start from", fullName = "bam", shortName = "bam", required = true)
  var inputBam: File = _

  /** Optional sample label; stays `null` when the argument is not supplied. */
  @Argument(doc = "Label this sample with a name/ID [0-9a-zA-Z] and [-_]",
    fullName = "label",
    shortName = "label", required = false)
  var sampleLabel: String = _

  /** Exon definitions in bed format */
  @Input(doc = "Exon definition file in bed format", fullName = "exon_bed", shortName = "bed", required = false)
  var probeFile: File = config("probeFile")

  /** Directory holding the RPKM files of the control samples */
  @Input(doc = "Previous RPKM files (controls)", fullName = "rpkm_controls", shortName = "rc", required = false)
  var controlsDir: File = config("controlsDir")

  /** When true only the RPKM job is added; analyze, call and summary are skipped. */
  @Argument(doc = "Enable RPKM only mode, generate files for reference db", shortName = "rpkmonly", required = false)
  var RPKMonly: Boolean = false

  val summary = new ConiferSummary(this)

  /** Nothing to initialise for this pipeline. */
  def init(): Unit = {}

  /**
   * Builds an output file name: the sample label followed by `extension` when a non-empty label
   * was supplied, otherwise the bam file name with its ".bam" extension swapped for `extension`.
   *
   * The label is optional and therefore may be `null`; it is treated the same as an empty label.
   */
  private def sampleFileName(inputBam: File, extension: String): String = {
    val label = Option(sampleLabel).getOrElse("")
    if (label.nonEmpty) label + extension
    else swapExt(inputBam.getName, ".bam", extension)
  }

  /** Name of the RPKM output (".txt") for the given bam file. */
  def input2RPKM(inputBam: File): String = sampleFileName(inputBam, ".txt")

  /** Name of the analyze output (".hdf5") for the given bam file. */
  def input2HDF5(inputBam: File): String = sampleFileName(inputBam, ".hdf5")

  /** Name of the filtered calls output (".calls.txt") for the given bam file. */
  def input2Calls(inputBam: File): String = sampleFileName(inputBam, ".calls.txt")

  /** Adds all jobs of the pipeline to the run. */
  def biopetScript(): Unit = {
    /** Setup RPKM directory */
    val sampleDir: String = outputDir
    val RPKMdir: File = new File(sampleDir + File.separator + "RPKM" + File.separator)
    // mkdirs (unlike mkdir) also creates missing parent directories
    RPKMdir.mkdirs()

    val coniferRPKM = new ConiferRPKM(this)
    coniferRPKM.bamFile = this.inputBam.getAbsoluteFile
    coniferRPKM.probes = this.probeFile
    coniferRPKM.output = new File(RPKMdir, input2RPKM(inputBam))
    add(coniferRPKM)

    if (!RPKMonly) {
      /** Collect the rpkm_output to a temp directory, where we merge with the control files */
      // File.list returns null when the path is not a readable directory
      val controlNames: List[String] = Option(controlsDir.list)
        .getOrElse(Array.empty[String])
        .filter(_.toLowerCase.endsWith(".txt"))
        .toList

      // Sync the .txt only, these files contain the RPKM Values
      val refRPKMlist: List[File] = controlNames.flatMap { controlRPKMfile =>
        val target = new File(RPKMdir, controlRPKMfile)
        val source = new File(controlsDir, controlRPKMfile)
        if (target.exists && target.equals(source)) {
          // the control file already lives in the RPKM directory, nothing to link
          None
        } else {
          // replace a stale file/link before linking again
          if (target.exists) target.delete()
          add(Ln(this, source, target, relative = false))
          Some(target)
        }
      }

      val coniferAnalyze = new ConiferAnalyze(this)
      coniferAnalyze.deps = List(coniferRPKM.output) ++ refRPKMlist
      coniferAnalyze.probes = this.probeFile
      coniferAnalyze.rpkmDir = RPKMdir
      coniferAnalyze.output = new File(sampleDir, input2HDF5(inputBam))
      add(coniferAnalyze)

      val coniferCall = new ConiferCall(this)
      coniferCall.input = coniferAnalyze.output
      coniferCall.output = new File(sampleDir, "calls.txt")
      add(coniferCall)

      summary.deps = List(coniferCall.output)
      // NOTE(review): label is passed on unchanged, so it is null when no label was supplied
      summary.label = sampleLabel
      summary.calls = coniferCall.output
      summary.out = new File(sampleDir, input2Calls(inputBam))
      add(summary)
    }
  }
}
/**
 * Companion object of the ConiferPipeline class; it defines no members of its own and inherits
 * everything from PipelineCommand (presumably the command-line entry point — TODO confirm).
 */
object ConiferPipeline extends PipelineCommand
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.kopisu
import java.io.{ BufferedWriter, File, FileWriter }
import argonaut._
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.function.InProcessFunction
import org.broadinstitute.gatk.utils.commandline.{ Input, Output }
import scala.io.Source
/**
 * In-process job that filters a calls file down to the lines of a single sample.
 *
 * @param root parent configurable; must be a ConiferPipeline, otherwise construction fails
 */
class ConiferSummary(val root: Configurable) extends InProcessFunction with Configurable {

  /**
   * Copies the lines of `callFile` that start with `sampleName`, plus the lines that start with
   * "sampleID", to `outFile`. Every written line is terminated with "\n".
   *
   * Both the reader and the writer are closed even when reading or writing fails. The input is
   * opened first so that an unreadable input does not leave an empty output file behind.
   *
   * @param callFile   file with calls to read
   * @param outFile    file the selected lines are written to (overwritten if it exists)
   * @param sampleName prefix identifying the lines to keep
   */
  def filterCalls(callFile: File, outFile: File, sampleName: String): Unit = {
    val source = Source.fromFile(callFile)
    try {
      val writer = new BufferedWriter(new FileWriter(outFile))
      try {
        source.getLines()
          .filter(line => line.startsWith(sampleName) || line.startsWith("sampleID"))
          .foreach(line => writer.write(line + "\n"))
      } finally {
        writer.close()
      }
    } finally {
      source.close()
    }
  }

  this.analysisName = getClass.getSimpleName

  /** Files that have to exist before this job runs */
  @Input(doc = "deps")
  var deps: List[File] = Nil

  /** File the filtered calls are written to */
  @Output(doc = "Summary output", required = true)
  var out: File = _

  /** File with calls to filter */
  @Input(doc = "calls")
  var calls: File = _

  /** Prefix of the lines to keep, passed to filterCalls as the sample name */
  var label: String = _

  /** The owning pipeline; construction fails when root is anything else. */
  var coniferPipeline: ConiferPipeline = root match {
    case pipeline: ConiferPipeline => pipeline
    case _ =>
      throw new IllegalStateException("Root is no instance of ConiferPipeline")
  }

  var resources: Map[String, Json] = Map()

  /** Filters `calls` into `out` using `label` as the sample name. */
  override def run(): Unit = {
    logger.debug("Start")
    filterCalls(calls, out, label)
    logger.debug("Stop")
  }
}
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
/target/
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
/target/
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project that are
# not part of GATK Queue is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
/target/
\ No newline at end of file
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>Toucan</artifactId>
<packaging>jar</packaging>
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.6.0</version>
<relativePath>../</relativePath>
</parent>
<inceptionYear>2014</inceptionYear>
<name>Toucan</name>
<dependencies>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetCore</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetToolsExtensions</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>
......@@ -9,8 +9,7 @@
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
......@@ -25,7 +24,7 @@
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.6.0</version>
<version>0.7.0</version>
<relativePath>../</relativePath>
</parent>
......
......@@ -8,8 +8,7 @@
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
......
#
# Biopet is built on top of GATK Queue for building bioinformatic
# pipelines. It is mainly intended to support LUMC SHARK cluster which is running
# SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
# should also be able to execute Biopet tools and pipelines.
#
# Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
#
# Contact us at: sasc@lumc.nl
#
# A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
# license; For commercial users or users who do not want to follow the AGPL
# license, please contact us to obtain a separate license.
#
# Set root logger level to ERROR and its only appender to A1.
log4j.rootLogger=ERROR, A1
# A1 is set to be a ConsoleAppender.
log4j.appender.A1=org.apache.log4j.ConsoleAppender
# A1 uses PatternLayout.
log4j.appender.A1.layout=org.apache.log4j.PatternLayout
log4j.appender.A1.layout.ConversionPattern=%-5p [%d] [%C{1}] - %m%n
\ No newline at end of file
......@@ -10,8 +10,7 @@
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
......@@ -22,7 +21,7 @@
<parent>
<artifactId>Biopet</artifactId>
<groupId>nl.lumc.sasc</groupId>
<version>0.6.0</version>
<version>0.7.0</version>
</parent>
<modelVersion>4.0.0</modelVersion>
......@@ -40,6 +39,11 @@
<artifactId>Mapping</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Kopisu</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Toucan</artifactId>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment