Skip to content
Snippets Groups Projects
Commit d46ad182 authored by bow's avatar bow
Browse files

Merge branch 'feature-Chipcap' into 'develop'

Feature chipcap

CArp pipeline can be merged into develop!

See merge request !77
parents c874be08 51da5c65
No related branches found
No related tags found
No related merge requests found
Showing with 299 additions and 4 deletions
......@@ -17,7 +17,7 @@ package nl.lumc.sasc.biopet.core
import java.io.File
import java.io.PrintWriter
import nl.lumc.sasc.biopet.core.config.{ Config, Configurable }
import nl.lumc.sasc.biopet.core.config.{ ConfigValueIndex, Config, Configurable }
import org.broadinstitute.gatk.utils.commandline.Argument
import org.broadinstitute.gatk.queue.QSettings
import org.broadinstitute.gatk.queue.function.QFunction
......@@ -29,7 +29,14 @@ trait BiopetQScript extends Configurable with GatkLogging {
@Argument(doc = "JSON config file(s)", fullName = "config_file", shortName = "config", required = false)
val configfiles: List[File] = Nil
var outputDir: String = _
var outputDir: String = {
val temp = Config.getValueFromMap(Config.global.map, ConfigValueIndex(this.configName, configPath, "output_dir"))
if (temp.isEmpty) throw new IllegalArgumentException("No output_dir defined in config")
else {
val t = temp.get.value.toString
if (!t.endsWith("/")) t + "/" else t
}
}
@Argument(doc = "Disable all scatters", shortName = "DSC", required = false)
var disableScatter: Boolean = false
......
......@@ -108,7 +108,7 @@ trait MultiSampleQScript extends BiopetQScript {
def createFile(suffix: String) = new File(sampleDir, sampleId + suffix)
/** Returns sample directory */
def sampleDir = outputDir + "samples" + File.pathSeparator + sampleId + File.pathSeparator
def sampleDir = outputDir + "samples" + File.separator + sampleId + File.separator
}
/** Sample type, need implementation in pipeline */
......
package nl.lumc.sasc.biopet.extensions.macs2
import nl.lumc.sasc.biopet.core.BiopetCommandLineFunction
/**
* Created by sajvanderzeeuw on 12/19/14.
*/
abstract class Macs2 extends BiopetCommandLineFunction {
executable = config("exe", default = "macs2", submodule = "macs2", freeVar = false)
override def versionCommand = executable + " --version"
override val versionRegex = """macs2 (.*)""".r
override val versionExitcode = List(0, 1)
}
package nl.lumc.sasc.biopet.extensions.macs2
import java.io.File
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
class Macs2CallPeak(val root: Configurable) extends Macs2 {
@Input(doc = "Treatment input", required = true)
var treatment: File = _
@Input(doc = "Control input", required = false)
var control: File = _
@Output(doc = "Output file NARROWPEAKS")
private var output_narrow: File = _
@Output(doc = "Output file BROADPEAKS")
private var output_broad: File = _
@Output(doc = "Output in Excel format")
private var output_xls: File = _
@Output(doc = "R script with Bimodal model")
private var output_r: File = _
@Output(doc = "Output file Bedgraph")
private var output_bdg: File = _
@Output(doc = "Output file gappedPeak")
private var output_gapped: File = _
var fileformat: String = config("fileformat")
var gsize: Option[Float] = config("gsize")
var keepdup: Boolean = config("keep-dup", default = false)
var buffersize: Option[Int] = config("buffer-size")
var outputdir: String = config("outputDir")
var name: String = config("name")
var bdg: Boolean = config("bdg", default = false)
var verbose: Boolean = config("verbose", default = false)
var tsize: Option[Int] = config("tsize")
var bandwith: Option[Int] = config("bandwith")
var mfold: Option[Float] = config("mfold")
var fixbimodel: Boolean = config("fixbimodel", default = false)
var nomodel: Boolean = config("nomodel", default = false)
var shift: Option[Int] = config("shift")
var qvalue: Option[Float] = config("qvalue")
var pvalue: Option[Float] = config("pvalue")
var tolarge: Boolean = config("tolarge", default = false)
var downsample: Boolean = config("downsample", default = false)
var nolambda: Boolean = config("nolambda", default = false)
var slocal: Option[Int] = config("slocal")
var llocal: Option[Int] = config("llocal")
var broad: Boolean = config("broad", default = false)
var broadcutoff: Option[Int] = config("broadcutoff")
var callsummits: Boolean = config("callsummits", default = false)
override def afterGraph: Unit = {
output_narrow = new File(outputdir + name + ".narrowPeak")
output_broad = new File(outputdir + name + ".broadPeak")
output_xls = new File(outputdir + name + ".xls")
output_bdg = new File(outputdir + name + ".bdg")
output_r = new File(outputdir + name + ".r")
output_gapped = new File(outputdir + name + ".gappedPeak")
}
def cmdLine = {
required(executable) + required("callpeak") +
required("--treatment", treatment) + /* Treatment sample */
optional("--control", control) + /* Control sample */
optional("-f", fileformat) + /* Input file format */
required("-g", gsize) + /* Estimated genome size.(format: 2.7e9) (presets: hs, mm, ce, dm) */
conditional(keepdup, "--keep-dup") + /* Whether to keep duplicates */
optional("--buffer-size", buffersize) + /* Buffer size */
required("--outdir", outputdir) + /* The output directory */
optional("--name", name) + /* prefix name of the output files. (note that also the peak names inside the files will have this name */
conditional(bdg, "-B") + /* Whether to output in BDG format */
conditional(verbose, "--verbose") + /* Whether to output verbosely */
optional("--tsize", tsize) + /* Sets custom tag length, if not specified macs will use first 10 sequences to estimate the size */
optional("--bw", bandwith) + /* The bandwith to use for model building. Set this parameter as the sonication fragment size estimated in the wetlab */
optional("--mfold", mfold) + /* The parameter to select regions within the model fold. Must be a upper and lower limit. */
conditional(fixbimodel, "--fix-bimodal") + /* Whether turn on the auto paired-peak model process. If it's set, when MACS failed to build paired model, it will use the nomodel settings, the '--extsize' parameter to extend each tags. If set, MACS will be terminated if paried-peak model is failed. */
conditional(nomodel, "--nomodel") + /* While on, MACS will bypass building the shifting model */
optional("--shift", shift) + /* You can set an arbitrary shift in basepairs here */
optional("--extsize", shift) + /* While '--nomodel' is set, MACS uses this parameter to extend reads in 5'->3' direction to fix-sized fragments. For example, if the size of binding region for your transcription factor is 200 bp, and you want to bypass the model building by MACS, this parameter can be set as 200. This option is only valid when --nomodel is set or when MACS fails to build model and --fix-bimodal is on. */
optional("--qvalue", qvalue) + /* the Q-value(FDR) cutoff */
optional("--pvalue", pvalue) + /* The P-value cutoff, if --pvalue is set no Qvalue is calculated */
conditional(tolarge, "--to-large") + /* Whether to scale up the smallest input file to the larger one */
conditional(downsample, "--down-sample") + /* This is the reversed from --to-large */
conditional(nolambda, "--nolambda") + /* With this flag on, MACS will use the background lambda as local lambda. This means MACS will not consider the local bias at peak candidate regions.*/
optional("--slocal", slocal) + /* These two parameters control which two levels of regions will be checked around the peak regions to calculate the maximum lambda as local lambda */
optional("--llocal", llocal) +
conditional(broad, "--broad") + /* whether to enable broad peak calling */
optional("--broad-cutoff", broadcutoff) + /* Cutoff for broad region. This option is not available unless --broad is set. If -p is set, this is a pvalue cutoff, otherwise, it's a qvalue cutoff. */
conditional(callsummits, "--call-summits") /* MACS will now reanalyze the shape of signal profile (p or q-score depending on cutoff setting) to deconvolve subpeaks within each peak called from general procedure. */
}
}
......@@ -80,6 +80,11 @@
<artifactId>Kopisu</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Carp</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
......
......@@ -23,7 +23,8 @@ object BiopetExecutablePublic extends BiopetExecutable {
nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics,
nl.lumc.sasc.biopet.pipelines.yamsvp.Yamsvp,
nl.lumc.sasc.biopet.pipelines.sage.Sage,
nl.lumc.sasc.biopet.pipelines.kopisu.ConiferPipeline
nl.lumc.sasc.biopet.pipelines.kopisu.ConiferPipeline,
nl.lumc.sasc.biopet.pipelines.carp.Carp
)
def tools: List[MainCommand] = List(
......
/target/
\ No newline at end of file
<!--
Biopet is built on top of GATK Queue for building bioinformatic
pipelines. It is mainly intended to support LUMC SHARK cluster which is running
SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
should also be able to execute Biopet tools and pipelines.
Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
Contact us at: sasc@lumc.nl
A dual licensing mode is applied. The source code within this project that are
not part of GATK Queue is freely available for non-commercial use under an AGPL
license; For commercial users or users who do not want to follow the AGPL
license, please contact us to obtain a separate license.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Carp</artifactId>
<packaging>jar</packaging>
<parent>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Biopet</artifactId>
<version>0.3.0-DEV</version>
<relativePath>../</relativePath>
</parent>
<inceptionYear>2014</inceptionYear>
<name>Carp</name>
<dependencies>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>BiopetFramework</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>nl.lumc.sasc</groupId>
<artifactId>Mapping</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
</project>
/**
* Biopet is built on top of GATK Queue for building bioinformatic
* pipelines. It is mainly intended to support LUMC SHARK cluster which is running
* SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
* should also be able to execute Biopet tools and pipelines.
*
* Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
*
* Contact us at: sasc@lumc.nl
*
* A dual licensing mode is applied. The source code within this project that are
* not part of GATK Queue is freely available for non-commercial use under an AGPL
* license; For commercial users or users who do not want to follow the AGPL
* license, please contact us to obtain a separate license.
*/
package nl.lumc.sasc.biopet.pipelines.carp
import java.io.File
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak
import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
import nl.lumc.sasc.biopet.utils.ConfigUtils
import org.broadinstitute.gatk.queue.QScript
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input }
import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.core.config._
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
/**
* Carp pipeline
* Chip-Seq analysis pipeline
* This pipeline performs QC,mapping and peak calling
*/
class Carp(val root: Configurable) extends QScript with MultiSampleQScript {
qscript =>
def this() = this(null)
override def defaults = ConfigUtils.mergeMaps(Map(
"mapping" -> Map("skip_markduplicates" -> true)
), super.defaults)
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
def makeLibrary(id: String) = new Library(id)
class Library(libraryId: String) extends AbstractLibrary(libraryId) {
val mapping = new Mapping(qscript)
def addJobs(): Unit = {
if (config.contains("R1")) {
mapping.input_R1 = config("R1")
if (config.contains("R2")) mapping.input_R2 = config("R2")
mapping.libraryId = libraryId
mapping.sampleId = sampleId
mapping.outputDir = libDir
mapping.init
mapping.biopetScript
addAll(mapping.functions)
} else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libraryId)
}
}
val bamFile = createFile(".bam")
val controls: List[String] = config("control", default = Nil)
def addJobs(): Unit = {
addLibsJobs()
val bamFiles = libraries.map(_._2.mapping.finalBamFile).toList
if (bamFiles.length == 1) {
add(Ln(qscript, bamFiles.head, bamFile))
val oldIndex = new File(bamFiles.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
val newIndex = new File(bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
add(Ln(qscript, oldIndex, newIndex))
} else if (bamFiles.length > 1) {
val merge = new MergeSamFiles(qscript)
merge.input = bamFiles
merge.sortOrder = "coordinate"
merge.output = bamFile
add(merge)
//TODO: Add BigWIg track
}
val macs2 = new Macs2CallPeak(qscript)
macs2.treatment = bamFile
macs2.name = sampleId
macs2.outputdir = sampleDir + "macs2/" + macs2.name + "/"
add(macs2)
}
}
def init() {
}
def biopetScript() {
// Define what the pipeline should do
// First step is QC, this will be done with Flexiprep
// Second step is mapping, this will be done with the Mapping pipeline
// Third step is calling peaks on the bam files produced with the mapping pipeline, this will be done with MACS2
logger.info("Starting CArP pipeline")
addSamplesJobs
for ((sampleId, sample) <- samples) {
for (controlId <- sample.controls) {
if (!samples.contains(controlId))
throw new IllegalStateException("For sample: " + sampleId + " this control: " + controlId + " does not exist")
val macs2 = new Macs2CallPeak(this)
macs2.treatment = sample.bamFile
macs2.control = samples(controlId).bamFile
macs2.name = sampleId + "_VS_" + controlId
macs2.outputdir = sample.sampleDir + "/" + "macs2/" + macs2.name + "/"
add(macs2)
}
}
}
}
object Carp extends PipelineCommand
......@@ -34,6 +34,7 @@
<module>sage</module>
<module>kopisu</module>
<module>yamsvp</module>
<module>carp</module>
</modules>
<properties>
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment