Skip to content
Snippets Groups Projects
Commit 801ddf8f authored by Peter van 't Hof's avatar Peter van 't Hof
Browse files

Switch Carp to multisample mapping

parent 4078fca2
No related branches found
No related tags found
No related merge requests found
......@@ -18,16 +18,13 @@ package nl.lumc.sasc.biopet.pipelines.carp
import java.io.File
import nl.lumc.sasc.biopet.core._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
import nl.lumc.sasc.biopet.utils.config._
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.Ln
import nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak
import nl.lumc.sasc.biopet.extensions.picard.{ BuildBamIndex, MergeSamFiles }
import nl.lumc.sasc.biopet.extensions.picard.BuildBamIndex
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsView
import nl.lumc.sasc.biopet.pipelines.bammetrics.BamMetrics
import nl.lumc.sasc.biopet.pipelines.bamtobigwig.Bam2Wig
import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
import nl.lumc.sasc.biopet.utils.ConfigUtils
import nl.lumc.sasc.biopet.pipelines.mapping.MultisampleMappingTrait
import nl.lumc.sasc.biopet.utils.config._
import org.broadinstitute.gatk.queue.QScript
/**
......@@ -35,14 +32,15 @@ import org.broadinstitute.gatk.queue.QScript
* Chip-Seq analysis pipeline
* This pipeline performs QC,mapping and peak calling
*/
class Carp(val root: Configurable) extends QScript with MultiSampleQScript with SummaryQScript with Reference {
class Carp(val root: Configurable) extends QScript with MultisampleMappingTrait with Reference {
qscript =>
def this() = this(null)
override def defaults = Map(
"mapping" -> Map(
"skip_markduplicates" -> false,
"aligner" -> "bwa-mem"
"aligner" -> "bwa-mem",
"merge_strategy" -> "preprocessmergesam"
),
"samtoolsview" -> Map("q" -> 10)
)
......@@ -54,99 +52,41 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with
)
)
def summaryFile = new File(outputDir, "Carp.summary.json")
//TODO: Add summary
def summaryFiles = Map("reference" -> referenceFasta())
//TODO: Add summary
def summarySettings = Map("reference" -> referenceSummary)
def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends AbstractSample(sampleId) {
//TODO: Add summary
def summaryFiles: Map[String, File] = Map()
//TODO: Add summary
def summaryStats: Map[String, Any] = Map()
override def summaryFile = new File(outputDir, "Carp.summary.json")
def makeLibrary(id: String) = new Library(id)
class Library(libId: String) extends AbstractLibrary(libId) {
//TODO: Add summary
def summaryFiles: Map[String, File] = Map()
override def makeSample(id: String) = new Sample(id)
class Sample(sampleId: String) extends super.Sample(sampleId) {
//TODO: Add summary
def summaryStats: Map[String, Any] = Map()
val mapping = new Mapping(qscript)
mapping.libId = Some(libId)
mapping.sampleId = Some(sampleId)
mapping.outputDir = libDir
def addJobs(): Unit = {
if (config.contains("R1")) {
mapping.input_R1 = config("R1")
if (config.contains("R2")) mapping.input_R2 = config("R2")
inputFiles :+= new InputFile(mapping.input_R1, config("R1_md5"))
mapping.input_R2.foreach(inputFiles :+= new InputFile(_, config("R2_md5")))
mapping.init()
mapping.biopetScript()
addAll(mapping.functions)
} else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libId)
addSummaryQScript(mapping)
}
}
override def preProcessBam = Some(createFile(".filter.bam"))
val bamFile = createFile(".bam")
val bamFileFilter = createFile(".filter.bam")
val controls: List[String] = config("control", default = Nil)
def addJobs(): Unit = {
addPerLibJobs()
val bamFiles = libraries.map(_._2.mapping.finalBamFile).toList
if (bamFiles.length == 1) {
add(Ln(qscript, bamFiles.head, bamFile))
val oldIndex = new File(bamFiles.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
val newIndex = new File(bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
add(Ln(qscript, oldIndex, newIndex))
} else if (bamFiles.length > 1) {
val merge = new MergeSamFiles(qscript)
merge.input = bamFiles
merge.sortOrder = "coordinate"
merge.output = bamFile
add(merge)
}
val bamMetrics = BamMetrics(qscript, bamFile, new File(sampleDir, "metrics"), sampleId = Some(sampleId))
addAll(bamMetrics.functions)
addSummaryQScript(bamMetrics)
val bamMetricsFilter = BamMetrics(qscript, bamFileFilter, new File(sampleDir, "metrics-filter"), sampleId = Some(sampleId))
addAll(bamMetricsFilter.functions)
bamMetricsFilter.summaryName = "bammetrics-filter"
addSummaryQScript(bamMetricsFilter)
override def addJobs(): Unit = {
super.addJobs()
addAll(Bam2Wig(qscript, bamFile).functions)
addAll(Bam2Wig(qscript, bamFileFilter).functions)
add(Bam2Wig(qscript, bamFile.get))
val samtoolsView = new SamtoolsView(qscript)
samtoolsView.input = bamFile
samtoolsView.output = bamFileFilter
samtoolsView.input = bamFile.get
samtoolsView.output = preProcessBam.get
samtoolsView.b = true
samtoolsView.h = true
add(samtoolsView)
val bamMetricsFilter = BamMetrics(qscript, preProcessBam.get, new File(sampleDir, "metrics-filter"), sampleId = Some(sampleId))
addAll(bamMetricsFilter.functions)
bamMetricsFilter.summaryName = "bammetrics-filter"
addSummaryQScript(bamMetricsFilter)
add(Bam2Wig(qscript, preProcessBam.get))
val buildBamIndex = new BuildBamIndex(qscript)
buildBamIndex.input = bamFileFilter
buildBamIndex.output = swapExt(bamFileFilter.getParent, bamFileFilter, ".bam", ".bai")
buildBamIndex.input = preProcessBam.get
buildBamIndex.output = swapExt(preProcessBam.get.getParentFile, preProcessBam.get, ".bam", ".bai")
add(buildBamIndex)
val macs2 = new Macs2CallPeak(qscript)
macs2.treatment = bamFileFilter
macs2.treatment = preProcessBam.get
macs2.name = Some(sampleId)
macs2.outputdir = sampleDir + File.separator + "macs2" + File.separator + sampleId + File.separator
add(macs2)
......@@ -160,32 +100,22 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript with
Some(carp)
}
def init() = {
override def init() = {
super.init()
// ensure that no samples are called 'control' since that is our reserved keyword
require(!sampleIds.contains("control"),
"No sample should be named 'control' since it is a reserved for the Carp pipeline")
}
def biopetScript() {
// Define what the pipeline should do
// First step is QC, this will be done with Flexiprep
// Second step is mapping, this will be done with the Mapping pipeline
// Third step is calling peaks on the bam files produced with the mapping pipeline, this will be done with MACS2
logger.info("Starting CArP pipeline")
addSamplesJobs()
addSummaryJobs()
}
def addMultiSampleJobs(): Unit = {
override def addMultiSampleJobs(): Unit = {
super.addMultiSampleJobs()
for ((sampleId, sample) <- samples) {
for (controlId <- sample.controls) {
if (!samples.contains(controlId))
throw new IllegalStateException("For sample: " + sampleId + " this control: " + controlId + " does not exist")
val macs2 = new Macs2CallPeak(this)
macs2.treatment = sample.bamFileFilter
macs2.control = samples(controlId).bamFileFilter
macs2.treatment = sample.preProcessBam.get
macs2.control = samples(controlId).preProcessBam.get
macs2.name = Some(sampleId + "_VS_" + controlId)
macs2.outputdir = sample.sampleDir + File.separator + "macs2" + File.separator + macs2.name.get + File.separator
add(macs2)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment