From 7734e7d38d285219579c88bdd4798fdfe6cc1b4d Mon Sep 17 00:00:00 2001
From: Sander van der Zeeuw <s.a.j.van_der_zeeuw@lumc.nl>
Date: Thu, 29 Jan 2015 14:33:54 +0100
Subject: [PATCH] Added changes for new sampling handling

---
 .../sasc/biopet/pipelines/carp/Carp.scala     | 137 ++++++++----------
 1 file changed, 61 insertions(+), 76 deletions(-)

diff --git a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala
index a69ec4d32..dfef64132 100644
--- a/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala
+++ b/public/carp/src/main/scala/nl/lumc/sasc/biopet/pipelines/carp/Carp.scala
@@ -15,13 +15,15 @@
  */
 package nl.lumc.sasc.biopet.pipelines.carp
 
+import java.io.File
+
 import nl.lumc.sasc.biopet.extensions.Ln
 import nl.lumc.sasc.biopet.extensions.macs2.Macs2CallPeak
 import nl.lumc.sasc.biopet.extensions.picard.MergeSamFiles
+import nl.lumc.sasc.biopet.utils.ConfigUtils
 import org.broadinstitute.gatk.queue.QScript
 import org.broadinstitute.gatk.utils.commandline.{ Argument, Input }
 import org.broadinstitute.gatk.utils.commandline.{ Input, Argument }
-import nl.lumc.sasc.biopet.extensions.aligners.{ Bwa, Star, Bowtie, Stampy }
 import nl.lumc.sasc.biopet.core._
 import nl.lumc.sasc.biopet.core.config._
 import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
@@ -32,14 +34,60 @@ import nl.lumc.sasc.biopet.pipelines.mapping.Mapping
  * This pipeline performs QC,mapping and peak calling
  */
 class Carp(val root: Configurable) extends QScript with MultiSampleQScript {
+  qscript =>
   def this() = this(null)
 
-  class LibraryOutput extends AbstractLibraryOutput {
-    var mappedBamFile: File = _
-  }
+  override def defaults = ConfigUtils.mergeMaps(Map(
+    "mapping" -> Map("skip_markduplicates" -> true)
+  ), super.defaults)
+
+  def makeSample(id: String) = new Sample(id)
+  class Sample(sampleId: String) extends AbstractSample(sampleId) {
+    def makeLibrary(id: String) = new Library(id)
+    class Library(libraryId: String) extends AbstractLibrary(libraryId) {
+      val mapping = new Mapping(qscript)
+
+      def addJobs(): Unit = {
+        if (config.contains("R1")) {
+          mapping.input_R1 = config("R1")
+          if (config.contains("R2")) mapping.input_R2 = config("R2")
+          mapping.libraryId = libraryId
+          mapping.sampleId = sampleId
+          mapping.outputDir = libDir
+
+          mapping.init
+          mapping.biopetScript
+          addAll(mapping.functions)
+
+        } else logger.error("Sample: " + sampleId + ": No R1 found for library: " + libraryId)
+      }
+    }
+
+    val bamFile = new File(sampleDir + sampleId + ".bam")
+    val controls: List[String] = config("control", default = Nil)
+
+    def addJobs(): Unit = {
+      addLibsJobs()
+      val bamFiles = libraries.map(_._2.mapping.finalBamFile).toList
+      if (bamFiles.length == 1) {
+        add(Ln(qscript, bamFiles.head, bamFile))
+        val oldIndex = new File(bamFiles.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
+        val newIndex = new File(bamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
+        add(Ln(qscript, oldIndex, newIndex))
+      } else if (bamFiles.length > 1) {
+        val merge = new MergeSamFiles(qscript)
+        merge.input = bamFiles
+        merge.sortOrder = "coordinate"
+        merge.output = bamFile
+        add(merge)
+      }
 
-  class SampleOutput extends AbstractSampleOutput {
-    var mappedBamFile: File = _
+      val macs2 = new Macs2CallPeak(qscript)
+      macs2.treatment = bamFile
+      macs2.name = sampleId
+      macs2.outputdir = sampleDir + "macs2/" + macs2.name + "/"
+      add(macs2)
+    }
   }
 
   def init() {
@@ -52,84 +100,21 @@ class Carp(val root: Configurable) extends QScript with MultiSampleQScript {
     // Third step is calling peaks on the bam files produced with the mapping pipeline, this will be done with MACS2
     logger.info("Starting CArP pipeline")
 
-    runSamplesJobs
-
-    for (sample <- getSamples) {
-      val controls: List[String] = config("control", sample = sample, default = Nil)
+    addSamplesJobs
 
-      for (control <- controls) {
-        if (!getSamples.exists(_ == control))
+    for ((sampleId, sample) <- samples) {
+      for (control <- sample.controls) {
+        if (!samples.exists(_ == control))
           throw new IllegalStateException("For sample: " + sample + " this control: " + control + " does not exist")
         val macs2 = new Macs2CallPeak(this)
-        macs2.treatment = samplesOutput(sample).mappedBamFile
-        macs2.control = samplesOutput(control).mappedBamFile
+        macs2.treatment = sample.bamFile
+        macs2.control = samples(control).bamFile
         macs2.name = sample + "_VS_" + control
-        macs2.outputdir = globalSampleDir + sample + "/" + "macs2/" + macs2.name + "/"
+        macs2.outputdir = sample.sampleDir + "/" + "macs2/" + macs2.name + "/"
         add(macs2)
       }
     }
   }
-
-  def runSingleSampleJobs(sampleConfig: Map[String, Any]): SampleOutput = {
-    val sampleOutput = new SampleOutput
-    val sampleID: String = getCurrentSample
-    val sampleDir = globalSampleDir + sampleID + "/"
-
-    sampleOutput.libraries = runLibraryJobs(sampleConfig)
-    val bamfiles = sampleOutput.libraries.map(_._2.mappedBamFile).toList
-    sampleOutput.mappedBamFile = new File(sampleDir + sampleID + ".bam")
-    if (bamfiles.length == 1) {
-      add(Ln(this, bamfiles.head, sampleOutput.mappedBamFile))
-      val oldIndex = new File(bamfiles.head.getAbsolutePath.stripSuffix(".bam") + ".bai")
-      val newIndex = new File(sampleOutput.mappedBamFile.getAbsolutePath.stripSuffix(".bam") + ".bai")
-      add(Ln(this, oldIndex, newIndex))
-    } else if (bamfiles.length > 1) {
-      val merge = new MergeSamFiles(this)
-      merge.input = bamfiles
-      merge.sortOrder = "coordinate"
-      merge.output = sampleOutput.mappedBamFile
-      add(merge)
-    }
-
-    val macs2 = new Macs2CallPeak(this)
-    macs2.treatment = sampleOutput.mappedBamFile
-    macs2.name = sampleID
-    macs2.outputdir = sampleDir + "macs2/" + macs2.name + "/"
-    add(macs2)
-
-    return sampleOutput
-  }
-
-  def runSingleLibraryJobs(runConfig: Map[String, Any], sampleConfig: Map[String, Any]): LibraryOutput = {
-    val libraryOutput = new LibraryOutput
-
-    val runID: String = getCurrentLibrary
-    val sampleID: String = getCurrentSample
-    val runDir: String = globalSampleDir + sampleID + "/run_" + runID + "/"
-
-    if (runConfig.contains("R1")) {
-      val mapping = new Mapping(this)
-
-      mapping.skipMarkduplicates = config("skip_markduplicates", default = true) // we do the dedup marking using Sambamba
-
-      mapping.input_R1 = new File(runConfig("R1").toString)
-      if (runConfig.contains("R2")) mapping.input_R2 = new File(runConfig("R2").toString)
-      mapping.RGLB = runConfig("ID").toString
-      mapping.RGSM = sampleConfig("ID").toString
-      if (runConfig.contains("PL")) mapping.RGPL = runConfig("PL").toString
-      if (runConfig.contains("PU")) mapping.RGPU = runConfig("PU").toString
-      if (runConfig.contains("CN")) mapping.RGCN = runConfig("CN").toString
-      mapping.outputDir = runDir
-
-      mapping.init
-      mapping.biopetScript
-      addAll(mapping.functions)
-
-      libraryOutput.mappedBamFile = mapping.outputFiles("finalBamFile")
-    } else this.logger.error("Sample: " + sampleID + ": No R1 found for run: " + runConfig)
-    return libraryOutput
-  }
-
 }
 
 object Carp extends PipelineCommand
-- 
GitLab