From 5933b5a10649ac252fbb638fc456eaea3b5ad668 Mon Sep 17 00:00:00 2001
From: Peter van 't Hof <p.j.van_t_hof@lumc.nl>
Date: Thu, 16 Apr 2015 10:12:48 +0200
Subject: [PATCH] Refactor bed to interval

---
 .../pipelines/bammetrics/BamMetrics.scala     | 98 +++++++++++--------
 .../bedtools/BedtoolsIntersect.scala          |  4 +-
 .../extensions/picard/BedToIntervalList.scala |  4 +-
 3 files changed, 60 insertions(+), 46 deletions(-)

diff --git a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
index 09f5a937b..574adb382 100644
--- a/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
+++ b/public/bammetrics/src/main/scala/nl/lumc/sasc/biopet/pipelines/bammetrics/BamMetrics.scala
@@ -23,7 +23,7 @@ import java.io.File
 import nl.lumc.sasc.biopet.tools.{ BedToInterval, BiopetFlagstat }
 import nl.lumc.sasc.biopet.core.config.Configurable
 import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
-import nl.lumc.sasc.biopet.extensions.picard.{ CollectInsertSizeMetrics, CollectGcBiasMetrics, CalculateHsMetrics, CollectAlignmentSummaryMetrics }
+import nl.lumc.sasc.biopet.extensions.picard._
 import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
 
 class BamMetrics(val root: Configurable) extends QScript with SummaryQScript with SampleLibraryTag {
@@ -32,14 +32,11 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
   @Input(doc = "Bam File", shortName = "BAM", required = true)
   var inputBam: File = _
 
-  @Input(doc = "Bed tracks targets", shortName = "target", required = false)
-  var bedFiles: List[File] = Nil
+  /** Bed files for region of interests */
+  var roiBedFiles: List[File] = config("regions_of_interest", Nil)
 
-  @Input(doc = "Bed tracks bait", shortName = "bait", required = false)
-  var baitBedFile: File = _
-
-  @Argument(doc = "", required = false)
-  var wholeGenome = false
+  /** Bed of amplicon that is used */
+  var ampliconBedFile: Option[File] = config("amplicon_bed")
 
   /** return location of summary file */
   def summaryFile = (sampleId, libId) match {
@@ -56,12 +53,6 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
 
   /** executed before script */
   def init() {
-    if (config.contains("target_bed")) {
-      for (file <- config("target_bed").asList) {
-        bedFiles +:= new File(file.toString)
-      }
-    }
-    if (baitBedFile == null && config.contains("target_bait")) baitBedFile = config("target_bait")
   }
 
   /** Script to add jobs */
@@ -82,31 +73,60 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
     add(collectAlignmentSummaryMetrics)
     addSummarizable(collectAlignmentSummaryMetrics, "alignment_metrics")
 
-    val baitIntervalFile = if (baitBedFile != null) new File(outputDir, baitBedFile.getName.stripSuffix(".bed") + ".interval") else null
-    if (baitIntervalFile != null)
-      add(BedToInterval(this, baitBedFile, inputBam, outputDir), true)
+    case class Intervals(bed: File, intervals: File)
+
+    // Create temp jobs to convert bed files to intervals lists
+    val roiIntervals = roiBedFiles.map(roiBedFile => {
+      val roiIntervals = swapExt(outputDir, roiBedFile, ".bed", ".intervals")
+      val ampBedToInterval = BedToIntervalList(this, roiBedFile, roiIntervals)
+      ampBedToInterval.isIntermediate = true
+      add(ampBedToInterval)
+      Intervals(roiBedFile, roiIntervals)
+    })
+
+    // Metrics that require a amplicon bed file
+    val ampIntervals = ampliconBedFile.collect {
+      case ampliconBedFile => {
+        val ampIntervals = swapExt(outputDir, ampliconBedFile, ".bed", ".intervals")
+        val ampBedToInterval = BedToIntervalList(this, ampliconBedFile, ampIntervals)
+        ampBedToInterval.isIntermediate = true
+        add(ampBedToInterval)
+
+        val chsMetrics = CalculateHsMetrics(this, inputBam,
+          List(ampIntervals), ampIntervals :: roiIntervals.map(_.intervals), outputDir)
+        add(chsMetrics)
 
-    for (bedFile <- bedFiles) {
+        //TODO: target pcr metrics
+
+        Intervals(ampliconBedFile, ampIntervals)
+      }
+    }
+
+    // Create stats and coverage plot for each bed/interval file
+    for (intervals <- roiIntervals ++ ampIntervals) {
       //TODO: Add target jobs to summary
-      val targetDir = new File(outputDir, bedFile.getName.stripSuffix(".bed"))
-      val targetFile = new File(targetDir, bedFile.getName.stripSuffix(".bed") + ".interval")
-      val targetInterval = BedToInterval(this, bedFile, inputBam, targetFile)
-      add(targetInterval, true)
-      add(CalculateHsMetrics(this, inputBam, if (baitIntervalFile != null) baitIntervalFile
-      else targetInterval.output, targetInterval.output, targetDir))
-
-      val strictOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam")
-      add(BedtoolsIntersect(this, inputBam, bedFile, strictOutputBam, minOverlap = config("strictintersectoverlap", default = 1.0)), true)
-      add(SamtoolsFlagstat(this, strictOutputBam, targetDir))
-      add(BiopetFlagstat(this, strictOutputBam, targetDir))
-
-      val looseOutputBam = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam")
-      add(BedtoolsIntersect(this, inputBam, bedFile, looseOutputBam, minOverlap = config("looseintersectoverlap", default = 0.01)), true)
-      add(SamtoolsFlagstat(this, looseOutputBam, targetDir))
-      add(BiopetFlagstat(this, looseOutputBam, targetDir))
+      val targetDir = new File(outputDir, intervals.bed.getName.stripSuffix(".bed"))
+
+      val biStrict = BedtoolsIntersect(this, inputBam, intervals.bed,
+        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.strict.bam"),
+        minOverlap = config("strictintersectoverlap", default = 1.0))
+      biStrict.isIntermediate = true
+      add(biStrict)
+      add(SamtoolsFlagstat(this, biStrict.output, targetDir))
+      add(BiopetFlagstat(this, biStrict.output, targetDir))
+
+      val biLoose = BedtoolsIntersect(this, inputBam, intervals.bed,
+        output = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".overlap.loose.bam"),
+        minOverlap = config("looseintersectoverlap", default = 0.01))
+      biLoose.isIntermediate = true
+      add(biLoose)
+      add(SamtoolsFlagstat(this, biLoose.output, targetDir))
+      add(BiopetFlagstat(this, biLoose.output, targetDir))
 
       val coverageFile = new File(targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
-      add(BedtoolsCoverage(this, inputBam, bedFile, coverageFile, true), true)
+
+      //FIXME:should use piping
+      add(BedtoolsCoverage(this, inputBam, intervals.bed, coverageFile, true), true)
       add(CoverageStats(this, coverageFile, targetDir))
     }
 
@@ -115,13 +135,7 @@ class BamMetrics(val root: Configurable) extends QScript with SummaryQScript wit
 }
 
 object BamMetrics extends PipelineCommand {
-  /**
-   * Make default implementation of BamMetrics
-   * @param root
-   * @param bamFile
-   * @param outputDir
-   * @return
-   */
+  /** Make default implementation of BamMetrics */
   def apply(root: Configurable, bamFile: File, outputDir: File): BamMetrics = {
     val bamMetrics = new BamMetrics(root)
     bamMetrics.inputBam = bamFile
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala
index 2381f6f1d..0d18eb240 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsIntersect.scala
@@ -55,12 +55,12 @@ class BedtoolsIntersect(val root: Configurable) extends Bedtools {
 object BedtoolsIntersect {
   /** Returns default bedtools intersect */
   def apply(root: Configurable, input: File, intersect: File, output: File,
-            minOverlap: Double = 0, count: Boolean = false): BedtoolsIntersect = {
+            minOverlap: Option[Double] = None, count: Boolean = false): BedtoolsIntersect = {
     val bedtoolsIntersect = new BedtoolsIntersect(root)
     bedtoolsIntersect.input = input
     bedtoolsIntersect.intersectFile = intersect
     bedtoolsIntersect.output = output
-    if (minOverlap > 0) bedtoolsIntersect.minOverlap = Option(minOverlap)
+    bedtoolsIntersect.minOverlap = minOverlap
     bedtoolsIntersect.count = count
     return bedtoolsIntersect
   }
diff --git a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala
index 2faef859c..68954ec4f 100644
--- a/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala
+++ b/public/biopet-framework/src/main/scala/nl/lumc/sasc/biopet/extensions/picard/BedToIntervalList.scala
@@ -3,7 +3,7 @@ package nl.lumc.sasc.biopet.extensions.picard
 import java.io.File
 
 import nl.lumc.sasc.biopet.core.config.Configurable
-import org.broadinstitute.gatk.utils.commandline.{Output, Input}
+import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
 
 /**
  * Created by pjvan_thof on 4/15/15.
@@ -27,7 +27,7 @@ class BedToIntervalList(val root: Configurable) extends Picard {
 }
 
 object BedToIntervalList {
-  def apply(root:Configurable, input:File, output:File): BedToIntervalList = {
+  def apply(root: Configurable, input: File, output: File): BedToIntervalList = {
     val bi = new BedToIntervalList(root)
     bi.input = input
     bi.output = output
-- 
GitLab