From 29ddfee68df64c2b4459fa74f4f90aa2bf72a7ad Mon Sep 17 00:00:00 2001
From: Sander Bollen <a.h.b.bollen@lumc.nl>
Date: Tue, 5 Jan 2016 12:10:45 +0100
Subject: [PATCH] merge bed file to prevent massive files

---
 .../extensions/bedtools/BedtoolsMerge.scala   | 28 +++++++++++++++++++
 .../sasc/biopet/pipelines/toucan/Toucan.scala | 21 ++++++++++----
 2 files changed, 44 insertions(+), 5 deletions(-)
 create mode 100644 public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala

diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala
new file mode 100644
index 000000000..c0f2e3640
--- /dev/null
+++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala
@@ -0,0 +1,28 @@
+package nl.lumc.sasc.biopet.extensions.bedtools
+
+import java.io.File
+
+import nl.lumc.sasc.biopet.utils.config.Configurable
+import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input }
+
+/**
+ * Command-line wrapper that runs the `merge` subcommand of the Bedtools executable. Created by ahbbollen on 5-1-16.
+ */
+class BedtoolsMerge(val root: Configurable) extends Bedtools {
+
+  @Input(doc = "Input bed file")
+  var input: File = _ // handed to the tool through -i
+
+  @Argument(doc = "Distance")
+  var dist: Int = 1 // handed to the tool through -d; NOTE(review): bedtools documents its own -d default as 0, not 1 - confirm intent
+
+  @Output(doc = "Output bed file")
+  var output: File = _ // target of the " > " redirect in cmdLine
+
+  def cmdLine = { // assembles: <executable> merge -i <input> -d <dist> > <output>
+    required(executable) + required("merge") +
+      required("-i", input) + optional("-d", dist) +
+      " > " + required(output)
+  }
+
+}
diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
index 974df31da..620192846 100644
--- a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
+++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala
@@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.toucan
 import java.io.{ File, PrintWriter }
 
 import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView
-import nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsIntersect
+import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsMerge, BedtoolsIntersect }
 import nl.lumc.sasc.biopet.extensions.manwe.{ ManweSamplesImport, ManweAnnotateVcf, ManweDataSourcesAnnotate }
 import nl.lumc.sasc.biopet.utils.config.Configurable
 import nl.lumc.sasc.biopet.core.summary.SummaryQScript
@@ -131,9 +131,15 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
     bedTrack.sample = Some(sampleID)
     add(bedTrack)
 
+    val mergedBed = new BedtoolsMerge(this)
+    mergedBed.input = bedTrack.outputBed
+    mergedBed.dist = 5
+    mergedBed.output = swapExt(outputDir, bedTrack.outputBed, ".bed", ".merged.bed")
+    add(mergedBed)
+
     val bgzippedBed = new Bgzip(this)
-    bgzippedBed.input = List(bedTrack.outputBed)
-    bgzippedBed.output = swapExt(outputDir, bedTrack.outputBed, ".bed", ".bed.gz")
+    bgzippedBed.input = List(mergedBed.output)
+    bgzippedBed.output = swapExt(outputDir, mergedBed.output, ".bed", ".bed.gz")
     add(bgzippedBed)
 
     val singleVcf = new BcftoolsView(this)
@@ -147,11 +153,16 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum
     val intersected = new BedtoolsIntersect(this)
     intersected.input = singleVcf.output
     intersected.intersectFile = bgzippedBed.output
-    intersected.output = swapExt(outputDir, singleVcf.output, ".vcf.gz", ".intersected.vcf.gz")
+    intersected.output = swapExt(outputDir, singleVcf.output, ".vcf.gz", ".intersected.vcf")
     add(intersected)
 
+    val bgzippedIntersect = new Bgzip(this)
+    bgzippedIntersect.input = List(intersected.output)
+    bgzippedIntersect.output = swapExt(outputDir, intersected.output, ".vcf", ".vcf.gz")
+    add(bgzippedIntersect)
+
     val imported = new ManweSamplesImport(this)
-    imported.vcfs = List(intersected.output)
+    imported.vcfs = List(bgzippedIntersect.output)
     imported.beds = List(bgzippedBed.output)
     imported.name = Some(sampleID)
     imported.public = isPublic
-- 
GitLab