From 29ddfee68df64c2b4459fa74f4f90aa2bf72a7ad Mon Sep 17 00:00:00 2001 From: Sander Bollen <a.h.b.bollen@lumc.nl> Date: Tue, 5 Jan 2016 12:10:45 +0100 Subject: [PATCH] merge bed file to prevent massive files --- .../extensions/bedtools/BedtoolsMerge.scala | 28 +++++++++++++++++++ .../sasc/biopet/pipelines/toucan/Toucan.scala | 21 ++++++++++---- 2 files changed, 44 insertions(+), 5 deletions(-) create mode 100644 public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala diff --git a/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala new file mode 100644 index 000000000..c0f2e3640 --- /dev/null +++ b/public/biopet-extensions/src/main/scala/nl/lumc/sasc/biopet/extensions/bedtools/BedtoolsMerge.scala @@ -0,0 +1,28 @@ +package nl.lumc.sasc.biopet.extensions.bedtools + +import java.io.File + +import nl.lumc.sasc.biopet.utils.config.Configurable +import org.broadinstitute.gatk.utils.commandline.{ Argument, Output, Input } + +/** + * Created by ahbbollen on 5-1-16. 
+ */ +class BedtoolsMerge(val root: Configurable) extends Bedtools { + + @Input(doc = "Input bed file") + var input: File = _ + + @Argument(doc = "Distance") + var dist: Int = 1 //wrapper default for -d; NOTE(review): bedtools merge appears to document its own -d default as 0, not 1 - confirm + + @Output(doc = "Output bed file") + var output: File = _ + + def cmdLine = { + required(executable) + required("merge") + + required("-i", input) + optional("-d", dist) + + " > " + required(output) + } + +} diff --git a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala index 974df31da..620192846 100644 --- a/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala +++ b/public/toucan/src/main/scala/nl/lumc/sasc/biopet/pipelines/toucan/Toucan.scala @@ -18,7 +18,7 @@ package nl.lumc.sasc.biopet.pipelines.toucan import java.io.{ File, PrintWriter } import nl.lumc.sasc.biopet.extensions.bcftools.BcftoolsView -import nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsIntersect +import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsMerge, BedtoolsIntersect } import nl.lumc.sasc.biopet.extensions.manwe.{ ManweSamplesImport, ManweAnnotateVcf, ManweDataSourcesAnnotate } import nl.lumc.sasc.biopet.utils.config.Configurable import nl.lumc.sasc.biopet.core.summary.SummaryQScript @@ -131,9 +131,15 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum bedTrack.sample = Some(sampleID) add(bedTrack) + val mergedBed = new BedtoolsMerge(this) + mergedBed.input = bedTrack.outputBed + mergedBed.dist = 5 + mergedBed.output = swapExt(outputDir, bedTrack.outputBed, ".bed", ".merged.bed") + add(mergedBed) + val bgzippedBed = new Bgzip(this) - bgzippedBed.input = List(bedTrack.outputBed) - bgzippedBed.output = swapExt(outputDir, bedTrack.outputBed, ".bed", ".bed.gz") + bgzippedBed.input = List(mergedBed.output) + bgzippedBed.output = swapExt(outputDir, mergedBed.output, ".bed", ".bed.gz") add(bgzippedBed) val singleVcf = new
BcftoolsView(this) @@ -147,11 +153,16 @@ class Toucan(val root: Configurable) extends QScript with BiopetQScript with Sum val intersected = new BedtoolsIntersect(this) intersected.input = singleVcf.output intersected.intersectFile = bgzippedBed.output - intersected.output = swapExt(outputDir, singleVcf.output, ".vcf.gz", ".intersected.vcf.gz") + intersected.output = swapExt(outputDir, singleVcf.output, ".vcf.gz", ".intersected.vcf") add(intersected) + val bgzippedIntersect = new Bgzip(this) + bgzippedIntersect.input = List(intersected.output) + bgzippedIntersect.output = swapExt(outputDir, intersected.output, ".vcf", ".vcf.gz") + add(bgzippedIntersect) + val imported = new ManweSamplesImport(this) - imported.vcfs = List(intersected.output) + imported.vcfs = List(bgzippedIntersect.output) imported.beds = List(bgzippedBed.output) imported.name = Some(sampleID) imported.public = isPublic -- GitLab