Commit e5d79321 authored by bow's avatar bow
Browse files

Merge branch 'fix-bedtools_coverage_sorted' into 'develop'

Added bedtools sorted flag as default

@a.h.b.bollen: This is for you, can you test is this uses less memory then?

See merge request !395
parents 75834095 92a3a1b7
......@@ -17,14 +17,14 @@ package nl.lumc.sasc.biopet.pipelines.bammetrics
import java.io.File
import nl.lumc.sasc.biopet.core.annotations.{ AnnotationRefFlat, RibosomalRefFlat }
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.core.{ BiopetFifoPipe, PipelineCommand, Reference, SampleLibraryTag }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect }
import nl.lumc.sasc.biopet.extensions.bedtools.{ BedtoolsCoverage, BedtoolsIntersect, BedtoolsSort }
import nl.lumc.sasc.biopet.extensions.picard._
import nl.lumc.sasc.biopet.extensions.samtools.SamtoolsFlagstat
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.extensions.tools.BiopetFlagstat
import nl.lumc.sasc.biopet.pipelines.bammetrics.scripts.CoverageStats
import nl.lumc.sasc.biopet.utils.config.Configurable
import nl.lumc.sasc.biopet.utils.intervals.BedCheck
import org.broadinstitute.gatk.queue.QScript
......@@ -41,6 +41,8 @@ class BamMetrics(val root: Configurable) extends QScript
@Input(doc = "Bam File", shortName = "BAM", required = true)
var inputBam: File = _
override def defaults = Map("bedtoolscoverage" -> Map("sorted" -> true))
/** return location of summary file */
def summaryFile = (sampleId, libId) match {
case (Some(s), Some(l)) => new File(outputDir, s + "-" + l + ".BamMetrics.summary.json")
......@@ -164,7 +166,11 @@ class BamMetrics(val root: Configurable) extends QScript
addSummarizable(biopetFlagstatLoose, targetName + "_biopet_flagstat_loose")
add(new BiopetFifoPipe(this, List(biLoose, biopetFlagstatLoose)))
val bedCov = BedtoolsCoverage(this, intervals.bed, inputBam, depth = true)
val sorter = new BedtoolsSort(this)
sorter.input = intervals.bed
sorter.output = swapExt(targetDir, intervals.bed, ".bed", ".sorted.bed")
add(sorter)
val bedCov = BedtoolsCoverage(this, sorter.output, inputBam, depth = true)
val covStats = CoverageStats(this, targetDir, inputBam.getName.stripSuffix(".bam") + ".coverage")
covStats.title = Some("Coverage Plot")
covStats.subTitle = Some(s"for file '$targetName.bed'")
......
......@@ -14,13 +14,16 @@
*/
package nl.lumc.sasc.biopet.extensions.bedtools
import java.io.File
import java.io.{ File, PrintWriter }
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
import scala.io.Source
/** Extension for bedtools coverage */
class BedtoolsCoverage(val root: Configurable) extends Bedtools {
class BedtoolsCoverage(val root: Configurable) extends Bedtools with Reference {
@Input(doc = "Input file (bed/gff/vcf/bam)")
var input: File = null
......@@ -40,6 +43,9 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools {
@Argument(doc = "diffStrand", required = false)
var diffStrand: Boolean = false
@Argument(doc = "sorted", required = false)
var sorted: Boolean = config("sorted", default = false, freeVar = false)
override def defaultCoreMemory = 4.0
/** Returns command to execute */
......@@ -49,7 +55,10 @@ class BedtoolsCoverage(val root: Configurable) extends Bedtools {
conditional(depth, "-d") +
conditional(sameStrand, "-s") +
conditional(diffStrand, "-S") +
conditional(sorted, "-sorted") +
(if (sorted) required("-g", BedtoolsCoverage.getGenomeFile(referenceFai, jobTempDir)) else "") +
(if (outputAsStsout) "" else " > " + required(output))
}
object BedtoolsCoverage {
......@@ -65,4 +74,27 @@ object BedtoolsCoverage {
bedtoolsCoverage.diffStrand = diffStrand
bedtoolsCoverage
}
private var genomeCache: Map[(File, File), File] = Map()
def getGenomeFile(fai: File, dir: File): File = {
if (!genomeCache.contains((fai, dir))) genomeCache += (fai, dir) -> createGenomeFile(fai, dir)
genomeCache((fai, dir))
}
/**
* Creates the genome file. i.e. the first two columns of the fasta index
* @return
*/
def createGenomeFile(fai: File, dir: File): File = {
val tmp = File.createTempFile(fai.getName, ".genome", dir)
tmp.deleteOnExit()
val writer = new PrintWriter(tmp)
Source.fromFile(fai).
getLines().
map(s => s.split("\t").take(2).mkString("\t")).
foreach(f => writer.println(f))
writer.close()
tmp
}
}
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions.bedtools
import java.io.File
import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
/**
* Created by Sander Bollen on 26-5-16.
*/
class BedtoolsSort(val root: Configurable) extends Bedtools with Reference {
@Input
var input: File = null
@Output
var output: File = null
@Argument(required = false)
var faidx: File = referenceFai
def cmdLine = required(executable) + required("sort") + required("-i", input) +
optional("-faidx", faidx) +
(if (outputAsStsout) "" else " > " + required(output))
}
chr1 9 6 9 10
\ No newline at end of file
package nl.lumc.sasc.biopet.extensions
import java.io.File
import java.nio.file.Paths
import nl.lumc.sasc.biopet.extensions.bedtools.BedtoolsCoverage
import nl.lumc.sasc.biopet.utils.config.{ Config, Configurable }
import org.scalatest.Matchers
import org.scalatest.testng.TestNGSuite
import org.testng.annotations.Test
import scala.io.Source
/**
* Created by Sander Bollen on 12-5-16.
*/
class BedToolsTest extends TestNGSuite with Matchers {
@Test
def testBedtoolsCoverageCreateGenomeFile() = {
val file = new File(Paths.get(this.getClass.getResource("/ref.fa.fai").toURI).toString)
val tmp = File.createTempFile("test", ".bed")
tmp.deleteOnExit()
class TestCov(override val root: Configurable) extends BedtoolsCoverage(root) {
jobTempDir = tmp
override def referenceFai = file
def genome = BedtoolsCoverage.createGenomeFile(file, file.getParentFile)
}
val cov = new TestCov(null)
val genome = cov.genome
Source.fromFile(genome).getLines().mkString("\n") shouldBe "chr1\t9"
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment