From 91e15a2ae7acd71132c868bdd45b7a72e091c716 Mon Sep 17 00:00:00 2001 From: Peter van 't Hof <p.j.van_t_hof@lumc.nl> Date: Mon, 30 May 2016 08:30:18 +0200 Subject: [PATCH] Init commit for bamstats --- .../nl/lumc/sasc/biopet/tools/BamStats.scala | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BamStats.scala diff --git a/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BamStats.scala b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BamStats.scala new file mode 100644 index 000000000..96f7253cf --- /dev/null +++ b/biopet-tools/src/main/scala/nl/lumc/sasc/biopet/tools/BamStats.scala @@ -0,0 +1,52 @@ +package nl.lumc.sasc.biopet.tools + +import java.io.File + +import nl.lumc.sasc.biopet.utils.ToolCommand +import nl.lumc.sasc.biopet.utils.intervals.BedRecordList + +/** + * Created by pjvanthof on 25/05/16. + */ +object BamStats extends ToolCommand { + case class Args(outputDir: File = null, + bamFile: File = null, + reference: File = null, + binSize: Int = 10000, + threadBinSize: Int = 10000000) extends AbstractArgs + + class OptParser extends AbstractOptParser { + opt[File]('R', "reference") required () valueName "<file>" action { (x, c) => + c.copy(reference = x) + } + opt[File]('o', "outputDir") required () valueName "<directory>" action { (x, c) => + c.copy(outputDir = x) + } + opt[File]('b', "bam") required () valueName "<file>" action { (x, c) => + c.copy(bamFile = x) + } + opt[Int]("binSize") required () valueName "<int>" action { (x, c) => + c.copy(binSize = x) + } + } + + def main(args: Array[String]): Unit = { + val argsParser = new OptParser + val cmdArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException) + + val regions = BedRecordList.fromReference(cmdArgs.reference) + .scatter(cmdArgs.binSize) + .sorted + .chrRecords.flatMap { case (chr, regions) => + val numberThreads = cmdArgs.threadBinSize / cmdArgs.binSize + regions.grouped(numberThreads).map(BedRecordList.fromList(_)) + }.par + + for (regionList <- regions) { + require(regionList.chrRecords.size == 1, "A thread can only have 1 contig") + val chr = regionList.chrRecords.head._1 + val start = regionList.allRecords.map(_.start).min + 1 + val end = regionList.allRecords.map(_.end).max + } + } +} -- GitLab