BedtoolsCoverageToCounts.scala 2.25 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
11
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
12
13
14
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
15
package nl.lumc.sasc.biopet.tools
16

Peter van 't Hof's avatar
Peter van 't Hof committed
17
18
import java.io.{ File, PrintWriter }

Peter van 't Hof's avatar
Peter van 't Hof committed
19
20
import nl.lumc.sasc.biopet.utils.ToolCommand

21
import scala.collection.{ SortedMap, mutable }
22
23
import scala.io.Source

24
object BedtoolsCoverageToCounts extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
25
  case class Args(input: File = null, output: File = null) extends AbstractArgs
26
27

  class OptParser extends AbstractOptParser {
Peter van 't Hof's avatar
Peter van 't Hof committed
28
    opt[File]('I', "input") required () valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
29
      c.copy(input = x)
Moustakas's avatar
Moustakas committed
30
    } text "Coverage file produced with bedtools"
Peter van 't Hof's avatar
Peter van 't Hof committed
31
    opt[File]('o', "output") required () unbounded () valueName "<file>" action { (x, c) =>
Peter van 't Hof's avatar
Peter van 't Hof committed
32
      c.copy(output = x)
Moustakas's avatar
Moustakas committed
33
    } text "Output file name"
34
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
35

36
37
38
39
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
40
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
41
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)
Peter van 't Hof's avatar
Peter van 't Hof committed
42

43
    if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input)
Peter van 't Hof's avatar
Peter van 't Hof committed
44

Peter van 't Hof's avatar
Peter van 't Hof committed
45
46
    val counts: mutable.Map[String, Long] = mutable.Map()
    for (line <- Source.fromFile(commandArgs.input).getLines()) {
47
48
49
50
51
52
      val values = line.split("\t")
      val gene = values(3)
      val count = values(6).toLong
      if (counts.contains(gene)) counts(gene) += count
      else counts += gene -> count
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
53
54
55

    val sortedCounts: SortedMap[String, Long] = SortedMap(counts.toArray: _*)

56
    val writer = new PrintWriter(commandArgs.output)
Peter van 't Hof's avatar
Peter van 't Hof committed
57
    for ((seq, count) <- sortedCounts) {
58
59
      if (count > 0) writer.println(seq + "\t" + count)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
60
    writer.close()
61
62
  }
}