SageCreateTagCounts.scala 5.04 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
package nl.lumc.sasc.biopet.tools
17

Peter van 't Hof's avatar
Peter van 't Hof committed
18 19
import java.io.{ File, PrintWriter }

Peter van 't Hof's avatar
Peter van 't Hof committed
20 21
import nl.lumc.sasc.biopet.utils.ToolCommand

22
import scala.collection.{ SortedMap, mutable }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import scala.io.Source
24

25
/**
 * Command line tool that sums raw tag counts per gene using a tag library.
 *
 * The input file is read as tab-separated lines with a tag in the first column and an
 * integer count in the second column. The tag library is read as tab-separated lines
 * (lines starting with "#" are skipped) with the columns: tag, sense, allSense,
 * antiSense and allAntiSense. For every requested output a file with "gene<TAB>count"
 * lines is written, sorted on gene name and restricted to counts greater than 0.
 */
object SageCreateTagCounts extends ToolCommand {
  case class Args(input: File = null, tagLib: File = null, countSense: File = null, countAllSense: File = null,
                  countAntiSense: File = null, countAllAntiSense: File = null) extends AbstractArgs

  class OptParser extends AbstractOptParser {
    opt[File]('I', "input") required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(input = x)
    }
    opt[File]('t', "tagLib") required () unbounded () valueName "<file>" action { (x, c) =>
      c.copy(tagLib = x)
    }
    opt[File]("countSense") unbounded () valueName "<file>" action { (x, c) =>
      c.copy(countSense = x)
    }
    opt[File]("countAllSense") unbounded () valueName "<file>" action { (x, c) =>
      c.copy(countAllSense = x)
    }
    opt[File]("countAntiSense") unbounded () valueName "<file>" action { (x, c) =>
      c.copy(countAntiSense = x)
    }
    opt[File]("countAllAntiSense") unbounded () valueName "<file>" action { (x, c) =>
      c.copy(countAllAntiSense = x)
    }
  }

  /**
   * Reads the tab-separated tag counts file and sums the counts per tag.
   *
   * @param countsFile file with a tag in column 0 and an integer count in column 1
   * @return map from tag to the sum of all counts found for that tag
   */
  private def readRawTagCounts(countsFile: File): mutable.Map[String, Long] = {
    val rawCounts: mutable.Map[String, Long] = mutable.Map()
    val source = Source.fromFile(countsFile)
    try {
      for (line <- source.getLines()) {
        val columns = line.split("\t")
        val tag = columns(0)
        val count = columns(1).toLong
        rawCounts(tag) = rawCounts.getOrElse(tag, 0L) + count
      }
    } finally source.close() // always release the file handle, also when parsing fails
    rawCounts
  }

  /**
   * Adds a count to the running total of a gene.
   *
   * Empty names and names containing a "," are skipped
   * (NOTE(review): a "," presumably marks a tag that maps to multiple genes - confirm).
   *
   * @param counts map with the running totals, updated in place
   * @param gene name taken from one of the tag library columns
   * @param count count to add, an entry is also created when this is 0
   */
  private def addGeneCount(counts: mutable.Map[String, Long], gene: String, count: Long): Unit = {
    if (gene.nonEmpty && !gene.contains(",")) counts(gene) = counts.getOrElse(gene, 0L) + count
  }

  /**
   * Writes "gene<TAB>count" lines sorted on gene name, entries with a count of 0 are left out.
   *
   * @param file output file, nothing is written when this is null (output not requested)
   * @param counts counts to write
   */
  private def writeGeneCounts(file: File, counts: mutable.Map[String, Long]): Unit = {
    if (file != null) {
      val sorted: SortedMap[String, Long] = SortedMap(counts.toSeq: _*)
      val writer = new PrintWriter(file)
      try {
        for ((gene, count) <- sorted if count > 0) writer.println(gene + "\t" + count)
      } finally writer.close() // flush and close even when writing fails halfway
    }
  }

  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    val argsParser = new OptParser
    val commandArgs: Args = argsParser.parse(args, Args()) getOrElse (throw new IllegalArgumentException)

    if (!commandArgs.input.exists) throw new IllegalStateException("Input file not found, file: " + commandArgs.input)
    if (!commandArgs.tagLib.exists)
      throw new IllegalStateException("Tag library file not found, file: " + commandArgs.tagLib)

    val rawCounts = readRawTagCounts(commandArgs.input)

    val senseCounts: mutable.Map[String, Long] = mutable.Map()
    val allSenseCounts: mutable.Map[String, Long] = mutable.Map()
    val antiSenseCounts: mutable.Map[String, Long] = mutable.Map()
    val allAntiSenseCounts: mutable.Map[String, Long] = mutable.Map()

    val tagLibSource = Source.fromFile(commandArgs.tagLib)
    try {
      for (line <- tagLibSource.getLines() if !line.startsWith("#")) {
        val columns = line.split("\t")
        // String.split drops trailing empty fields, so a missing column is treated as empty
        def column(index: Int): String = if (columns.length > index) columns(index) else ""

        // Tags without a raw count still register their genes, with a count of 0
        val count = rawCounts.getOrElse(column(0), 0L)

        addGeneCount(senseCounts, column(1), count)
        addGeneCount(allSenseCounts, column(2), count)
        addGeneCount(antiSenseCounts, column(3), count)
        addGeneCount(allAntiSenseCounts, column(4), count)
      }
    } finally tagLibSource.close()

    // Outputs that were not requested on the command line are null and skipped by writeGeneCounts
    writeGeneCounts(commandArgs.countSense, senseCounts)
    writeGeneCounts(commandArgs.countAllAntiSense, allAntiSenseCounts)
    writeGeneCounts(commandArgs.countAllSense, allSenseCounts)
    writeGeneCounts(commandArgs.countAntiSense, antiSenseCounts)
  }
}