Picard.scala 4.89 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
package nl.lumc.sasc.biopet.extensions.picard
17

18
import java.io.File
bow's avatar
bow committed
19
import scala.io.Source
20

bow's avatar
bow committed
21
import org.broadinstitute.gatk.utils.commandline.Argument
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction }
24
import nl.lumc.sasc.biopet.utils.tryToParseNumber
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
/**
 * Common base for the Picard tool wrappers.
 *
 * The tool is started from its main class on the classpath of the configured jar;
 * a different Picard jar can be selected with the "picard_jar" config value.
 * The options shared by all Picard tools are declared here and appended to the
 * command line produced by the parent class.
 */
abstract class Picard extends BiopetJavaCommandLineFunction {
  /** Prepends "picard" to the config sub path of the parent. */
  override def subPath = "picard" :: super.subPath

  // Only replace the jar when one is explicitly configured.
  if (config.contains("picard_jar")) jarFile = config("picard_jar")

  /** Value for VERBOSITY, read from config key "verbosity"; omitted from the command when empty. */
  @Argument(doc = "VERBOSITY", required = false)
  var verbosity: Option[String] = config("verbosity")

  /** When true QUIET=TRUE is added; read from config key "quiet", false when not configured. */
  @Argument(doc = "QUIET", required = false)
  var quiet: Boolean = config("quiet", default = false)

  /** Value for VALIDATION_STRINGENCY, read from config key "validationstringency". */
  @Argument(doc = "VALIDATION_STRINGENCY", required = false)
  var stringency: Option[String] = config("validationstringency")

  /** Value for COMPRESSION_LEVEL, read from config key "compressionlevel". */
  @Argument(doc = "COMPRESSION_LEVEL", required = false)
  var compression: Option[Int] = config("compressionlevel")

  /** Value for MAX_RECORDS_IN_RAM, read from config key "maxrecordsinram". */
  @Argument(doc = "MAX_RECORDS_IN_RAM", required = false)
  var maxRecordsInRam: Option[Int] = config("maxrecordsinram")

  /** When true CREATE_INDEX=TRUE is added; read from config key "createindex", true when not configured. */
  @Argument(doc = "CREATE_INDEX", required = false)
  var createIndex: Boolean = config("createindex", default = true)

  /** When true CREATE_MD5_FILE=TRUE is added; read from config key "createmd5", false when not configured. */
  @Argument(doc = "CREATE_MD5_FILE", required = false)
  var createMd5: Boolean = config("createmd5", default = false)

  /** Command used to detect the tool version (the tool's "-h" output); null when no jar is set. */
  override def versionCommand = {
    if (jarFile == null) null
    else s"$executable -cp $jarFile $javaMainClass -h"
  }

  /** Extracts the version from the output of [[versionCommand]]. */
  override val versionRegex = """Version: (.*)""".r

  // NOTE(review): exit code 1 is accepted as well, presumably because "-h" exits non-zero - confirm.
  override val versionExitcode = List(0, 1)

  override val defaultCoreMemory = 3.0

  /** Command line of the parent followed by the shared Picard options, in a fixed order. */
  override def commandLine = super.commandLine + List(
    required("TMP_DIR=" + jobTempDir),
    optional("VERBOSITY=", verbosity, spaceSeparated = false),
    conditional(quiet, "QUIET=TRUE"),
    optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated = false),
    optional("COMPRESSION_LEVEL=", compression, spaceSeparated = false),
    optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false),
    conditional(createIndex, "CREATE_INDEX=TRUE"),
    conditional(createMd5, "CREATE_MD5_FILE=TRUE")
  ).mkString
}
76

Peter van 't Hof's avatar
Peter van 't Hof committed
77
object Picard extends Logging {

  /**
   * Reads one section of a Picard metrics file and shapes it for further use.
   *
   * @param file metrics file
   * @param tag text that follows "## " on the marker line preceding the section
   * @param groupBy optional header column; when given, rows are keyed on the value in this column
   * @return None when [[getMetricsContent]] finds no section, otherwise Some of:
   *         - with groupBy: a map from the group value (as string) to a map of the remaining
   *           columns to their values
   *         - without groupBy and exactly one row: a map of column to value
   *         - in all other cases: a list holding the header followed by the rows
   * @throws IllegalArgumentException when groupBy is given but not present in the header
   */
  def getMetrics(file: File, tag: String = "METRICS CLASS",
                 groupBy: Option[String] = None): Option[Any] = {
    getMetricsContent(file, tag) match {
      case Some((header, content)) =>
        (content.size, groupBy) match {
          case (_, Some(group)) =>
            val groupId = header.indexOf(group)
            if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
            if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file)
            val grouped = content.map { row =>
              // Pair names with values before dropping the group column(s), so the remaining
              // columns stay aligned even when the group name occurs more than once in the header.
              row(groupId).toString -> header.zip(row).filter(_._1 != group).toMap
            }
            Some(grouped.toMap)
          case (1, _) => Some(header.zip(content.head).toMap)
          case _      => Some(header :: content)
        }
      case _ => None
    }
  }

  /**
   * Reads a section of a metrics file and transposes it, giving one entry per column.
   *
   * @param file metrics file
   * @param tag text that follows "## " on the marker line preceding the section
   * @return None when [[getMetricsContent]] finds no section, otherwise a map from column name
   *         to the values of that column in row order; a row that has no value at the column
   *         position contributes None
   */
  def getHistogram(file: File, tag: String = "HISTOGRAM"): Option[Map[String, List[Option[Any]]]] = {
    getMetricsContent(file, tag).map {
      case (header, content) =>
        header.zipWithIndex.map { case (name, idx) => name -> content.map(_.lift(idx)) }.toMap
    }
  }

  /**
   * Parses one tab separated section of a metrics file.
   *
   * The section starts at the line directly after the first line starting with "## " + tag;
   * that line is the header. The rows run up to the next empty line, or to the end of the file
   * when no empty line follows. Each value is passed through tryToParseNumber and kept as the
   * original string when that does not succeed.
   *
   * Rows can hold fewer values than the header, because `split` drops trailing empty fields.
   *
   * @param file input metrics file
   * @param tag text that follows "## " on the marker line preceding the section
   * @return Some((header, content)), or None when the file does not exist, the marker line is
   *         not found, or nothing follows the marker line
   */
  def getMetricsContent(file: File, tag: String): Option[(List[String], List[List[Any]])] = {
    if (!file.exists) None
    else {
      // Read everything up front and always release the file handle.
      val source = Source.fromFile(file)
      val lines = try source.getLines().toArray finally source.close()

      val tagLine = lines.indexWhere(_.startsWith("## " + tag))
      val start = tagLine + 1

      // No marker line, or no header line after it: there is no section to parse.
      if (tagLine == -1 || start >= lines.length) None
      else {
        val blank = lines.indexOf("", start)
        // A section that is not closed by an empty line runs to the end of the file.
        val end = if (blank == -1) lines.length else blank

        val header = lines(start).split("\t").toList
        val content = lines.slice(start + 1, end).toList.map { line =>
          line.split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
        }

        Some((header, content))
      }
    }
  }
}