Picard.scala 4.68 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
package nl.lumc.sasc.biopet.extensions.picard
17

18
import java.io.File
bow's avatar
bow committed
19
import scala.io.Source
20

bow's avatar
bow committed
21
import org.broadinstitute.gatk.utils.commandline.Argument
22

bow's avatar
bow committed
23
import nl.lumc.sasc.biopet.core.BiopetJavaCommandLineFunction
24
import nl.lumc.sasc.biopet.utils.tryToParseNumber
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
/**
 * General Picard extension.
 *
 * Tools are run from the class files inside the jar; when needed, another Picard jar
 * can be selected through the "picard_jar" config value.
 */
abstract class Picard extends BiopetJavaCommandLineFunction {
  /** Prepends "picard" to the inherited sub path. */
  override def subPath = "picard" :: super.subPath

  // Only replace the inherited jar when the config explicitly names one
  if (config.contains("picard_jar")) jarFile = config("picard_jar")

  /** Emitted as VERBOSITY=<value> on the command line when set. */
  @Argument(doc = "VERBOSITY", required = false)
  var verbosity: Option[String] = config("verbosity")

  /** Adds QUIET=TRUE to the command line when true; off unless configured. */
  @Argument(doc = "QUIET", required = false)
  var quiet: Boolean = config("quiet", default = false)

  /** Emitted as VALIDATION_STRINGENCY=<value> on the command line when set. */
  @Argument(doc = "VALIDATION_STRINGENCY", required = false)
  var stringency: Option[String] = config("validationstringency")

  /** Emitted as COMPRESSION_LEVEL=<value> on the command line when set. */
  @Argument(doc = "COMPRESSION_LEVEL", required = false)
  var compression: Option[Int] = config("compressionlevel")

  /** Emitted as MAX_RECORDS_IN_RAM=<value> on the command line when set. */
  @Argument(doc = "MAX_RECORDS_IN_RAM", required = false)
  var maxRecordsInRam: Option[Int] = config("maxrecordsinram")

  /** Adds CREATE_INDEX=TRUE to the command line when true; on unless configured otherwise. */
  @Argument(doc = "CREATE_INDEX", required = false)
  var createIndex: Boolean = config("createindex", default = true)

  /** Adds CREATE_MD5_FILE=TRUE to the command line when true; off unless configured. */
  @Argument(doc = "CREATE_MD5_FILE", required = false)
  var createMd5: Boolean = config("createmd5", default = false)

  /**
   * Command that runs the tool's main class from the jar with "-h".
   * Evaluates to null when no jar file is set.
   */
  override def versionCommand =
    Option(jarFile).map(jar => s"$executable -cp $jar $javaMainClass -h").orNull

  /** Captures everything that follows "Version: ". */
  override val versionRegex = """Version: (.*)""".r

  // NOTE(review): presumably the exit codes accepted from the version command - confirm in the base class
  override val versionExitcode = List(0, 1)

  // NOTE(review): unit is not visible here (presumably GB per core) - confirm in the base class
  override val defaultCoreMemory = 3.0

  /** Inherited command line followed by the options shared by every Picard tool. */
  override def commandLine = {
    val inherited = super.commandLine
    val sharedOptions = Seq(
      required(s"TMP_DIR=$jobTempDir"),
      optional("VERBOSITY=", verbosity, spaceSeparated = false),
      conditional(quiet, "QUIET=TRUE"),
      optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated = false),
      optional("COMPRESSION_LEVEL=", compression, spaceSeparated = false),
      optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false),
      conditional(createIndex, "CREATE_INDEX=TRUE"),
      conditional(createMd5, "CREATE_MD5_FILE=TRUE")
    )
    inherited + sharedOptions.mkString
  }
}
76
77
78

object Picard {

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
  /**
   * Reads one table of a metrics file and returns it in a shape that depends on
   * `groupBy` and on the number of rows in the table.
   *
   * @param file metrics file to read
   * @param tag marker of the section to read, defaults to "METRICS CLASS"
   * @param groupBy optional header name; when given, every row is keyed by its own value
   *                in that column
   * @return None when no content could be read for the section, otherwise Some of
   *         - a map from group value to a (header -> value) map when `groupBy` is given,
   *         - a single (header -> value) map when the table has exactly one row,
   *         - the header followed by all rows (a list of lists) in every other case
   * @throws IllegalArgumentException when `groupBy` is not a column of the header
   */
  def getMetrics(file: File, tag: String = "METRICS CLASS",
                 groupBy: Option[String] = None): Option[Any] = {
    getMetricsContent(file, tag) match {
      case Some((header, content)) =>
        (content.size, groupBy) match {
          case (_, Some(group)) =>
            val groupId = header.indexOf(group)
            if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
            // The key must come from the current row; indexing `content` with the column
            // index would pick an unrelated row (or run past the end of the table).
            Some(content.map(row => row(groupId).toString -> header.zip(row).toMap).toMap)
          case (1, _) => Some(header.zip(content.head).toMap)
          case _      => Some(header :: content)
        }
      case _ => None
    }
  }

  /**
   * Parses a section of a metrics file and transposes it, so the result is keyed by column
   * instead of by row.
   *
   * @param file metrics file
   * @param tag marker of the section to read, defaults to "HISTOGRAM"
   * @return None when no content could be read for the section, otherwise a map from each
   *         header name to the values of that column, one entry per row (None where a row
   *         has no value at that position)
   */
  def getHistogram(file: File, tag: String = "HISTOGRAM") = {
    getMetricsContent(file, tag).map {
      case (header, rows) =>
        header.zipWithIndex.map {
          case (name, idx) => name -> rows.map(_.lift(idx))
        }.toMap
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
115
116
117
118
119
  /**
   * Parses one tab-separated section of a metrics file.
   *
   * The section starts on the line after the first line that begins with "## " + tag; that
   * line is the header. The rows run until the next empty line, or until the end of the file
   * when no empty line follows.
   *
   * @param file input metrics file
   * @param tag marker of the section to read
   * @return None when the file does not exist, the tag is not found, or nothing follows the
   *         tag line; otherwise Some((header, content)), where values that
   *         `tryToParseNumber` cannot convert are kept as the raw string
   */
  def getMetricsContent(file: File, tag: String): Option[(List[String], List[List[Any]])] = {
    if (!file.exists) None
    else {
      // Read everything up front so the file handle can be released immediately
      val source = Source.fromFile(file)
      val lines = try source.getLines().toArray finally source.close()

      val tagLine = lines.indexWhere(_.startsWith("## " + tag))
      val start = tagLine + 1

      // No tag, or a tag on the very last line: there is no header line to parse
      if (tagLine == -1 || start >= lines.length) None
      else {
        val blank = lines.indexOf("", start)
        // A section that runs to the end of the file has no terminating empty line
        val end = if (blank == -1) lines.length else blank

        val header = lines(start).split("\t").toList
        val content = (for (i <- (start + 1) until end) yield {
          lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
        }).toList

        Some((header, content))
      }
    }
  }
136
}