Picard.scala 5.84 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
package nl.lumc.sasc.biopet.extensions.picard
17

18
import java.io.{ FileReader, File }
bow's avatar
bow committed
19
import scala.io.Source
20

bow's avatar
bow committed
21
import org.broadinstitute.gatk.utils.commandline.Argument
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction }
24
import nl.lumc.sasc.biopet.utils.tryToParseNumber
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
/**
 * Base class for the Picard tool wrappers.
 *
 * The main class of the tool is run from the class path; when the config contains
 * "picard_jar", that jar is used as `jarFile` instead. The options declared here are
 * appended to the command line of every subclass.
 */
abstract class Picard extends BiopetJavaCommandLineFunction {
  /** Puts "picard" in front of the inherited sub path. */
  override def subPath = "picard" :: super.subPath

  // Only override the jar when the user configured one explicitly
  if (config.contains("picard_jar")) jarFile = config("picard_jar")

  @Argument(doc = "VERBOSITY", required = false)
  var verbosity: Option[String] = config("verbosity")

  @Argument(doc = "QUIET", required = false)
  var quiet: Boolean = config("quiet", default = false)

  @Argument(doc = "VALIDATION_STRINGENCY", required = false)
  var stringency: Option[String] = config("validationstringency")

  @Argument(doc = "COMPRESSION_LEVEL", required = false)
  var compression: Option[Int] = config("compressionlevel")

  @Argument(doc = "MAX_RECORDS_IN_RAM", required = false)
  var maxRecordsInRam: Option[Int] = config("maxrecordsinram")

  @Argument(doc = "CREATE_INDEX", required = false)
  var createIndex: Boolean = config("createindex", default = true)

  @Argument(doc = "CREATE_MD5_FILE", required = false)
  var createMd5: Boolean = config("createmd5", default = false)

  /** Command that prints the help text of the main class; null when no jar file is set. */
  override def versionCommand = {
    if (jarFile == null) null
    else s"$executable -cp $jarFile $javaMainClass -h"
  }

  /** Pattern capturing whatever follows "Version: " in the output of the version command. */
  override val versionRegex = """Version: (.*)""".r

  // NOTE(review): presumably the exit codes accepted from the version command - confirm in the base class
  override val versionExitcode = List(0, 1)

  // NOTE(review): unit is defined by the base class (presumably gigabytes) - confirm
  override val defaultCoreMemory = 3.0

  /** Asks the base class when a jar file is set, otherwise reports the Picard version bundled with Biopet. */
  override def getVersion = {
    if (jarFile != null) super.getVersion
    else Picard.getBiopetPicardVersion
  }

  /** The inherited command line followed by the Picard options declared in this class. */
  override def commandLine = {
    // Evaluate the inherited part first, as the original concatenation chain did
    val inherited = super.commandLine
    val picardOptions = List(
      required("TMP_DIR=" + jobTempDir),
      optional("VERBOSITY=", verbosity, spaceSeparated = false),
      conditional(quiet, "QUIET=TRUE"),
      optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated = false),
      optional("COMPRESSION_LEVEL=", compression, spaceSeparated = false),
      optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false),
      conditional(createIndex, "CREATE_INDEX=TRUE"),
      conditional(createMd5, "CREATE_MD5_FILE=TRUE")
    )
    inherited + picardOptions.mkString
  }
}
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
object Picard extends Logging {

  /**
   * Version string of the Picard library listed in the "/dependency_list.txt"
   * class path resource.
   *
   * Only lines of that resource with exactly five colon-separated fields
   * (groupId:artifactId:type:version:scope) are used. The result is None when the
   * resource is missing or contains no picard:picard entry.
   */
  lazy val getBiopetPicardVersion: Option[String] = {
    // getResourceAsStream returns null for a missing resource; wrap it so that case yields None
    val dependencies = Option(getClass.getResourceAsStream("/dependency_list.txt")) match {
      case Some(stream) =>
        val reader = Source.fromInputStream(stream)
        // toList forces the lazy line iterator before the reader is closed
        try {
          reader.getLines().map(_.trim.split(":")).filter(_.size == 5).map(line => Map(
            "groupId" -> line(0),
            "artifactId" -> line(1),
            "type" -> line(2),
            "version" -> line(3),
            "scope" -> line(4)
          )).toList
        } finally reader.close()
      case None =>
        logger.warn("/dependency_list.txt not found on the class path")
        Nil
    }

    logger.debug("dependencies: " + dependencies)

    val htsjdk = dependencies
      .find(dep => dep("groupId") == "samtools" && dep("artifactId") == "htsjdk")
      .map(dep => "samtools htsjdk " + dep("version"))

    dependencies
      .find(dep => dep("groupId") == "picard" && dep("artifactId") == "picard")
      .map(dep => "Picard " + dep("version") + " using " + htsjdk.getOrElse("unknown htsjdk"))
  }

  /**
   * Reads one section of a metrics file.
   *
   * @param file metrics file
   * @param tag section marker, default to "METRICS CLASS"
   * @param groupBy optional header column used as key for the rows
   * @return None when the section can not be read, otherwise one of:
   *         - with groupBy: a map from the group column value (as string) to a map of the
   *           remaining columns of that row
   *         - without groupBy and exactly one row: a map from column name to value
   *         - otherwise: a list holding the header followed by all rows
   * @throws IllegalArgumentException when groupBy is given but not found in the header
   */
  def getMetrics(file: File, tag: String = "METRICS CLASS",
                 groupBy: Option[String] = None): Option[Any] = {
    getMetricsContent(file, tag) match {
      case Some((header, content)) =>
        (content.size, groupBy) match {
          case (_, Some(group)) =>
            val groupId = header.indexOf(group)
            if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
            if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file)
            // Drop only the column at groupId from the header, exactly as is done for each row.
            // Filtering on the name would remove every duplicate and shift keys against values.
            val keys = header.take(groupId) ::: header.drop(groupId + 1)
            Some(content.map { row =>
              row(groupId).toString -> keys.zip(row.take(groupId) ::: row.drop(groupId + 1)).toMap
            }.toMap)
          case (1, _) => Some(header.zip(content.head).toMap)
          case _      => Some(header :: content)
        }
      case _ => None
    }
  }

  /**
   * Reads one section of a metrics file and transposes it into columns.
   *
   * @param file metrics file
   * @param tag section marker, default to "HISTOGRAM"
   * @return map from column name to the values of that column, one per row; a value is None
   *         for rows that are too short to contain the column. None when the section can not be read.
   */
  def getHistogram(file: File, tag: String = "HISTOGRAM") = {
    getMetricsContent(file, tag).map {
      case (header, content) =>
        header.zipWithIndex.map { case (name, idx) => name -> content.map(_.lift(idx)) }.toMap
    }
  }

  /**
   * Parses one tab-separated section of a metrics file.
   *
   * The section starts on the line after the first line starting with "## " + tag. That
   * line is the header; the following lines up to the next empty line (or the end of the
   * file) are the rows. Each cell is passed through tryToParseNumber and kept as the
   * original string when that fails.
   *
   * @param file input metrics file
   * @param tag section marker, without the leading "## "
   * @return (header, content), or None when the file does not exist or has no header line
   *         after the tag
   */
  def getMetricsContent(file: File, tag: String) = {
    if (!file.exists) None
    else {
      val source = Source.fromFile(file)
      // Read everything up front so the file handle can be released right away
      val lines = try source.getLines().toArray finally source.close()

      val tagLine = lines.indexWhere(_.startsWith("## " + tag))
      val start = tagLine + 1

      // No tag line, or nothing after it: there is no section to parse
      if (tagLine == -1 || start >= lines.length) None
      else {
        val blank = lines.indexOf("", start)
        // A section that is not closed by an empty line runs to the end of the file
        val end = if (blank == -1) lines.length else blank

        val header = lines(start).split("\t").toList
        val content = (for (i <- (start + 1) until end) yield {
          lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
        }).toList

        Some((header, content))
      }
    }
  }
}