Picard.scala 5.99 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
16
package nl.lumc.sasc.biopet.extensions.picard
17

18
import java.io.{ FileReader, File }
bow's avatar
bow committed
19
import scala.io.Source
20

bow's avatar
bow committed
21
import org.broadinstitute.gatk.utils.commandline.Argument
22

Peter van 't Hof's avatar
Peter van 't Hof committed
23
import nl.lumc.sasc.biopet.core.{ Logging, BiopetJavaCommandLineFunction }
24
import nl.lumc.sasc.biopet.utils.tryToParseNumber
25

Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
/**
 * General Picard extension.
 *
 * This is based on using class files directly from the jar; if needed, another
 * Picard jar can be used by setting "picard_jar" in the config.
 */
abstract class Picard extends BiopetJavaCommandLineFunction {
  override def subPath = "picard" :: super.subPath

  // A configured "picard_jar" replaces the jar file that is otherwise used.
  if (config.contains("picard_jar")) jarFile = config("picard_jar")

  @Argument(doc = "VERBOSITY", required = false)
  var verbosity: Option[String] = config("verbosity")

  @Argument(doc = "QUIET", required = false)
  var quiet: Boolean = config("quiet", default = false)

  @Argument(doc = "VALIDATION_STRINGENCY", required = false)
  var stringency: Option[String] = config("validationstringency")

  @Argument(doc = "COMPRESSION_LEVEL", required = false)
  var compression: Option[Int] = config("compressionlevel")

  @Argument(doc = "MAX_RECORDS_IN_RAM", required = false)
  var maxRecordsInRam: Option[Int] = config("maxrecordsinram")

  @Argument(doc = "CREATE_INDEX", required = false)
  var createIndex: Boolean = config("createindex", default = true)

  @Argument(doc = "CREATE_MD5_FILE", required = false)
  var createMd5: Boolean = config("createmd5", default = false)

  /** Command used to ask the tool for its version; null when no jar file is set. */
  override def versionCommand = {
    if (jarFile == null) null
    else s"$executable -cp $jarFile $javaMainClass -h"
  }
  override val versionRegex = """Version: (.*)""".r
  override val versionExitcode = List(0, 1)

  override val defaultCoreMemory = 3.0

  /** Without a jar file the version comes from [[Picard.getBiopetPicardVersion]]. */
  override def getVersion = jarFile match {
    case null => Picard.getBiopetPicardVersion
    case _    => super.getVersion
  }

  /** Appends the arguments shared by all Picard tools to the parent command line. */
  override def commandLine = {
    val base = super.commandLine
    val picardArguments = List(
      required("TMP_DIR=" + jobTempDir),
      optional("VERBOSITY=", verbosity, spaceSeparated = false),
      conditional(quiet, "QUIET=TRUE"),
      optional("VALIDATION_STRINGENCY=", stringency, spaceSeparated = false),
      optional("COMPRESSION_LEVEL=", compression, spaceSeparated = false),
      optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated = false),
      conditional(createIndex, "CREATE_INDEX=TRUE"),
      conditional(createMd5, "CREATE_MD5_FILE=TRUE")
    )
    base + picardArguments.mkString
  }
}
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
object Picard extends Logging {

  /**
   * Picard version as listed in the "/dependency_list.txt" classpath resource.
   *
   * Each usable line of that resource has five ':'-separated fields
   * (groupId, artifactId, type, version, scope); other lines are ignored.
   * The value is None when the resource is missing or holds no picard:picard entry.
   */
  lazy val getBiopetPicardVersion: Option[String] = {
    Option(getClass.getResourceAsStream("/dependency_list.txt")).flatMap { stream =>
      val source = Source.fromInputStream(stream)
      // Closing the source also closes the underlying resource stream.
      val dependencies = try {
        source.getLines().map(_.trim.split(":")).filter(_.length == 5).map(fields => Map(
          "groupId" -> fields(0),
          "artifactId" -> fields(1),
          "type" -> fields(2),
          "version" -> fields(3),
          "scope" -> fields(4)
        )).toList
      } finally source.close()

      logger.debug("dependencies: " + dependencies)

      def versionOf(groupId: String, artifactId: String): Option[String] =
        dependencies
          .find(dep => dep("groupId") == groupId && dep("artifactId") == artifactId)
          .map(dep => dep("version"))

      val htsjdk = versionOf("samtools", "htsjdk").map(version => "samtools htsjdk " + version)

      versionOf("picard", "picard").map { version =>
        "Picard " + version + " using " + htsjdk.getOrElse("unknown htsjdk")
      }
    }
  }

  /**
   * Reads one section of a metrics file.
   *
   * @param file metrics file
   * @param tag section marker, matched against lines starting with "## " + tag
   * @param groupBy optional header column used as key for the rows
   * @return None when [[getMetricsContent]] finds no section. Otherwise, with groupBy,
   *         a map from the grouping value to the remaining columns of that row;
   *         without groupBy, a column -> value map when there is exactly one row,
   *         else the header followed by all rows.
   * @throws IllegalArgumentException when groupBy is not a column of the header
   */
  def getMetrics(file: File, tag: String = "METRICS CLASS",
                 groupBy: Option[String] = None): Option[Any] = {
    getMetricsContent(file, tag).map {
      case (header, content) =>
        groupBy match {
          case Some(group) =>
            val groupId = header.indexOf(group)
            if (groupId == -1) throw new IllegalArgumentException(group + " not existing in header of: " + file)
            if (header.count(_ == group) > 1) logger.warn(group + " multiple times seen in header of: " + file)
            // Remove the grouping column by position from header and row alike, so
            // names and values stay aligned even when the column name is duplicated.
            val otherColumns = header.take(groupId) ::: header.drop(groupId + 1)
            content.map { row =>
              row(groupId).toString -> otherColumns.zip(row.take(groupId) ::: row.drop(groupId + 1)).toMap
            }.toMap
          case None if content.size == 1 => header.zip(content.head).toMap
          case None                      => header :: content
        }
    }
  }

  /**
   * This function parses the metrics but transposes them for a table:
   * every header column is mapped to its values over all rows, with None
   * for rows that have no value at that position.
   *
   * @param file metrics file
   * @param tag default to "HISTOGRAM"
   * @return column name -> values, or None when [[getMetricsContent]] finds no section
   */
  def getHistogram(file: File, tag: String = "HISTOGRAM") = {
    getMetricsContent(file, tag).map {
      case (header, content) =>
        header.zipWithIndex.map {
          case (column, index) => column -> content.map(_.lift(index))
        }.toMap
    }
  }

  /**
   * This function parses a section of a metrics file in tab separated values.
   *
   * The section starts at the first line starting with "## " + tag; the next
   * line is the header, and the rows run until the first empty line or, when
   * there is none, the end of the file. Values are converted with
   * tryToParseNumber where that succeeds and kept as strings otherwise.
   *
   * @param file input metrics file
   * @param tag section marker, without the leading "## "
   * @return (header, content), or None when the file does not exist, the tag
   *         is not found, or no header line follows the tag
   */
  def getMetricsContent(file: File, tag: String) = {
    if (!file.exists) None
    else {
      val source = Source.fromFile(file)
      val lines = try source.getLines().toArray finally source.close()

      val tagLine = lines.indexWhere(_.startsWith("## " + tag))
      val start = tagLine + 1

      // A missing tag must not fall through to parsing line 0 as the header.
      if (tagLine == -1 || start >= lines.length) None
      else {
        val blankLine = lines.indexOf("", start)
        val end = if (blankLine == -1) lines.length else blankLine

        val header = lines(start).split("\t").toList
        val content = (for (i <- (start + 1) until end) yield {
          lines(i).split("\t").map(v => tryToParseNumber(v, true).getOrElse(v)).toList
        }).toList

        Some((header, content))
      }
    }
  }
}