CollectMultipleMetrics.scala 6.7 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2
3
4
5
6
7
8
9
10
11
12
13
14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
Peter van 't Hof's avatar
Peter van 't Hof committed
15
16
17
18
package nl.lumc.sasc.biopet.extensions.picard

import java.io.File

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.core.Reference
20
import nl.lumc.sasc.biopet.utils.Logging
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.utils.config.Configurable
22
23
import nl.lumc.sasc.biopet.core.summary.{Summarizable, SummaryQScript}
import org.broadinstitute.gatk.utils.commandline.{Argument, Input, Output}
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25

/**
26
27
28
29
30
31
32
33
  * Extension for piacrd's CollectMultipleMetrics tool
  *
  * Created by pjvan_thof on 4/16/15.
  */
class CollectMultipleMetrics(val parent: Configurable)
    extends Picard
    with Summarizable
    with Reference {
Peter van 't Hof's avatar
Peter van 't Hof committed
34
35
  import CollectMultipleMetrics._

Peter van 't Hof's avatar
Peter van 't Hof committed
36
  override def defaultCoreMemory = 8.0
bow's avatar
bow committed
37

Peter van 't Hof's avatar
Peter van 't Hof committed
38
  @Input(doc = "The input SAM or BAM files to analyze", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
39
  var input: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
40

41
  @Input(doc = "The reference file for the bam files.", shortName = "R")
Peter van 't Hof's avatar
Peter van 't Hof committed
42
  var reference: File = _
43

Peter van 't Hof's avatar
Peter van 't Hof committed
44
  @Output(doc = "Base name of output files", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
45
  var outputName: File = _
Peter van 't Hof's avatar
Peter van 't Hof committed
46
47

  @Argument(doc = "Base name of output files", required = true)
Peter van 't Hof's avatar
Peter van 't Hof committed
48
49
50
51
52
53
54
  var program: List[Programs.Value] = {
    val value: List[String] = config("metrics_programs")
    value match {
      case Nil => Programs.values.toList
      case list => list.flatMap(x => Programs.values.find(_.toString.toLowerCase == x.toLowerCase))
    }
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
57
58
59
60
61

  @Argument(doc = "Assume alignment file is sorted by position", required = false)
  var assumeSorted: Boolean = config("assume_sorted", default = false)

  @Argument(doc = "Stop after processing N reads", required = false)
  var stopAfter: Option[Long] = config("stop_after")

62
63
64
  @Output
  private var outputFiles: List[File] = Nil

Peter van 't Hof's avatar
Peter van 't Hof committed
65
  override def beforeGraph(): Unit = {
66
67
    super.beforeGraph()
    if (reference == null) reference = referenceFasta()
Peter van 't Hof's avatar
Peter van 't Hof committed
68
    program.foreach {
Peter van 't Hof's avatar
Peter van 't Hof committed
69
      case Programs.CollectAlignmentSummaryMetrics =>
Peter van 't Hof's avatar
Peter van 't Hof committed
70
        outputFiles :+= new File(outputName + ".alignment_summary_metrics")
Peter van 't Hof's avatar
Peter van 't Hof committed
71
      case Programs.CollectInsertSizeMetrics =>
Peter van 't Hof's avatar
Peter van 't Hof committed
72
        outputFiles :+= new File(outputName + ".insert_size_metrics")
73
        outputFiles :+= new File(outputName + ".insert_size_histogram.pdf")
Peter van 't Hof's avatar
Peter van 't Hof committed
74
      case Programs.QualityScoreDistribution =>
Peter van 't Hof's avatar
Peter van 't Hof committed
75
76
        outputFiles :+= new File(outputName + ".quality_distribution_metrics")
        outputFiles :+= new File(outputName + ".test.quality_distribution.pdf")
Peter van 't Hof's avatar
Peter van 't Hof committed
77
      case Programs.MeanQualityByCycle =>
Peter van 't Hof's avatar
Peter van 't Hof committed
78
79
        outputFiles :+= new File(outputName + ".quality_by_cycle_metrics")
        outputFiles :+= new File(outputName + ".quality_by_cycle.pdf")
Peter van 't Hof's avatar
Peter van 't Hof committed
80
      case Programs.CollectBaseDistributionByCycle =>
Peter van 't Hof's avatar
Peter van 't Hof committed
81
82
        outputFiles :+= new File(outputName + ".base_distribution_by_cycle_metrics")
        outputFiles :+= new File(outputName + ".base_distribution_by_cycle.pdf")
83
      case p => Logging.addError("Program '" + p + "' does not exist for 'CollectMultipleMetrics'")
Peter van 't Hof's avatar
Peter van 't Hof committed
84
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
85
86
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
87
  override def cmdLine: String =
88
89
90
91
92
93
    super.cmdLine +
      required("INPUT=", input, spaceSeparated = false) +
      required("OUTPUT=", outputName, spaceSeparated = false) +
      conditional(assumeSorted, "ASSUME_SORTED=true") +
      optional("STOP_AFTER=", stopAfter, spaceSeparated = false) +
      optional("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) +
Peter van 't Hof's avatar
Peter van 't Hof committed
94
      repeat("PROGRAM=", program.map(_.toString), spaceSeparated = false)
95

Peter van 't Hof's avatar
Peter van 't Hof committed
96
97
98
  override def addToQscriptSummary(qscript: SummaryQScript): Unit = {

    def summarizable(stats: () => Any): Summarizable = new Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
99
      def summaryStats: Any = stats()
Peter van 't Hof's avatar
Peter van 't Hof committed
100
101
102
      def summaryFiles: Map[String, File] = Map()
    }

103
    program
Peter van 't Hof's avatar
Peter van 't Hof committed
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
      .foreach {
        case Programs.CollectAlignmentSummaryMetrics =>
          qscript.addSummarizable(
            summarizable(
              () =>
                Picard.getMetrics(new File(outputName + ".alignment_summary_metrics"),
                  groupBy = Some("CATEGORY"))),
            Programs.CollectAlignmentSummaryMetrics.toString,
            forceSingle = true)
        case Programs.CollectInsertSizeMetrics =>
          qscript.addSummarizable(
            summarizable(
              () =>
                if (new File(outputName + ".insert_size_metrics").exists())
                  Map(
                    "metrics" -> Picard.getMetrics(
                      new File(outputName + ".insert_size_metrics")),
                    "histogram" -> Picard.getHistogram(
                      new File(outputName + ".insert_size_metrics"))
                  )
                else Map()),
            Programs.CollectInsertSizeMetrics.toString,
            forceSingle = true
          )
        case Programs.QualityScoreDistribution =>
          qscript.addSummarizable(
            summarizable(
              () => Picard.getHistogram(new File(outputName + ".quality_distribution_metrics"))),
            Programs.QualityScoreDistribution.toString,
            forceSingle = true)
        case Programs.MeanQualityByCycle =>
          qscript.addSummarizable(
            summarizable(
              () => Picard.getHistogram(new File(outputName + ".quality_by_cycle_metrics"))),
            Programs.MeanQualityByCycle.toString,
            forceSingle = true)
        case Programs.CollectBaseDistributionByCycle =>
          qscript.addSummarizable(
            summarizable(
              () =>
                Picard.getHistogram(new File(outputName + ".base_distribution_by_cycle_metrics"),
                  tag = "METRICS CLASS")),
            Programs.CollectBaseDistributionByCycle.toString,
            forceSingle = true)
        case _ => None
149
150
151
152
      }
  }

  def summaryStats = Map()
153

Peter van 't Hof's avatar
Peter van 't Hof committed
154
  def summaryFiles: Map[String, File] = {
155
156
    program
      .map {
Peter van 't Hof's avatar
Peter van 't Hof committed
157
        case Programs.CollectInsertSizeMetrics =>
158
159
          Map("insert_size_histogram" -> new File(outputName + ".insert_size_histogram.pdf"),
              "insert_size_metrics" -> new File(outputName + ".insert_size_metrics"))
Peter van 't Hof's avatar
Peter van 't Hof committed
160
        case _ => Map()
161
162
      }
      .foldLeft(Map.empty[String, File]) { case (acc, m) => acc ++ m }
163
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
164
165
166
167
}

object CollectMultipleMetrics {
  object Programs extends Enumeration {
168
169
    val CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution,
    MeanQualityByCycle, CollectBaseDistributionByCycle = Value
Peter van 't Hof's avatar
Peter van 't Hof committed
170
  }
171
}