CollectMultipleMetrics.scala 5.79 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project that are
 * not part of GATK Queue is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
16
17
18
19
package nl.lumc.sasc.biopet.extensions.picard

import java.io.File

20
import nl.lumc.sasc.biopet.core.{ Reference, BiopetQScript }
Peter van 't Hof's avatar
Peter van 't Hof committed
21
import nl.lumc.sasc.biopet.core.config.Configurable
22
import nl.lumc.sasc.biopet.core.summary.{ Summarizable, SummaryQScript }
Peter van 't Hof's avatar
Peter van 't Hof committed
23
import org.broadinstitute.gatk.utils.commandline.{ Argument, Input, Output }
Peter van 't Hof's avatar
Peter van 't Hof committed
24
25

/**
 * Extension for Picard's CollectMultipleMetrics tool.
 *
 * Runs `picard.analysis.CollectMultipleMetrics` on a single SAM/BAM file. The
 * metrics programs to run are read from the `metrics_programs` config key and
 * default to every value of [[CollectMultipleMetrics.Programs]]. All output
 * files are named `outputName` followed by a program-specific suffix.
 *
 * Created by pjvan_thof on 4/16/15.
 */
class CollectMultipleMetrics(val root: Configurable) extends Picard with Summarizable with Reference {
  import CollectMultipleMetrics._

  javaMainClass = new picard.analysis.CollectMultipleMetrics().getClass.getName

  override def defaultCoreMemory = 6.0

  @Input(doc = "The input SAM or BAM files to analyze", required = true)
  var input: File = null

  @Input(doc = "The reference file for the bam files.", shortName = "R")
  var reference: File = null

  @Output(doc = "Base name of output files", required = true)
  var outputName: File = null

  @Argument(doc = "Metrics programs to run", required = true)
  var program: List[String] = config("metrics_programs",
    default = Programs.values.iterator.toList.map(_.toString))

  @Argument(doc = "Assume alignment file is sorted by position", required = false)
  var assumeSorted: Boolean = config("assume_sorted", default = false)

  @Argument(doc = "Stop after processing N reads", required = false)
  var stopAfter: Option[Long] = config("stop_after")

  /** Files the selected programs are expected to write; filled in by [[beforeGraph]]. */
  @Output
  protected var outputFiles: List[File] = Nil

  /** Builds the output file for one program: the `outputName` base followed by `suffix`. */
  private def metricsFile(suffix: String): File = new File(outputName + suffix)

  /**
   * Fills in `reference` when it was not set explicitly and registers the
   * expected output files of every selected program in `outputFiles`.
   *
   * A program name that matches no value of [[CollectMultipleMetrics.Programs]]
   * is reported through `BiopetQScript.addError` instead of being ignored.
   */
  override def beforeGraph(): Unit = {
    super.beforeGraph()
    // NOTE(review): referenceFasta() presumably comes from the Reference trait -- confirm.
    if (reference == null) reference = referenceFasta()

    program.foreach {
      case p if p == Programs.CollectAlignmentSummaryMetrics.toString =>
        outputFiles :+= metricsFile(".alignment_summary_metrics")
      case p if p == Programs.CollectInsertSizeMetrics.toString =>
        outputFiles :+= metricsFile(".insert_size_metrics")
        outputFiles :+= metricsFile(".insert_size_histogram.pdf")
      case p if p == Programs.QualityScoreDistribution.toString =>
        outputFiles :+= metricsFile(".quality_distribution_metrics")
        // Picard names the chart "<OUTPUT>.quality_distribution.pdf"; the former
        // ".test.quality_distribution.pdf" declared a file that is never written.
        outputFiles :+= metricsFile(".quality_distribution.pdf")
      case p if p == Programs.MeanQualityByCycle.toString =>
        outputFiles :+= metricsFile(".quality_by_cycle_metrics")
        outputFiles :+= metricsFile(".quality_by_cycle.pdf")
      case p if p == Programs.CollectBaseDistributionByCycle.toString =>
        outputFiles :+= metricsFile(".base_distribution_by_cycle_metrics")
        outputFiles :+= metricsFile(".base_distribution_by_cycle.pdf")
      case p => BiopetQScript.addError("Program '" + p + "' does not exist for 'CollectMultipleMetrics'")
    }
  }

  /** Appends the tool's `KEY=value` arguments to the command line built by the parent. */
  override def commandLine = super.commandLine +
    required("INPUT=", input, spaceSeparated = false) +
    required("OUTPUT=", outputName, spaceSeparated = false) +
    conditional(assumeSorted, "ASSUME_SORTED=true") +
    optional("STOP_AFTER=", stopAfter, spaceSeparated = false) +
    optional("REFERENCE_SEQUENCE=", reference, spaceSeparated = false) +
    repeat("PROGRAM=", program, spaceSeparated = false)

  /**
   * Registers one summarizable per selected program on `qscript`, using the
   * program name as the key.
   *
   * @param qscript script that collects the summarizables
   * @param name    not used by this implementation
   */
  override def addToQscriptSummary(qscript: SummaryQScript, name: String): Unit = {
    program.foreach { p =>
      val stats: Any = p match {
        case _ if p == Programs.CollectAlignmentSummaryMetrics.toString =>
          Picard.getMetrics(metricsFile(".alignment_summary_metrics"), groupBy = Some("CATEGORY"))
        case _ if p == Programs.CollectInsertSizeMetrics.toString =>
          // Metrics table and histogram are both read from the same file.
          Map(
            "metrics" -> Picard.getMetrics(metricsFile(".insert_size_metrics")),
            "histogram" -> Picard.getHistogram(metricsFile(".insert_size_metrics"))
          )
        case _ if p == Programs.QualityScoreDistribution.toString =>
          Picard.getHistogram(metricsFile(".quality_distribution_metrics"))
        case _ if p == Programs.MeanQualityByCycle.toString =>
          Picard.getHistogram(metricsFile(".quality_by_cycle_metrics"))
        case _ if p == Programs.CollectBaseDistributionByCycle.toString =>
          Picard.getHistogram(metricsFile(".base_distribution_by_cycle_metrics"), tag = "METRICS CLASS")
        // Unknown program names contribute no statistics.
        case _ => None
      }
      val sum = new Summarizable {
        override def summaryStats = stats
        override def summaryFiles: Map[String, File] = Map()
      }
      qscript.addSummarizable(sum, p)
    }
  }

  /** This job has no statistics of its own; they are added per program in [[addToQscriptSummary]]. */
  def summaryStats = Map()

  /** Summary files: the insert size histogram and metrics, only when that program is selected. */
  def summaryFiles: Map[String, File] = {
    if (program.contains(Programs.CollectInsertSizeMetrics.toString))
      Map(
        "insert_size_histogram" -> metricsFile(".insert_size_histogram.pdf"),
        "insert_size_metrics" -> metricsFile(".insert_size_metrics"))
    else Map.empty[String, File]
  }
}

/** Companion holding the names of the metrics programs the wrapper knows about. */
object CollectMultipleMetrics {

  /**
   * Metrics programs that can be selected; each value's `toString` is the
   * program name compared against the configured program list.
   */
  object Programs extends Enumeration {
    val CollectAlignmentSummaryMetrics = Value
    val CollectInsertSizeMetrics = Value
    val QualityScoreDistribution = Value
    val MeanQualityByCycle = Value
    val CollectBaseDistributionByCycle = Value
  }
}