/**
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
package nl.lumc.sasc.biopet.pipelines.gentrap.measures

import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.core.summary.SummaryQScript
import nl.lumc.sasc.biopet.extensions.tools.MergeTables
import nl.lumc.sasc.biopet.pipelines.gentrap.scripts.PlotHeatmap
import org.broadinstitute.gatk.queue.QScript

/**
  * Created by pjvan_thof on 1/12/16.
  */
trait Measurement extends SummaryQScript with Reference { qscript: QScript =>
  protected var bamFiles: Map[String, File] = Map()

  /**
    * Method to add a bamFile to the pipeline
    *
    * @param id Unique id used for this bam file, most likely to be a sampleName
    * @param file Location of the bam file
    */
  def addBamfile(id: String, file: File): Unit = {
    require(!bamFiles.contains(id), s"'$id' already exists")
    bamFiles += id -> file
  }

  /** Name of the job; used as a prefix for most of the output files */
  def name: String = this.getClass.getSimpleName.toLowerCase

  /** Class to store args for MergeTables */
  case class MergeArgs(idCols: List[Int],
                       valCol: Int,
                       numHeaderLines: Int = 0,
                       fallback: String = "-")

  /** This should contain the args for MergeTables */
  def mergeArgs: MergeArgs
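
  // Hypothetical example: for a count format with the feature id in column 1 and
  // the count value in column 2, an implementation could define
  //   def mergeArgs = MergeArgs(idCols = List(1), valCol = 2, fallback = "0")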

  /** Init for pipeline */
  def init(): Unit = {
    require(bamFiles.nonEmpty)
  }

  lazy val mergeCountFiles: Boolean = config("merge_count_files", default = bamFiles.size > 1)
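  // The merge step can also be switched off per run from the config, e.g. with a
  // (hypothetical) config entry: { "merge_count_files": false }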

  private var extraSummaryFiles: Map[String, File] = Map()

  /** Adds a MergeTables job that merges the given count files into a single table (when mergeCountFiles is enabled) */
  def addMergeTableJob(countFiles: List[File],
                       outputFile: File,
                       name: String,
                       fileExtension: String,
                       args: MergeArgs = mergeArgs): Unit = {
    if (mergeCountFiles) {
      add(
        MergeTables(this,
                    countFiles,
                    outputFile,
                    args.idCols,
                    args.valCol,
                    args.numHeaderLines,
                    args.fallback,
                    fileExtension = Some(fileExtension)))
      extraSummaryFiles += s"${name}_table" -> outputFile
    }
  }
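
  // Hypothetical usage from a concrete measurement's script, assuming `countFiles`
  // holds the per-sample count files produced earlier and `outputDir` comes from
  // the QScript:
  //   addMergeTableJob(countFiles, new File(outputDir, s"$name.merged.tsv"), name, ".counts")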

  /** Adds a PlotHeatmap job for the given count table (when mergeCountFiles is enabled) */
  def addHeatmapJob(countTable: File,
                    outputFile: File,
                    name: String,
                    countType: Option[String] = None): Unit = {
    if (mergeCountFiles) {
      val job = new PlotHeatmap(qscript)
      job.input = countTable
      job.output = outputFile
      job.countType = countType
      add(job)
      extraSummaryFiles += s"${name}_heatmap" -> outputFile
    }
  }

  /** Must return a map with used settings for this pipeline */
  def summarySettings: Map[String, Any] = Map()

  /** Files to put in the summary for this pipeline */
  def summaryFiles: Map[String, File] = extraSummaryFiles ++ bamFiles.map {
    case (id, file) => s"input_bam_$id" -> file
  }
}
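
/*
 * Hypothetical usage sketch: a concrete measurement could mix in this trait along
 * these lines, assuming a made-up `CountTool` extension that produces one count
 * file per BAM file and a `Configurable` parent; all names other than the trait's
 * own members are illustrative only:
 *
 *   class MyMeasurement(val parent: Configurable) extends QScript with Measurement {
 *     def mergeArgs = MergeArgs(idCols = List(1), valCol = 2, fallback = "0")
 *
 *     def biopetScript(): Unit = {
 *       val countFiles = bamFiles.map { case (id, bam) =>
 *         val job = new CountTool(this)                  // hypothetical per-sample counting job
 *         job.input = bam
 *         job.output = new File(outputDir, s"$id.$name.counts")
 *         add(job)
 *         job.output
 *       }.toList
 *       val mergedTable = new File(outputDir, s"$name.merged.tsv")
 *       addMergeTableJob(countFiles, mergedTable, name, ".counts")
 *       addHeatmapJob(mergedTable, new File(outputDir, s"$name.heatmap.png"), name)
 *     }
 *   }
 */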