XhmmMethod.scala 5.37 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/**
 * Biopet is built on top of GATK Queue for building bioinformatic
 * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
 * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
 * should also be able to execute Biopet tools and pipelines.
 *
 * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
 *
 * Contact us at: sasc@lumc.nl
 *
 * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
 * license; For commercial users or users who do not want to follow the AGPL
 * license, please contact us to obtain a separate license.
 */
Sander Bollen's avatar
Sander Bollen committed
15
16
17
18
package nl.lumc.sasc.biopet.pipelines.kopisu.methods

import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.extensions.gatk.DepthOfCoverage
19
import nl.lumc.sasc.biopet.extensions.tools.XcnvToBed
Sander Bollen's avatar
Sander Bollen committed
20
21
22
23
import nl.lumc.sasc.biopet.extensions.xhmm._
import nl.lumc.sasc.biopet.utils.config.Configurable

/**
Sander Bollen's avatar
Sander Bollen committed
24
25
 * Created by Sander Bollen on 23-11-16.
 */
Sander Bollen's avatar
Sander Bollen committed
26
27
28
29
class XhmmMethod(val root: Configurable) extends CnvMethod with Reference {

  def name = "xhmm"

Sander Bollen's avatar
Sander Bollen committed
30
  private var targets: File = config("amplicon_bed")
Sander Bollen's avatar
Sander Bollen committed
31
32
  val xhmmDir = new File(outputDir, "xhmm")

Sander Bollen's avatar
Sander Bollen committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
  override def fixedValues: Map[String, Any] = {
    super.fixedValues ++ Map(
      "depth_of_coverage" -> Map(
        "downsampling_type" -> "BY_SAMPLE",
        "downsample_to_coverage" -> 5000,
        "omit_depth_output_at_each_base" -> true,
        "omit_locus_table" -> true,
        "min_base_quality" -> 0,
        "min_mapping_quality" -> 20,
        "start" -> 1,
        "stop" -> 5000,
        "n_bins" -> 200,
        "include_ref_n_sites" -> true,
        "count_type" -> "COUNT_FRAGMENTS"
Sander Bollen's avatar
Sander Bollen committed
47
      )
Sander Bollen's avatar
Sander Bollen committed
48
    )
Sander Bollen's avatar
Sander Bollen committed
49
50
51
  }

  def biopetScript() = {
Sander Bollen's avatar
Sander Bollen committed
52
    val depths = inputBams.map { keyValuePair =>
Sander Bollen's avatar
Sander Bollen committed
53
      DepthOfCoverage(this, List(keyValuePair._2), swapExt(xhmmDir, keyValuePair._2, ".bam", ".dcov"), List(targets))
Sander Bollen's avatar
Sander Bollen committed
54
    }.toList
Sander Bollen's avatar
Sander Bollen committed
55
56
57
58
59
60
61
62
63
64
65
    addAll(depths)

    // merging of gatk depths files
    val merged = new XhmmMergeGatkDepths(this)
    merged.gatkDepthsFiles = depths.map(_.intervalSummaryFile)
    merged.output = new File(xhmmDir, name + ".depths.data")
    add(merged)

    // the filtered and centered matrix
    val firstMatrix = new XhmmMatrix(this)
    firstMatrix.inputMatrix = merged.output
Sander Bollen's avatar
Sander Bollen committed
66
67
68
    firstMatrix.outputMatrix = swapExt(xhmmDir, merged.output, ".depths.data", ".filtered_centered.data")
    firstMatrix.outputExcludedSamples = Some(swapExt(xhmmDir, merged.output, ".depths.data", ".filtered.samples.txt"))
    firstMatrix.outputExcludedTargets = Some(swapExt(xhmmDir, merged.output, ".depths.data", ".filtered.targets.txt"))
Sander Bollen's avatar
Sander Bollen committed
69
70
71
72
73
    add(firstMatrix)

    // pca generation
    val pca = new XhmmPca(this)
    pca.inputMatrix = firstMatrix.outputMatrix
Sander Bollen's avatar
Sander Bollen committed
74
    pca.pcaFile = swapExt(xhmmDir, firstMatrix.outputMatrix, ".filtered_centered.data", ".rd_pca.data")
Sander Bollen's avatar
Sander Bollen committed
75
76
77
78
79
80
    add(pca)

    // normalization
    val normalize = new XhmmNormalize(this)
    normalize.inputMatrix = firstMatrix.outputMatrix
    normalize.pcaFile = pca.pcaFile
Sander Bollen's avatar
Sander Bollen committed
81
    normalize.normalizeOutput = swapExt(xhmmDir, firstMatrix.outputMatrix, ".filtered_centered.data", ".normalized.data")
Sander Bollen's avatar
Sander Bollen committed
82
83
84
85
86
87
88
89
90
    add(normalize)

    // normalized & filtered matrix
    val secondMatrix = new XhmmMatrix(this)
    secondMatrix.inputMatrix = normalize.normalizeOutput
    secondMatrix.centerData = true
    secondMatrix.centerType = "sample"
    secondMatrix.zScoreData = true
    secondMatrix.maxsdTargetRD = 30
Sander Bollen's avatar
Sander Bollen committed
91
92
93
    secondMatrix.outputExcludedTargets = Some(swapExt(xhmmDir, normalize.normalizeOutput, ".data", ".filtered.targets.txt"))
    secondMatrix.outputExcludedSamples = Some(swapExt(xhmmDir, normalize.normalizeOutput, ".data", ".filtered.samples.txt"))
    secondMatrix.outputMatrix = swapExt(xhmmDir, normalize.normalizeOutput, ".data", "filtered.data")
Sander Bollen's avatar
Sander Bollen committed
94
95
96
97
98
99
100
    add(secondMatrix)

    // re-synced matrix
    val thirdMatrix = new XhmmMatrix(this)
    thirdMatrix.inputMatrix = merged.output
    thirdMatrix.inputExcludeSamples = List(firstMatrix.outputExcludedSamples, secondMatrix.outputExcludedSamples).flatten
    thirdMatrix.inputExcludeTargets = List(firstMatrix.outputExcludedTargets, secondMatrix.outputExcludedTargets).flatten
Sander Bollen's avatar
Sander Bollen committed
101
    thirdMatrix.outputMatrix = swapExt(xhmmDir, merged.output, ".depths.data", ".same_filtered.data")
Sander Bollen's avatar
Sander Bollen committed
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
    add(thirdMatrix)

    // discovering cnvs
    val discover = new XhmmDiscover(this)
    discover.inputMatrix = secondMatrix.outputMatrix
    discover.r = thirdMatrix.outputMatrix
    discover.xhmmAnalysisName = name
    discover.outputXcnv = new File(xhmmDir, "xhmm.xcnv")
    add(discover)
    addSummarizable(discover, "xcnv")

    // generate vcf
    val genotype = new XhmmGenotype(this)
    genotype.inputXcnv = discover.outputXcnv
    genotype.inputMatrix = secondMatrix.outputMatrix
Sander Bollen's avatar
Sander Bollen committed
117
    genotype.r = thirdMatrix.outputMatrix
Sander Bollen's avatar
Sander Bollen committed
118
    genotype.outputVcf = new File(xhmmDir, "xhmm.vcf")
Sander Bollen's avatar
Sander Bollen committed
119
    add(genotype)
Sander Bollen's avatar
Sander Bollen committed
120

121
122
    // create bed files
    val bedDir = new File(xhmmDir, "beds")
Sander Bollen's avatar
Sander Bollen committed
123
    val beds = inputBams.keys.map { x =>
124
125
126
127
128
129
130
131
      val z = new XcnvToBed(this)
      z.inputXcnv = discover.outputXcnv
      z.sample = x
      z.outpuBed = new File(bedDir, s"$x.bed")
      z
    }
    addAll(beds)

Sander Bollen's avatar
Sander Bollen committed
132
133
134
135
136
137
138
    addSummaryJobs()

  }

  private def depthOfCoverage(bamFile: File): DepthOfCoverage = {
    val dp = new DepthOfCoverage(this)
    dp.input_file = List(bamFile)
Sander Bollen's avatar
Sander Bollen committed
139
    dp.intervals = List(targets)
Sander Bollen's avatar
Sander Bollen committed
140
141
142
143
144
    dp.out = swapExt(xhmmDir, bamFile, ".bam", ".dcov")
    dp
  }

}