XhmmMethod.scala 4.78 KB
Newer Older
Sander Bollen's avatar
Sander Bollen committed
1
2
3
4
package nl.lumc.sasc.biopet.pipelines.kopisu.methods

import nl.lumc.sasc.biopet.core.Reference
import nl.lumc.sasc.biopet.extensions.gatk.DepthOfCoverage
5
import nl.lumc.sasc.biopet.extensions.tools.XcnvToBed
Sander Bollen's avatar
Sander Bollen committed
6
7
8
9
import nl.lumc.sasc.biopet.extensions.xhmm._
import nl.lumc.sasc.biopet.utils.config.Configurable

/**
Sander Bollen's avatar
Sander Bollen committed
10
11
 * Created by Sander Bollen on 23-11-16.
 */
Sander Bollen's avatar
Sander Bollen committed
12
13
14
15
class XhmmMethod(val root: Configurable) extends CnvMethod with Reference {

  def name = "xhmm"

Sander Bollen's avatar
Sander Bollen committed
16
  private var targets: File = config("amplicon_bed")
Sander Bollen's avatar
Sander Bollen committed
17
18
  val xhmmDir = new File(outputDir, "xhmm")

Sander Bollen's avatar
Sander Bollen committed
19
20
21
22
23
24
25
26
27
28
29
30
31
32
  override def fixedValues: Map[String, Any] = {
    super.fixedValues ++ Map(
      "depth_of_coverage" -> Map(
        "downsampling_type" -> "BY_SAMPLE",
        "downsample_to_coverage" -> 5000,
        "omit_depth_output_at_each_base" -> true,
        "omit_locus_table" -> true,
        "min_base_quality" -> 0,
        "min_mapping_quality" -> 20,
        "start" -> 1,
        "stop" -> 5000,
        "n_bins" -> 200,
        "include_ref_n_sites" -> true,
        "count_type" -> "COUNT_FRAGMENTS"
Sander Bollen's avatar
Sander Bollen committed
33
      )
Sander Bollen's avatar
Sander Bollen committed
34
    )
Sander Bollen's avatar
Sander Bollen committed
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
  }

  def biopetScript() = {
    val depths = inputBams.map(keyValuePair => depthOfCoverage(keyValuePair._2)).toList
    addAll(depths)

    // merging of gatk depths files
    val merged = new XhmmMergeGatkDepths(this)
    merged.gatkDepthsFiles = depths.map(_.intervalSummaryFile)
    merged.output = new File(xhmmDir, name + ".depths.data")
    add(merged)

    // the filtered and centered matrix
    val firstMatrix = new XhmmMatrix(this)
    firstMatrix.inputMatrix = merged.output
    firstMatrix.outputMatrix = swapExt(merged.output, ".depths.data", ".filtered_centered.data")
    firstMatrix.minTargetSize = 10
    firstMatrix.maxTargetSize = 10000
    firstMatrix.minMeanTargetRD = 10
    firstMatrix.maxMeanTargetRD = 500
    firstMatrix.minMeanSampleRD = 25
    firstMatrix.maxMeanSampleRD = 200
    firstMatrix.maxSdSampleRD = 150
    firstMatrix.outputExcludedSamples = Some(swapExt(merged.output, ".depths.data", ".filtered.samples.txt"))
    firstMatrix.outputExcludedTargets = Some(swapExt(merged.output, ".depths.data", ".filtered.targets.txt"))
    add(firstMatrix)

    // pca generation
    val pca = new XhmmPca(this)
    pca.inputMatrix = firstMatrix.outputMatrix
    pca.pcaFile = swapExt(firstMatrix.outputMatrix, ".filtered_centered.data", ".rd_pca.data")
    add(pca)

    // normalization
    val normalize = new XhmmNormalize(this)
    normalize.inputMatrix = firstMatrix.outputMatrix
    normalize.pcaFile = pca.pcaFile
    normalize.normalizeOutput = swapExt(firstMatrix.outputMatrix, ".filtered_centered.data", ".normalized.data")
    add(normalize)

    // normalized & filtered matrix
    val secondMatrix = new XhmmMatrix(this)
    secondMatrix.inputMatrix = normalize.normalizeOutput
    secondMatrix.centerData = true
    secondMatrix.centerType = "sample"
    secondMatrix.zScoreData = true
    secondMatrix.maxsdTargetRD = 30
    secondMatrix.outputExcludedTargets = Some(swapExt(normalize.normalizeOutput, ".data", ".filtered.targets.txt"))
    secondMatrix.outputExcludedSamples = Some(swapExt(normalize.normalizeOutput, ".data", ".filtered.samples.txt"))
    secondMatrix.outputMatrix = swapExt(normalize.normalizeOutput, ".data", "filtered.data")
    add(secondMatrix)

    // re-synced matrix
    val thirdMatrix = new XhmmMatrix(this)
    thirdMatrix.inputMatrix = merged.output
    thirdMatrix.inputExcludeSamples = List(firstMatrix.outputExcludedSamples, secondMatrix.outputExcludedSamples).flatten
    thirdMatrix.inputExcludeTargets = List(firstMatrix.outputExcludedTargets, secondMatrix.outputExcludedTargets).flatten
    thirdMatrix.outputMatrix = swapExt(merged.output, ".depths.data", ".same_filtered.data")
    add(thirdMatrix)

    // discovering cnvs
    val discover = new XhmmDiscover(this)
    discover.inputMatrix = secondMatrix.outputMatrix
    discover.r = thirdMatrix.outputMatrix
    discover.xhmmAnalysisName = name
    discover.outputXcnv = new File(xhmmDir, "xhmm.xcnv")
    add(discover)
    addSummarizable(discover, "xcnv")

    // generate vcf
    val genotype = new XhmmGenotype(this)
    genotype.inputXcnv = discover.outputXcnv
    genotype.inputMatrix = secondMatrix.outputMatrix
Sander Bollen's avatar
Sander Bollen committed
108
    genotype.r = thirdMatrix.outputMatrix
Sander Bollen's avatar
Sander Bollen committed
109
    genotype.outputVcf = new File(xhmmDir, "xhmm.vcf")
Sander Bollen's avatar
Sander Bollen committed
110
    add(genotype)
Sander Bollen's avatar
Sander Bollen committed
111

112
113
    // create bed files
    val bedDir = new File(xhmmDir, "beds")
Sander Bollen's avatar
Sander Bollen committed
114
    val beds = inputBams.keys.map { x =>
115
116
117
118
119
120
121
122
      val z = new XcnvToBed(this)
      z.inputXcnv = discover.outputXcnv
      z.sample = x
      z.outpuBed = new File(bedDir, s"$x.bed")
      z
    }
    addAll(beds)

Sander Bollen's avatar
Sander Bollen committed
123
124
125
126
127
128
129
    addSummaryJobs()

  }

  private def depthOfCoverage(bamFile: File): DepthOfCoverage = {
    val dp = new DepthOfCoverage(this)
    dp.input_file = List(bamFile)
Sander Bollen's avatar
Sander Bollen committed
130
    dp.intervals = List(targets)
Sander Bollen's avatar
Sander Bollen committed
131
132
133
134
135
    dp.out = swapExt(xhmmDir, bamFile, ".bam", ".dcov")
    dp
  }

}