Tarmac.scala 24.4 KB
Newer Older
Sander Bollen's avatar
Sander Bollen committed
1
2
3
package nl.lumc.sasc.biopet.pipelines.tarmac

import nl.lumc.sasc.biopet.core.summary.SummaryQScript
Sander Bollen's avatar
Sander Bollen committed
4
import nl.lumc.sasc.biopet.core._
Bollen's avatar
Bollen committed
5
import nl.lumc.sasc.biopet.extensions.bedtools.{BedtoolsIntersect, BedtoolsSort}
6
import nl.lumc.sasc.biopet.extensions.gatk.DepthOfCoverage
Bollen's avatar
Bollen committed
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import nl.lumc.sasc.biopet.extensions.stouffbed.{StouffbedHorizontal, StouffbedVertical}
import nl.lumc.sasc.biopet.extensions.wisecondor.{
  WisecondorCount,
  WisecondorGcCorrect,
  WisecondorNewRef,
  WisecondorZscore
}
import nl.lumc.sasc.biopet.extensions.xhmm.{
  XhmmMatrix,
  XhmmMergeGatkDepths,
  XhmmNormalize,
  XhmmPca
}
import nl.lumc.sasc.biopet.extensions.{Bgzip, Ln, Tabix}
Sander Bollen's avatar
Sander Bollen committed
21
22
23
24
25
26
import nl.lumc.sasc.biopet.pipelines.tarmac.scripts.{
  BedThreshold,
  FindAllCommon,
  SampleFromMatrix,
  TarmacPlot
}
27
import nl.lumc.sasc.biopet.utils.Logging
Sander Bollen's avatar
Sander Bollen committed
28
29
import nl.lumc.sasc.biopet.utils.config.Configurable
import org.broadinstitute.gatk.queue.QScript
30
import org.broadinstitute.gatk.queue.function.QFunction
Sander Bollen's avatar
Sander Bollen committed
31

32
import scalaz.{-\/, \/, \/-}
33

Sander Bollen's avatar
Sander Bollen committed
34
/**
35
36
37
38
39
40
41
  * Created by Sander Bollen on 23-3-17.
  */
class Tarmac(val parent: Configurable)
    extends QScript
    with PedigreeQscript
    with SummaryQScript
    with Reference { qscript =>
42

Sander Bollen's avatar
Sander Bollen committed
43
  lazy val targets: File = config("targets")
Sander Bollen's avatar
Sander Bollen committed
44
  lazy val stouffWindowSizes: List[Int] = config("stouff_window_size")
Sander Bollen's avatar
Sander Bollen committed
45
  lazy val threshold: Int = config("threshold")
Sander Bollen's avatar
Sander Bollen committed
46

Sander Bollen's avatar
Sander Bollen committed
47
48
  def this() = this(null)

Sander Bollen's avatar
Sander Bollen committed
49
50
51
  private var _finalFiles: Map[Sample, List[File]] = Map()
  def finalFiles: Map[Sample, List[File]] = _finalFiles

Sander Bollen's avatar
Sander Bollen committed
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
  /* Fixed values for xhmm count file */
  override def fixedValues: Map[String, Any] = {
    super.fixedValues ++ Map(
      "depth_of_coverage" -> Map(
        "downsampling_type" -> "BY_SAMPLE",
        "downsample_to_coverage" -> 5000,
        "omit_depth_output_at_each_base" -> true,
        "omit_locus_table" -> true,
        "min_base_quality" -> 0,
        "min_mapping_quality" -> 20,
        "start" -> 1,
        "stop" -> 5000,
        "n_bins" -> 200,
        "include_ref_n_sites" -> true,
        "count_type" -> "COUNT_FRAGMENTS"
      )
    )
  }

71
  def init(): Unit = {}
Sander Bollen's avatar
Sander Bollen committed
72

73
  def biopetScript(): Unit = {
74
75
76
77
    addSamplesJobs()
    addSummaryJobs()
  }

78
  def addMultiSampleJobs(): Unit = {
79
80
81
    val initRefMap = samples map {
      case (sampleName, sample) => sample -> getReferenceSamplesForSample(sampleName)
    }
82
83
84
85
86
87
88
89
    initRefMap.values.collect { case -\/(error) => error }.foreach(Logging.addError(_))

    val refMap: Map[Sample, Set[Sample]] = initRefMap.collect {
      case (sample, \/-(sampleSet)) =>
        val actualRefSamples = sampleSet map { sampleId =>
          samples.getOrElse(sampleId, Logging.addError(s"Sample $sampleId does not exist"))
        } collect { case s: Sample => s }
        sample -> actualRefSamples
Sander Bollen's avatar
Sander Bollen committed
90
    }
Sander Bollen's avatar
Sander Bollen committed
91

Sander Bollen's avatar
Sander Bollen committed
92
93
    val wisecondorRefJobs = refMap map {
      case (sample, refSamples) =>
94
        val refDir = new File(sample.wisecondorDir, "wisecondor_reference")
Sander Bollen's avatar
Sander Bollen committed
95
        sample -> createWisecondorReferenceJobs(refSamples, refDir)
Sander Bollen's avatar
Sander Bollen committed
96
97
    }

Sander Bollen's avatar
Sander Bollen committed
98
99
    val xhmmRefJobs = refMap map {
      case (sample, refSamples) =>
100
        val refDir = new File(sample.xhmmDir, "xhmm_reference")
Sander Bollen's avatar
Sander Bollen committed
101
        sample -> createXhmmReferenceJobs(sample, refSamples, refDir)
Sander Bollen's avatar
Sander Bollen committed
102
103
    }

Sander Bollen's avatar
Sander Bollen committed
104
105
106
107
108
109
110
111
112
113
114
115
116
    val wisecondorZJobs = wisecondorRefJobs map {
      case (sample, jobsAndRefFile) =>
        val refJobs = jobsAndRefFile._1
        val refFile = jobsAndRefFile._2
        val tbiFile = refJobs.collect { case tbi: Tabix => tbi.outputIndex }.head
        sample -> createWisecondorZScore(sample, refFile, tbiFile)
    }

    val xhmmZJobs = xhmmRefJobs map {
      case (sample, jobsAndRefFile) =>
        sample -> createXhmmZscore(sample, jobsAndRefFile._2)
    }

Sander Bollen's avatar
Sander Bollen committed
117
118
119
120
121
    val wisecondorSyncJobs = wisecondorZJobs map {
      case (sample, job) =>
        val intersect = new BedtoolsIntersect(this)
        intersect.input = job.output
        intersect.intersectFile = xhmmZJobs(sample)._2
122
123
        intersect.output =
          new File(sample.wisecondorDir, s"${sample.sampleId}.wisecondor.sync.z.bed")
Sander Bollen's avatar
Sander Bollen committed
124
125
126
127
128
129
130
131
        sample -> intersect
    }

    val xhmmSyncJobs = xhmmZJobs map {
      case (sample, jobsAndFile) =>
        val intersect = new BedtoolsIntersect(this)
        intersect.input = jobsAndFile._2
        intersect.intersectFile = wisecondorZJobs(sample).output
132
        intersect.output = new File(sample.xhmmDir, s"${sample.sampleId}.xhmm.sync.z.bed")
Sander Bollen's avatar
Sander Bollen committed
133
134
135
        sample -> intersect
    }

Sander Bollen's avatar
Sander Bollen committed
136
137
138
139
140
141
142
143
144
145
146
147
    val zScoreMergeJobs = samples map {
      case (_, sample) =>
        val horizontal = new StouffbedHorizontal(this)
        val inputs = List(
          wisecondorSyncJobs(sample).output,
          xhmmSyncJobs(sample).output
        )
        horizontal.inputFiles = inputs
        horizontal.output = new File(sample.sampleDir, s"${sample.sampleId}.horizontal.bed")
        sample -> horizontal
    }

Sander Bollen's avatar
Sander Bollen committed
148
149
150
151
152
153
154
155
156
157
158
    val recessiveJobs = zScoreMergeJobs
      .filter(x => getChildren.map(_.individualId).contains(x._1.sampleId))
      .flatMap {
        case (sample, job) =>
          val parents = findParentHorizontals(sample, zScoreMergeJobs)
          val parentsAndKid = job :: parents
          val syncJobs = parentsAndKid.map { x =>
            val syncer = new FindAllCommon(this)
            val remainder = (parentsAndKid.toSet - x).toList
            syncer.inputFile = x.output
            syncer.databases = remainder.map(_.output)
159
160
161
162
163
            syncer.output = Some(
              swapExt(x.output.getParentFile,
                      x.output,
                      ".bed",
                      s"${sample.family.getOrElse("unknown")}.family_common.bed"))
Sander Bollen's avatar
Sander Bollen committed
164
165
            syncer
          }
166
167
168
169
170
171
172
173
174
175
176

          val horizontalJob = new StouffbedHorizontal(this)
          horizontalJob.inputFiles = syncJobs.flatMap(_.output)
          horizontalJob.output = new File(sample.sampleDir, s"${sample.sampleId}.recessives.bed")

          val verticalsAndThresholds = stouffWindowSizes map { size =>
            val windowDir = new File(sample.sampleDir, s"window_$size")
            val vertical = new StouffbedVertical(this)
            vertical.inputFiles = List(horizontalJob.output)
            vertical.output =
              new File(windowDir, s"${sample.sampleId}.window_$size.recessive.z.bed")
177
            vertical.windowSize = size
Sander Bollen's avatar
Sander Bollen committed
178
179
180
181
            val thresholder = new BedThreshold(this)
            thresholder.input = vertical.output
            thresholder.threshold = threshold
            thresholder.output =
182
              Some(new File(windowDir, s"${sample.sampleId}.recessive.treshold.bed"))
Sander Bollen's avatar
Sander Bollen committed
183
            vertical :: thresholder :: Nil
Sander Bollen's avatar
Sander Bollen committed
184
185
          }
          val verticalAndThresholdJobs: List[BiopetCommandLineFunction] =
186
            verticalsAndThresholds.flatten
Sander Bollen's avatar
Sander Bollen committed
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

          // single-parent method

          val singleParentJobs = parents.flatMap { x =>
            val parentName = x.output.getName.split('.').head
            val parentSync = new FindAllCommon(this)
            val childSync = new FindAllCommon(this)
            parentSync.inputFile = x.output
            parentSync.databases = List(job.output)
            parentSync.output = Some(
              swapExt(job.output.getParentFile, x.output, ".bed", ".parent.common.bed")
            )
            childSync.inputFile = job.output
            childSync.databases = List(x.output)
            childSync.output = Some(
              swapExt(job.output.getParentFile, x.output, ".bed", ".child.common.bed")
            )

            val singleParHorizontal = new StouffbedHorizontal(this)
            singleParHorizontal.inputFiles = (parentSync.output :: childSync.output :: Nil).flatten
            singleParHorizontal.output =
              swapExt(job.output.getParentFile, x.output, ".bed", ".single-parent-horizontal.bed")

            val singleParVerticals = stouffWindowSizes flatMap { size =>
              val windowDir = new File(sample.sampleDir, s"window_$size")
              val vertical = new StouffbedVertical(this)
              vertical.inputFiles = List(singleParHorizontal.output)
              vertical.output =
                new File(windowDir, s"${sample.sampleId}.shared_with_parent_$parentName.z.bed")
              vertical.windowSize = size
              val thresholder = new BedThreshold(this)
              thresholder.input = vertical.output
              thresholder.threshold = threshold
              thresholder.output = Some(
                new File(windowDir,
                         s"${sample.sampleId}.shared_with_parent_$parentName.threshold.bed"))
              vertical :: thresholder :: Nil
            }
            parentSync :: childSync :: singleParHorizontal :: singleParVerticals ::: Nil
          }

          horizontalJob :: verticalAndThresholdJobs ::: syncJobs ::: singleParentJobs
Sander Bollen's avatar
Sander Bollen committed
229
230
      }

Sander Bollen's avatar
Sander Bollen committed
231
232
233
234
235
236
237
238
239
240
241
242
243
    val windowStouffJobs = zScoreMergeJobs map {
      case (sample, horizontal) =>
        val subMap = stouffWindowSizes map { size =>
          val windowDir = new File(sample.sampleDir, s"window_$size")
          val vertical = new StouffbedVertical(this)
          vertical.inputFiles = List(horizontal.output)
          vertical.output = new File(windowDir, s"${sample.sampleId}.window_$size.z.bed")
          vertical.windowSize = size
          size -> vertical
        }
        sample -> subMap.toMap
    }

Sander Bollen's avatar
Sander Bollen committed
244
245
246
247
248
249
250
    val thresholdJobs = windowStouffJobs map {
      case (sample, subMap) =>
        val threshSubMap = subMap map {
          case (size, job) =>
            val windowDir = new File(sample.sampleDir, s"window_$size")
            val thresholder = new BedThreshold(this)
            thresholder.input = job.output
Sander Bollen's avatar
Sander Bollen committed
251
            thresholder.output = Some(new File(windowDir, s"${sample.sampleId}.threshold.bed"))
Sander Bollen's avatar
Sander Bollen committed
252
            thresholder.threshold = threshold
Sander Bollen's avatar
Sander Bollen committed
253
            size -> thresholder
Sander Bollen's avatar
Sander Bollen committed
254
255
256
257
258
259
        }
        sample -> threshSubMap
    }

    _finalFiles = thresholdJobs map {
      case (sample, subMap) =>
Sander Bollen's avatar
Sander Bollen committed
260
        sample -> subMap.values.flatMap(_.output).toList
Sander Bollen's avatar
Sander Bollen committed
261
262
    }

Sander Bollen's avatar
Sander Bollen committed
263
264
265
266
267
268
    addAll(xhmmRefJobs.values.flatMap(_._1))
    addAll(wisecondorRefJobs.values.flatMap(_._1))
    addAll(xhmmZJobs.values.flatMap(_._1))
    addAll(wisecondorZJobs.values)
    addAll(wisecondorSyncJobs.values)
    addAll(xhmmSyncJobs.values)
Sander Bollen's avatar
Sander Bollen committed
269
    addAll(zScoreMergeJobs.values)
Sander Bollen's avatar
Sander Bollen committed
270
271
    addAll(windowStouffJobs.values.flatMap(_.values))
    addAll(thresholdJobs.values.flatMap(_.values))
Sander Bollen's avatar
Sander Bollen committed
272
    addAll(recessiveJobs)
Sander Bollen's avatar
Sander Bollen committed
273
274
275
276
277

    val xhmmZGzip = xhmmZJobs map {
      case (sample, jobAndRefFile) =>
        val sort = sortBgzipAndTabix(jobAndRefFile._2)
        val pipeJob = new BiopetFifoPipe(this, sort.sortJob :: sort.bgzipJob :: Nil)
Bollen's avatar
Bollen committed
278
        add(sort.tabixJob, pipeJob)
Sander Bollen's avatar
Sander Bollen committed
279
280
281
282
283
284
285
        sample -> sort
    }

    val wisecondorZGzip = wisecondorZJobs map {
      case (sample, job) =>
        val sort = sortBgzipAndTabix(job.output)
        val pipeJob = new BiopetFifoPipe(this, sort.sortJob :: sort.bgzipJob :: Nil)
Sander Bollen's avatar
fix add    
Sander Bollen committed
286
        add(sort.tabixJob, pipeJob)
Sander Bollen's avatar
Sander Bollen committed
287
288
289
290
291
292
293
294
295
        sample -> sort
    }

    val stouffGzip = windowStouffJobs map {
      case (sample, subMap) =>
        val nMap = subMap map {
          case (window, stouffJob) =>
            val sort = sortBgzipAndTabix(stouffJob.output)
            val pipeJob = new BiopetFifoPipe(this, sort.sortJob :: sort.bgzipJob :: Nil)
Bollen's avatar
Bollen committed
296
            add(pipeJob, sort.tabixJob)
Sander Bollen's avatar
Sander Bollen committed
297
298
299
300
301
302
303
304
305
306
307
308
            window -> sort
        }
        sample -> nMap
    }

    thresholdJobs foreach {
      case (sample, subMap) =>
        subMap foreach {
          case (window, threshJob) =>
            val imageDir = new File(threshJob.output.get.getParentFile, "plots")
            val sort = sortBgzipAndTabix(threshJob.output.get)
            val pipeJob = new BiopetFifoPipe(this, sort.sortJob :: sort.bgzipJob :: Nil)
Bollen's avatar
Bollen committed
309
            add(pipeJob, sort.tabixJob)
Sander Bollen's avatar
Sander Bollen committed
310
311
312
313
314
            val xhmmZ = xhmmZGzip(sample)
            val wiseZ = wisecondorZGzip(sample)
            val stouff = stouffGzip(sample)(window)
            val plotter = new TarmacPlot(this)
            plotter.outputDir = imageDir
315
            plotter.callFile = sort.bgzipJob.output
Sander Bollen's avatar
Sander Bollen committed
316
317
318
            plotter.stouffFile = stouff.bgzipJob.output
            plotter.xhmmFile = xhmmZ.bgzipJob.output
            plotter.wisecondorFile = wiseZ.bgzipJob.output
319
            plotter.deps ++= sort.tabixJob.outputIndex :: xhmmZ.tabixJob.outputIndex :: wiseZ.tabixJob.outputIndex :: stouff.tabixJob.outputIndex :: Nil
Sander Bollen's avatar
Sander Bollen committed
320
321
322
323
324
            add(plotter)
        }
    }
  }

Sander Bollen's avatar
Sander Bollen committed
325
326
327
328
329
330
331
332
333
334
335
336
337
338
  def findParentHorizontals(
      sample: Sample,
      sampleHorizontalMap: Map[Sample, StouffbedHorizontal]): List[StouffbedHorizontal] = {
    val sampleIdMap = sampleHorizontalMap.map { case (k, v) => k.sampleId -> v }
    (sample.mother, sample.father) match {
      case (Some(m), Some(f)) =>
        (sampleIdMap.get(m), sampleIdMap.get(f)) match {
          case (Some(one), Some(two)) => List(one, two)
          case _ => Nil
        }
      case _ => Nil
    }
  }

Sander Bollen's avatar
Sander Bollen committed
339
340
341
342
343
344
345
346
347
348
349
350
351
352
  case class SortBgzipTabix(sortJob: BedtoolsSort, bgzipJob: Bgzip, tabixJob: Tabix)

  def sortBgzipAndTabix(file: File): SortBgzipTabix = {
    val sorter = new BedtoolsSort(this)
    sorter.input = file
    sorter.isIntermediate = true
    val sortFile = swapExt(file.getParentFile, file, ".bed", ".sorted.bed")
    sorter.output = sortFile
    val gzFile = swapExt(file.getParentFile, file, ".bed", ".sorted.bed.gz")
    val bgzip = new Bgzip(this)
    bgzip.input = List(sortFile)
    bgzip.output = gzFile
    val tbi = Tabix(this, gzFile)
    SortBgzipTabix(sorter, bgzip, tbi)
Sander Bollen's avatar
Sander Bollen committed
353
  }
354
355

  /**
356
357
358
359
360
361
362
    * Get set of sample names constituting reference samples for a given sample name
    *
    * Reference samples must match the own gender, while excluding own parents (if any) and self
    *
    * @param sampleName: The sample name to create reference set for
    * @return
    */
363
364
365
366
  def getReferenceSamplesForSample(sampleName: String): String \/ Set[String] = {
    val allSampleNames = pedSamples.map(_.individualId)
    if (!allSampleNames.toSet.contains(sampleName)) {
      -\/(s"Sample $sampleName does not exist in PED samples")
Sander Bollen's avatar
Sander Bollen committed
367
    } else {
368
369
370
371
      val theSample = pedSamples(allSampleNames.indexOf(sampleName))
      val totalSet = pedSamples.filter(p => p.gender == theSample.gender).map(_.individualId).toSet
      val referenceSet = (theSample.maternalId, theSample.paternalId) match {
        case (Some(m), Some(f)) => totalSet - (m, f, sampleName)
372
373
374
        case (None, Some(f)) => totalSet - (f, sampleName)
        case (Some(m), None) => totalSet - (m, sampleName)
        case _ => totalSet - sampleName
375
376
377
378
      }
      \/-(referenceSet)
    }
  }
Sander Bollen's avatar
Sander Bollen committed
379

Sander Bollen's avatar
Sander Bollen committed
380
381
382
383
  /*
  Create jobs for Xhmm reference creation
  Returns both jobs and the resulting reference file
   */
384
385
  def createXhmmReferenceJobs(sample: Sample,
                              referenceSamples: Set[Sample],
386
                              outputDirectory: File): (List[QFunction], File) = {
Sander Bollen's avatar
Sander Bollen committed
387
388
389
    /* XHMM requires refset including self */
    val totalSet = referenceSamples + sample
    val merger = new XhmmMergeGatkDepths(this)
390
391
    merger.gatkDepthsFiles =
      totalSet.map(_.outputXhmmCountFile).collect { case \/-(file) => file }.toList
Sander Bollen's avatar
Sander Bollen committed
392
393
394
    val refFile = new File(outputDirectory, "reference.matrix")
    merger.output = refFile
    (List(merger), refFile)
Sander Bollen's avatar
Sander Bollen committed
395
396
  }

Sander Bollen's avatar
Sander Bollen committed
397
398
399
400
  /*
  Create jobs for wisecondor reference creation
  Returns both jobs and the resulting reference file
   */
401
402
403
404
  def createWisecondorReferenceJobs(referenceSamples: Set[Sample],
                                    outputDirectory: File): (List[QFunction], File) = {
    val gccs =
      referenceSamples.map(_.outputWisecondorGccFile).collect { case \/-(file) => file }.toList
Sander Bollen's avatar
Sander Bollen committed
405
    val reference = new WisecondorNewRef(this)
406
    reference.inputBeds = gccs
Sander Bollen's avatar
Sander Bollen committed
407
408
    reference.output = new File(outputDirectory, "reference.bed")
    reference.isIntermediate = true
409
410
411
412
413
414
415
416
417
    val sort = new BedtoolsSort(this)
    sort.input = reference.output
    sort.output = new File(outputDirectory, "reference.sorted.bed")
    sort.isIntermediate = true
    val refFile = new File(outputDirectory, "reference.bed.gz")
    val gzipRef = new Bgzip(this) // FIXME change to pipe with sort
    gzipRef.input = List(sort.output)
    gzipRef.output = refFile
    val tabix = Tabix(this, refFile)
Sander Bollen's avatar
Sander Bollen committed
418
    (List(new BiopetFifoPipe(this, reference :: sort :: gzipRef :: Nil), tabix), refFile)
Sander Bollen's avatar
Sander Bollen committed
419
420
  }

421
422
423
  def createWisecondorZScore(sample: Sample,
                             referenceFile: File,
                             tbiFile: File): WisecondorZscore = {
Sander Bollen's avatar
Sander Bollen committed
424
    val zscore = new WisecondorZscore(this)
425
    val zFile = new File(sample.wisecondorDir, s"${sample.sampleId}.z.bed")
Sander Bollen's avatar
Sander Bollen committed
426
427
428
429
430
431
432
433
    zscore.inputBed = sample.outputWisecondorGzFile.getOrElse(new File(""))
    zscore.deps ++= sample.outputWisecondorTbiFile.toList
    zscore.deps :+= tbiFile
    zscore.referenceDictionary = referenceFile
    zscore.output = zFile
    zscore
  }

434
  def createXhmmZscore(sample: Sample, referenceMatrix: File): (List[QFunction], File) = {
Sander Bollen's avatar
Sander Bollen committed
435
436
437
438

    // the filtered and centered matrix
    val filtMatrix = new XhmmMatrix(this)
    filtMatrix.inputMatrix = referenceMatrix
439
440
441
442
443
444
    filtMatrix.outputMatrix =
      new File(sample.xhmmDir, s"${sample.sampleId}.filtered-centered.matrix")
    filtMatrix.outputExcludedSamples = Some(
      new File(sample.xhmmDir, s"${sample.sampleId}.filtered-samples.txt"))
    filtMatrix.outputExcludedTargets = Some(
      new File(sample.xhmmDir, s"${sample.sampleId}.filtered-targets.txt"))
Sander Bollen's avatar
Sander Bollen committed
445
446
447
448

    // pca generation
    val pca = new XhmmPca(this)
    pca.inputMatrix = filtMatrix.outputMatrix
449
    pca.pcaFile = new File(sample.xhmmDir, s"${sample.sampleId}.pca.matrix")
Sander Bollen's avatar
Sander Bollen committed
450
451
452
453
454

    // normalization
    val normalize = new XhmmNormalize(this)
    normalize.inputMatrix = filtMatrix.outputMatrix
    normalize.pcaFile = pca.pcaFile
455
    normalize.normalizeOutput = new File(sample.xhmmDir, s"${sample.sampleId}.normalized.matrix")
Sander Bollen's avatar
Sander Bollen committed
456
457
458
459
460
461
462

    // create matrix of zscores
    val zMatrix = new XhmmMatrix(this)
    zMatrix.inputMatrix = normalize.normalizeOutput
    zMatrix.centerData = true
    zMatrix.centerType = "sample"
    zMatrix.zScoreData = true
463
464
465
466
    zMatrix.outputExcludedTargets = Some(
      new File(sample.xhmmDir, s"${sample.sampleId}.z-filtered-targets.txt"))
    zMatrix.outputExcludedSamples = Some(
      new File(sample.xhmmDir, s"${sample.sampleId}.z-filtered-samples.txt"))
467
    zMatrix.outputMatrix = new File(sample.xhmmDir, "zscores.matrix")
Sander Bollen's avatar
Sander Bollen committed
468
469
470
471
472

    // select sample from matrix
    val selector = new SampleFromMatrix(this)
    selector.sample = sample.sampleId
    selector.inputMatrix = zMatrix.outputMatrix
473
    val zscoreFile = new File(sample.xhmmDir, s"${sample.sampleId}.zscore-xhmm.bed")
Sander Bollen's avatar
Sander Bollen committed
474
475
476
    selector.output = Some(zscoreFile)

    (selector :: zMatrix :: normalize :: pca :: filtMatrix :: Nil, zscoreFile)
Sander Bollen's avatar
Sander Bollen committed
477
  }
Sander Bollen's avatar
Sander Bollen committed
478

479
480
  class Sample(name: String) extends AbstractSample(name) {

481
482
    val inputXhmmCountFile: Option[File] = config("xhmm_count_file")
    val inputWisecondorCountFile: Option[File] = config("wisecondor_count_file")
483
484
    val bamFile: Option[File] = config("bam")

485
486
487
    val wisecondorDir: File = new File(sampleDir, "wisecondor")
    val xhmmDir: File = new File(sampleDir, "xhmm")

488
    /**
489
490
491
492
493
      * Create XHMM count file or create link to input count file
      * Precedence is given to existing count files.
      * Returns a disjunction where right is the file, and left is
      * a potential error message
      */
494
    protected lazy val outputXhmmCountJob: String \/ QFunction = {
495
      val outFile = new File(xhmmDir, s"$name.dcov")
496
      (inputXhmmCountFile, bamFile) match {
497
        case (Some(f), _) =>
Sander Bollen's avatar
Sander Bollen committed
498
          if (bamFile.isDefined) {
499
500
            logger.warn(
              s"Both BAM and Xhmm count files are given for sample $name. The BAM file will be ignored")
Sander Bollen's avatar
Sander Bollen committed
501
          }
502
503
504
          val ln = new Ln(root)
          ln.input = f
          ln.output = outFile
505
          \/-(ln)
506
        case (None, Some(bam)) =>
507
          val dcov = DepthOfCoverage(root, List(bam), outFile, List(targets))
508
          \/-(dcov)
509
510
511
512
        case _ =>
          -\/(
            s"Cannot find bam file or xhmm count file for sample" +
              s" $name in config. At least one must be given.")
513
514
515
516
      }

    }

517
518
519
    /* Get count file for Xhmm method */
    lazy val outputXhmmCountFile: String \/ File = {
      outputXhmmCountJob match {
520
        case \/-(ln: Ln) => \/-(ln.output)
Sander Bollen's avatar
Sander Bollen committed
521
        case \/-(doc: DepthOfCoverage) => \/-(doc.intervalSummaryFile)
522
        case -\/(error) => -\/(error)
523
        case _ => throw new IllegalStateException("This should not be reachable")
524
525
526
      }
    }

527
    /**
528
529
530
531
532
      * Create wisecondor count file or create link to input count file.
      * Precedence is given to existing count files.
      * Returns a disjunction where right is the file, and left is
      * a potential error message
      */
533
    protected lazy val outputWisecondorCountJob: String \/ QFunction = {
534
      val outFile = new File(wisecondorDir, s"$name.wisecondor.bed")
535
      (inputWisecondorCountFile, bamFile) match {
536
        case (Some(f), _) =>
Sander Bollen's avatar
Sander Bollen committed
537
          if (bamFile.isDefined) {
538
539
            logger.warn(
              s"Both BAM and Wisecondor count files are given for sample $name. The BAM file will be ignored")
Sander Bollen's avatar
Sander Bollen committed
540
          }
541
542
543
          val ln = new Ln(root)
          ln.input = f
          ln.output = outFile
544
          \/-(ln)
545
        case (None, Some(bam)) =>
546
547
548
549
          val counter = new WisecondorCount(root)
          counter.inputBam = bam
          counter.output = outFile
          counter.binFile = Some(targets)
550
          \/-(counter)
551
552
553
554
        case _ =>
          -\/(
            s"Cannot find bam file or wisecondor count for sample" +
              s" $name. At least one must be given.")
555
556
557
558
      }

    }

559
560
561
    /* Get count file for wisecondor method */
    lazy val outputWisecondorCountFile: String \/ File = {
      outputWisecondorCountJob match {
562
        case \/-(ln: Ln) => \/-(ln.output)
563
        case \/-(count: WisecondorCount) => \/-(count.output)
564
        case -\/(error) => -\/(error)
565
        case _ => throw new IllegalStateException("This should not be reachable")
566
567
568
      }
    }

569
    protected lazy val outputWisecondorGccJob: String \/ QFunction = {
570
      val outFile = new File(wisecondorDir, s"$name.wisecondor.gcc.bed")
571
572
573
574
575
576
577
578
579
580
581
      outputWisecondorCountFile map { bedFile =>
        val gcc = new WisecondorGcCorrect(root)
        gcc.inputBed = bedFile
        gcc.output = outFile
        gcc
      }
    }

    lazy val outputWisecondorGccFile: String \/ File = {
      outputWisecondorGccJob match {
        case \/-(gcc: WisecondorGcCorrect) => \/-(gcc.output)
582
        case -\/(error) => -\/(error)
583
        case _ => throw new IllegalStateException("This should not be reachable")
584
585
586
      }
    }

587
    protected lazy val outputWisecondorSortJobs: String \/ List[QFunction] = {
588
589
590
      outputWisecondorGccFile map { gccFile =>
        val sort = new BedtoolsSort(root)
        sort.input = gccFile
591
        sort.output = new File(wisecondorDir, s"$name.wisecondor.sorted.gcc.bed")
592
593
        sort.isIntermediate = true
        val gz = new Bgzip(root)
594
595
596
597
        gz.input = List(sort.output)
        gz.output = new File(wisecondorDir, s"$name.wisecondor.sorted.gcc.bed.gz")
        val tabix = Tabix(root, gz.output)
        List(sort, gz, tabix)
598
599
600
601
      }
    }

    lazy val outputWisecondorGzFile: String \/ File = {
602
      outputWisecondorSortJobs match {
603
604
605
        case -\/(error) => -\/(error)
        case \/-(functions: List[QFunction]) =>
          \/-(functions.collect { case gz: Bgzip => gz.output }.head)
606
607
608
609
      }
    }

    lazy val outputWisecondorTbiFile: String \/ File = {
610
      outputWisecondorSortJobs match {
611
612
613
        case -\/(error) => -\/(error)
        case \/-(functions: List[QFunction]) =>
          \/-(functions.collect { case tbi: Tabix => tbi.outputIndex }.head)
614
615
616
      }
    }

617
    /** Function to add sample jobs */
618
    def addJobs(): Unit = {
619
      (outputWisecondorGccJob :: outputWisecondorCountJob :: outputXhmmCountJob :: Nil).foreach {
620
        case -\/(error) => Logging.addError(error)
Sander Bollen's avatar
Sander Bollen committed
621
        case \/-(function) => add(function)
622
      }
623
      outputWisecondorSortJobs match {
624
        case -\/(error) => Logging.addError(error)
625
626
        case \/-(functions: List[QFunction]) => addAll(functions)
      }
627
    }
628

629
    /* This is necessary for compile reasons, but library does not in fact exist for this pipeline */
630
631
632
633
634
635
636
    def makeLibrary(id: String) = new Library(id)

    class Library(id: String) extends AbstractLibrary(id) {
      def addJobs(): Unit = {}
      def summaryFiles: Map[String, File] = Map()
      def summaryStats: Any = Map()
    }
637

638
639
640
    /** Must return files to store into summary */
    def summaryFiles: Map[String, File] = Map()

Sander Bollen's avatar
Sander Bollen committed
641
642
643
    /** Libs do not exist for this pipeline */
    override def libIds: Set[String] = Set()

644
645
646
647
648
    /** Must returns stats to store into summary */
    def summaryStats: Any = Map()
  }

  def makeSample(sampleId: String) = new Sample(sampleId)
Sander Bollen's avatar
Sander Bollen committed
649
650
651
652
653
654

  def summarySettings: Map[String, Any] = Map()
  def summaryFiles: Map[String, File] = Map()

  def summaryFile: File = new File(outputDir, "tarmac.summary.json")
}
655

Bollen's avatar
Bollen committed
656
object Tarmac extends PipelineCommand