VcfStats.scala 17.3 KB
Newer Older
1
2
package nl.lumc.sasc.biopet.tools

Peter van 't Hof's avatar
Peter van 't Hof committed
3
import java.io.{ FileOutputStream, PrintWriter, File }
4

Peter van 't Hof's avatar
Peter van 't Hof committed
5
import htsjdk.variant.variantcontext.{ VariantContext, Genotype }
6
import htsjdk.variant.vcf.VCFFileReader
Peter van 't Hof's avatar
Peter van 't Hof committed
7
import nl.lumc.sasc.biopet.core.summary.Summarizable
Peter van 't Hof's avatar
Peter van 't Hof committed
8
9
10
import nl.lumc.sasc.biopet.core.{ BiopetJavaCommandLineFunction, ToolCommand }
import nl.lumc.sasc.biopet.core.config.Configurable
import org.broadinstitute.gatk.utils.commandline.{ Output, Input }
11
12
import scala.collection.JavaConversions._
import scala.collection.mutable
Peter van 't Hof's avatar
Peter van 't Hof committed
13
import scala.io.Source
Peter van 't Hof's avatar
Peter van 't Hof committed
14
import scala.sys.process.{ Process, ProcessLogger }
Peter van 't Hof's avatar
Peter van 't Hof committed
15
import htsjdk.samtools.util.Interval
16
17
18
19

/**
 * Created by pjvan_thof on 1/10/15.
 */
Peter van 't Hof's avatar
Peter van 't Hof committed
20
class VcfStats(val root: Configurable) extends BiopetJavaCommandLineFunction with Summarizable {
Peter van 't Hof's avatar
Peter van 't Hof committed
21
22
23
24
25
  javaMainClass = getClass.getName

  @Input(doc = "Input fastq", shortName = "I", required = true)
  var input: File = _

Peter van 't Hof's avatar
Peter van 't Hof committed
26
27
28
29
30
31
  @Output
  protected var generalStats: File = null

  @Output
  protected var genotypeStats: File = null

Peter van 't Hof's avatar
Peter van 't Hof committed
32
33
34
  override val defaultVmem = "4G"
  override val defaultThreads = 3

Peter van 't Hof's avatar
Peter van 't Hof committed
35
  protected var outputDir: File = _
36

Peter van 't Hof's avatar
Peter van 't Hof committed
37
38
39
40
  /**
   * Set output dir and a output file
   * @param dir
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
41
  def setOutputDir(dir: File): Unit = {
42
    outputDir = dir
Peter van 't Hof's avatar
Peter van 't Hof committed
43
44
45
    generalStats = new File(dir, "general.tsv")
    genotypeStats = new File(dir, "genotype_general.tsv")
    jobOutputFile = new File(dir, ".vcfstats.out")
46
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
47
48
49
50
51
52
53

  /**
   * Creates command to execute extension
   * @return
   */
  override def commandLine = super.commandLine +
    required("-I", input) +
54
    required("-o", outputDir)
Peter van 't Hof's avatar
Peter van 't Hof committed
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

  /**
   * Returns general stats to the summary
   * @return
   */
  def summaryStats: Map[String, Any] = {
    (for (
      line <- Source.fromFile(generalStats).getLines();
      values = line.split("\t") if values.size >= 2 && !values(0).isEmpty
    ) yield values(0) -> values(1).toInt
    ).toMap
  }

  /**
   * return only general files to summary
   * @return
   */
  def summaryFiles: Map[String, File] = Map(
    "general_stats" -> generalStats,
    "genotype_stats" -> genotypeStats
  )
76
77
78
}

object VcfStats extends ToolCommand {
Peter van 't Hof's avatar
Peter van 't Hof committed
79
  /** Commandline argument */
Peter van 't Hof's avatar
Peter van 't Hof committed
80
  case class Args(inputFile: File = null, outputDir: String = null, intervals: Option[File] = None) extends AbstractArgs
81

Peter van 't Hof's avatar
Peter van 't Hof committed
82
  /** Parsing commandline arguments */
83
84
85
86
87
88
89
  class OptParser extends AbstractOptParser {
    opt[File]('I', "inputFile") required () unbounded () valueName ("<file>") action { (x, c) =>
      c.copy(inputFile = x)
    }
    opt[String]('o', "outputDir") required () unbounded () valueName ("<file>") action { (x, c) =>
      c.copy(outputDir = x)
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
90
    //TODO: add interval argument
Peter van 't Hof's avatar
Peter van 't Hof committed
91
92
93
94
95
    /*
    opt[File]('i', "intervals") unbounded () valueName ("<file>") action { (x, c) =>
      c.copy(intervals = Some(x))
    }
    */
96
97
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
98
99
100
101
102
103
104
  /**
   * Class to store sample to sample compare stats
   * @param genotypeOverlap Number of genotypes match with other sample
   * @param alleleOverlap Number of alleles also found in other sample
   */
  case class SampleToSampleStats(var genotypeOverlap: Int = 0,
                                 var alleleOverlap: Int = 0) {
Peter van 't Hof's avatar
Peter van 't Hof committed
105
    /** Add an other class */
Peter van 't Hof's avatar
Peter van 't Hof committed
106
107
108
109
110
111
    def +=(other: SampleToSampleStats) {
      this.genotypeOverlap += other.genotypeOverlap
      this.alleleOverlap += other.alleleOverlap
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
112
113
114
115
116
117
118
  /**
   * class to store all sample relative stats
   * @param genotypeStats Stores all genotype relative stats
   * @param sampleToSample Stores sample to sample compare stats
   */
  case class SampleStats(val genotypeStats: mutable.Map[String, mutable.Map[Any, Int]] = mutable.Map(),
                         val sampleToSample: mutable.Map[String, SampleToSampleStats] = mutable.Map()) {
Peter van 't Hof's avatar
Peter van 't Hof committed
119
    /** Add an other class */
Peter van 't Hof's avatar
Peter van 't Hof committed
120
121
122
123
124
125
126
127
128
129
130
131
132
    def +=(other: SampleStats): Unit = {
      for ((key, value) <- other.sampleToSample) {
        if (this.sampleToSample.contains(key)) this.sampleToSample(key) += value
        else this.sampleToSample(key) = value
      }
      for ((field, fieldMap) <- other.genotypeStats) {
        val thisField = this.genotypeStats.get(field)
        if (thisField.isDefined) mergeStatsMap(thisField.get, fieldMap)
        else this.genotypeStats += field -> fieldMap
      }
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
133
134
135
136
137
138
139
  /**
   * General stats class to store vcf stats
   * @param generalStats Stores are general stats
   * @param samplesStats Stores all sample/genotype specific stats
   */
  case class Stats(val generalStats: mutable.Map[String, mutable.Map[Any, Int]] = mutable.Map(),
                   val samplesStats: mutable.Map[String, SampleStats] = mutable.Map()) {
Peter van 't Hof's avatar
Peter van 't Hof committed
140
    /** Add an other class */
Peter van 't Hof's avatar
Peter van 't Hof committed
141
    def +=(other: Stats): Stats = {
Peter van 't Hof's avatar
Peter van 't Hof committed
142
143
144
145
146
147
148
149
150
      for ((key, value) <- other.samplesStats) {
        if (this.samplesStats.contains(key)) this.samplesStats(key) += value
        else this.samplesStats(key) = value
      }
      for ((field, fieldMap) <- other.generalStats) {
        val thisField = this.generalStats.get(field)
        if (thisField.isDefined) mergeStatsMap(thisField.get, fieldMap)
        else this.generalStats += field -> fieldMap
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
151
      this
Peter van 't Hof's avatar
Peter van 't Hof committed
152
153
154
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
155
156
157
158
159
  /**
   * Merge m2 into m1
   * @param m1
   * @param m2
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
160
161
162
163
164
  def mergeStatsMap(m1: mutable.Map[Any, Int], m2: mutable.Map[Any, Int]): Unit = {
    for (key <- m2.keySet)
      m1(key) = m1.getOrElse(key, 0) + m2(key)
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
165
166
167
168
169
  /**
   * Merge m2 into m1
   * @param m1
   * @param m2
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
170
171
172
173
174
175
176
177
178
179
  def mergeNestedStatsMap(m1: mutable.Map[String, mutable.Map[Any, Int]], m2: Map[String, Map[Any, Int]]): Unit = {
    for ((field, fieldMap) <- m2) {
      if (m1.contains(field)) {
        for ((key, value) <- fieldMap) {
          if (m1(field).contains(key)) m1(field)(key) += value
          else m1(field)(key) = value
        }
      } else m1(field) = mutable.Map(fieldMap.toList: _*)
    }
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
180

Peter van 't Hof's avatar
Peter van 't Hof committed
181
  protected var commandArgs: Args = _
Peter van 't Hof's avatar
Peter van 't Hof committed
182

183
184
185
186
187
188
  /**
   * @param args the command line arguments
   */
  def main(args: Array[String]): Unit = {
    logger.info("Started")
    val argsParser = new OptParser
Peter van 't Hof's avatar
Peter van 't Hof committed
189
    commandArgs = argsParser.parse(args, Args()) getOrElse sys.exit(1)
190

Peter van 't Hof's avatar
Peter van 't Hof committed
191
    val reader = new VCFFileReader(commandArgs.inputFile, true)
192
193
194
    val header = reader.getFileHeader
    val samples = header.getSampleNamesInOrder.toList

Peter van 't Hof's avatar
Peter van 't Hof committed
195
    val intervals: List[Interval] = (
Peter van 't Hof's avatar
Peter van 't Hof committed
196
197
      for (
        seq <- header.getSequenceDictionary.getSequences;
198
199
        chunks = (seq.getSequenceLength / 10000000) + (if (seq.getSequenceLength % 10000000 == 0) 0 else 1);
        i <- 1 to chunks
Peter van 't Hof's avatar
Peter van 't Hof committed
200
      ) yield {
Peter van 't Hof's avatar
Peter van 't Hof committed
201
        val size = seq.getSequenceLength / chunks
Peter van 't Hof's avatar
Peter van 't Hof committed
202
        val begin = size * (i - 1) + 1
Peter van 't Hof's avatar
Peter van 't Hof committed
203
204
205
        val end = if (i >= chunks) seq.getSequenceLength else size * i
        new Interval(seq.getSequenceName, begin, end)
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
206
    ).toList
Peter van 't Hof's avatar
Peter van 't Hof committed
207
208
209

    val totalBases = intervals.foldRight(0L)(_.length() + _)

Peter van 't Hof's avatar
Peter van 't Hof committed
210
211
    // Reading vcf records
    logger.info("Start reading vcf records")
Peter van 't Hof's avatar
Peter van 't Hof committed
212
213
214
215
216
217

    def createStats: Stats = {
      val stats = new Stats
      //init stats
      for (sample1 <- samples) {
        stats.samplesStats += sample1 -> new SampleStats
218
        for (sample2 <- samples) {
Peter van 't Hof's avatar
Peter van 't Hof committed
219
          stats.samplesStats(sample1).sampleToSample += sample2 -> new SampleToSampleStats
220
221
        }
      }
Peter van 't Hof's avatar
Peter van 't Hof committed
222
223
224
225
226
      stats
    }

    var variantCounter = 0L
    var baseCounter = 0L
Peter van 't Hof's avatar
Peter van 't Hof committed
227

Peter van 't Hof's avatar
Peter van 't Hof committed
228
    def status(count: Int, interval: Interval): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
229
230
231
232
233
      variantCounter += count
      baseCounter += interval.length()
      val fraction = baseCounter.toFloat / totalBases * 100
      logger.info(interval + " done, " + count + " rows processed")
      logger.info("total: " + variantCounter + " rows processed, " + fraction + "% done")
234
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
235

Peter van 't Hof's avatar
Peter van 't Hof committed
236
237
238
239
240
241
    val statsChunks = for (interval <- intervals.par) yield {
      val reader = new VCFFileReader(commandArgs.inputFile, true)
      var chunkCounter = 0
      val stats = createStats
      logger.info("Starting on: " + interval)

Peter van 't Hof's avatar
Peter van 't Hof committed
242
243
244
      for (
        record <- reader.query(interval.getSequence, interval.getStart, interval.getEnd) if record.getStart <= interval.getEnd
      ) {
Peter van 't Hof's avatar
Peter van 't Hof committed
245
246
247
        mergeNestedStatsMap(stats.generalStats, checkGeneral(record))
        for (sample1 <- samples) yield {
          val genotype = record.getGenotype(sample1)
Peter van 't Hof's avatar
Peter van 't Hof committed
248
          mergeNestedStatsMap(stats.samplesStats(sample1).genotypeStats, checkGenotype(record, genotype))
Peter van 't Hof's avatar
Peter van 't Hof committed
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
          for (sample2 <- samples) {
            val genotype2 = record.getGenotype(sample2)
            if (genotype.getAlleles == genotype2.getAlleles)
              stats.samplesStats(sample1).sampleToSample(sample2).genotypeOverlap += 1
            stats.samplesStats(sample1).sampleToSample(sample2).alleleOverlap += genotype.getAlleles.count(allele => genotype2.getAlleles.exists(_.basesMatch(allele)))
          }
        }
        chunkCounter += 1
      }
      status(chunkCounter, interval)
      stats
    }

    val stats = statsChunks.toList.fold(createStats)(_ += _)

Peter van 't Hof's avatar
Peter van 't Hof committed
264
    logger.info("Done reading vcf records")
265

Peter van 't Hof's avatar
Peter van 't Hof committed
266
267
    writeField("QUAL", stats.generalStats.getOrElse("QUAL", mutable.Map()))
    writeField("general", stats.generalStats.getOrElse("general", mutable.Map()))
Peter van 't Hof's avatar
Peter van 't Hof committed
268
269
270
    writeGenotypeFields(stats, commandArgs.outputDir + "/genotype_", samples)
    writeOverlap(stats, _.genotypeOverlap, commandArgs.outputDir + "/sample_compare/genotype_overlap", samples)
    writeOverlap(stats, _.alleleOverlap, commandArgs.outputDir + "/sample_compare/allele_overlap", samples)
271
272
273
274

    logger.info("Done")
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
275
276
277
278
279
280
281
282
  /**
   * Function to check all general stats, all info expect sample/genotype specific stats
   * @param record
   * @return
   */
  protected def checkGeneral(record: VariantContext): Map[String, Map[Any, Int]] = {
    val buffer = mutable.Map[String, Map[Any, Int]]()

Peter van 't Hof's avatar
Peter van 't Hof committed
283
284
285
286
287
    def addToBuffer(key: String, value: Any): Unit = {
      val map = buffer.getOrElse(key, Map())
      buffer += key -> (map + (value -> (map.getOrElse(value, 0) + 1)))
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
288
    buffer += "QUAL" -> Map(record.getPhredScaledQual -> 1)
Peter van 't Hof's avatar
Peter van 't Hof committed
289

Peter van 't Hof's avatar
Peter van 't Hof committed
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
    addToBuffer("general", "Total")
    if (record.isBiallelic) addToBuffer("general", "Biallelic")
    if (record.isComplexIndel) addToBuffer("general", "ComplexIndel")
    if (record.isFiltered) addToBuffer("general", "Filtered")
    if (record.isFullyDecoded) addToBuffer("general", "FullyDecoded")
    if (record.isIndel) addToBuffer("general", "Indel")
    if (record.isMixed) addToBuffer("general", "Mixed")
    if (record.isMNP) addToBuffer("general", "MNP")
    if (record.isMonomorphicInSamples) addToBuffer("general", "MonomorphicInSamples")
    if (record.isNotFiltered) addToBuffer("general", "NotFiltered")
    if (record.isPointEvent) addToBuffer("general", "PointEvent")
    if (record.isPolymorphicInSamples) addToBuffer("general", "PolymorphicInSamples")
    if (record.isSimpleDeletion) addToBuffer("general", "SimpleDeletion")
    if (record.isSimpleInsertion) addToBuffer("general", "SimpleInsertion")
    if (record.isSNP) addToBuffer("general", "SNP")
    if (record.isStructuralIndel) addToBuffer("general", "StructuralIndel")
    if (record.isSymbolic) addToBuffer("general", "Symbolic")
    if (record.isSymbolicOrSV) addToBuffer("general", "SymbolicOrSV")
    if (record.isVariant) addToBuffer("general", "Variant")
Peter van 't Hof's avatar
Peter van 't Hof committed
309
310

    buffer.toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
311
312
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
313
314
315
316
317
318
319
  /**
   * Function to check sample/genotype specific stats
   * @param record
   * @param genotype
   * @return
   */
  protected def checkGenotype(record: VariantContext, genotype: Genotype): Map[String, Map[Any, Int]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
320
    val buffer = mutable.Map[String, Map[Any, Int]]()
Peter van 't Hof's avatar
Peter van 't Hof committed
321

Peter van 't Hof's avatar
Peter van 't Hof committed
322
    def addToBuffer(key: String, value: Any): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
323
324
325
326
      val map = buffer.getOrElse(key, Map())
      buffer += key -> (map + (value -> (map.getOrElse(value, 0) + 1)))
    }

Peter van 't Hof's avatar
Peter van 't Hof committed
327
328
329
    buffer += "DP" -> Map((if (genotype.hasDP) genotype.getDP else "not set") -> 1)
    buffer += "GQ" -> Map((if (genotype.hasGQ) genotype.getGQ else "not set") -> 1)

Peter van 't Hof's avatar
Peter van 't Hof committed
330
331
    val usedAlleles = (for (allele <- genotype.getAlleles) yield record.getAlleleIndex(allele)).toList

Peter van 't Hof's avatar
Peter van 't Hof committed
332
    addToBuffer("general", "Total")
Peter van 't Hof's avatar
Peter van 't Hof committed
333
334
335
336
337
338
339
340
341
342
343
344
    if (genotype.isHet) addToBuffer("general", "Het")
    if (genotype.isHetNonRef) addToBuffer("general", "HetNonRef")
    if (genotype.isHom) addToBuffer("general", "Hom")
    if (genotype.isHomRef) addToBuffer("general", "HomRef")
    if (genotype.isHomVar) addToBuffer("general", "HomVar")
    if (genotype.isMixed) addToBuffer("general", "Mixed")
    if (genotype.isNoCall) addToBuffer("general", "NoCall")
    if (genotype.isNonInformative) addToBuffer("general", "NonInformative")
    if (genotype.isAvailable) addToBuffer("general", "Available")
    if (genotype.isCalled) addToBuffer("general", "Called")
    if (genotype.isFiltered) addToBuffer("general", "Filtered")

Peter van 't Hof's avatar
Peter van 't Hof committed
345
346
347
348
349
350
351
352
353
354
    if (genotype.hasAD) {
      val ad = genotype.getAD
      for (i <- 0 until ad.size if ad(i) > 0) {
        addToBuffer("AD", ad(i))
        if (i == 0) addToBuffer("AD-ref", ad(i))
        if (i > 0) addToBuffer("AD-alt", ad(i))
        if (usedAlleles.exists(_ == i)) addToBuffer("AD-used", ad(i))
        else addToBuffer("AD-not_used", ad(i))
      }
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
355

Peter van 't Hof's avatar
Peter van 't Hof committed
356
    buffer.toMap
Peter van 't Hof's avatar
Peter van 't Hof committed
357
358
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
359
360
361
362
363
364
365
  /**
   * Function to write stats to tsv files
   * @param stats
   * @param prefix
   * @param samples
   */
  protected def writeGenotypeFields(stats: Stats, prefix: String, samples: List[String]) {
Peter van 't Hof's avatar
Peter van 't Hof committed
366
    val fields = List("DP", "GQ", "AD", "AD-ref", "AD-alt", "AD-used", "AD-not_used", "general")
Peter van 't Hof's avatar
Peter van 't Hof committed
367
    for (field <- fields) {
Peter van 't Hof's avatar
Peter van 't Hof committed
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
      writeGenotypeField(stats, prefix, samples, field)
    }
  }

  /**
   * Function to write 1 specific genotype field
   * @param stats
   * @param prefix
   * @param samples
   * @param field
   */
  protected def writeGenotypeField(stats: Stats, prefix: String, samples: List[String], field: String): Unit = {
    val file = new File(prefix + field + ".tsv")
    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    writer.println(samples.mkString(field + "\t", "\t", ""))
    val keySet = (for (sample <- samples) yield stats.samplesStats(sample).genotypeStats.getOrElse(field, Map[Any, Int]()).keySet).fold(Set[Any]())(_ ++ _)
    for (key <- keySet.toList.sortWith(sortAnyAny(_, _))) {
      val values = for (sample <- samples) yield stats.samplesStats(sample).genotypeStats.getOrElse(field, Map[Any, Int]()).getOrElse(key, 0)
      writer.println(values.mkString(key + "\t", "\t", ""))
Peter van 't Hof's avatar
Peter van 't Hof committed
388
    }
Peter van 't Hof's avatar
Peter van 't Hof committed
389
390
    writer.close()
    plotLine(file)
Peter van 't Hof's avatar
Peter van 't Hof committed
391
392
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
393
394
395
396
397
398
399
  /**
   * Function to write 1 specific general field
   * @param prefix
   * @param data
   * @return
   */
  protected def writeField(prefix: String, data: mutable.Map[Any, Int]): File = {
Peter van 't Hof's avatar
Peter van 't Hof committed
400
401
402
403
404
405
406
407
408
409
410
411
    val file = new File(commandArgs.outputDir + "/" + prefix + ".tsv")
    println(file)
    file.getParentFile.mkdirs()
    val writer = new PrintWriter(file)
    writer.println("\t" + prefix)
    for (key <- data.keySet.toList.sortWith(sortAnyAny(_, _))) {
      writer.println(key + "\t" + data(key))
    }
    writer.close()
    file
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
412
413
414
415
416
417
  /**
   * Function to sort Any values
   * @param a
   * @param b
   * @return
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
418
419
420
421
  def sortAnyAny(a: Any, b: Any): Boolean = {
    a match {
      case ai: Int => {
        b match {
Peter van 't Hof's avatar
Peter van 't Hof committed
422
423
424
          case bi: Int    => ai < bi
          case bi: Double => ai < bi
          case _          => a.toString < b.toString
Peter van 't Hof's avatar
Peter van 't Hof committed
425
426
427
428
429
430
        }
      }
      case _ => a.toString < b.toString
    }
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
431
432
433
434
435
436
437
  /**
   * Function to write sample to sample compare tsv's / heatmaps
   * @param stats
   * @param function function to extract targeted var in SampleToSampleStats
   * @param prefix
   * @param samples
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
438
439
  def writeOverlap(stats: Stats, function: SampleToSampleStats => Int,
                   prefix: String, samples: List[String]): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
440
441
442
443
444
445
446
    val absFile = new File(prefix + ".abs.tsv")
    val relFile = new File(prefix + ".rel.tsv")

    absFile.getParentFile.mkdirs()

    val absWriter = new PrintWriter(absFile)
    val relWriter = new PrintWriter(relFile)
447
448
449
450

    absWriter.println(samples.mkString("\t", "\t", ""))
    relWriter.println(samples.mkString("\t", "\t", ""))
    for (sample1 <- samples) {
Peter van 't Hof's avatar
Peter van 't Hof committed
451
452
      val values = for (sample2 <- samples) yield function(stats.samplesStats(sample1).sampleToSample(sample2))

453
454
      absWriter.println(values.mkString(sample1 + "\t", "\t", ""))

Peter van 't Hof's avatar
Peter van 't Hof committed
455
      val total = function(stats.samplesStats(sample1).sampleToSample(sample1))
456
457
458
459
      relWriter.println(values.map(_.toFloat / total).mkString(sample1 + "\t", "\t", ""))
    }
    absWriter.close()
    relWriter.close()
Peter van 't Hof's avatar
Peter van 't Hof committed
460
461

    plotHeatmap(relFile)
462
463
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
464
465
466
467
  /**
   * Plots heatmaps on target tsv file
   * @param file
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
468
  def plotHeatmap(file: File) {
Peter van 't Hof's avatar
Peter van 't Hof committed
469
470
    executeRscript("plotHeatmap.R", Array(file.getAbsolutePath,
      file.getAbsolutePath.stripSuffix(".tsv") + ".heatmap.png",
Peter van 't Hof's avatar
Peter van 't Hof committed
471
472
      file.getAbsolutePath.stripSuffix(".tsv") + ".heatmap.clustering.png",
      file.getAbsolutePath.stripSuffix(".tsv") + ".heatmap.dendrogram.png"))
Peter van 't Hof's avatar
Peter van 't Hof committed
473
474
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
475
476
477
478
479
  /**
   * Plots line graph with target tsv file
   * @param file
   */
  def plotLine(file: File) {
Peter van 't Hof's avatar
Peter van 't Hof committed
480
481
482
483
    executeRscript("plotXY.R", Array(file.getAbsolutePath,
      file.getAbsolutePath.stripSuffix(".tsv") + ".xy.png"))
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
484
485
486
487
488
  /**
   * Function to execute Rscript as subproces
   * @param resource
   * @param args
   */
Peter van 't Hof's avatar
Peter van 't Hof committed
489
490
491
492
493
494
495
496
497
  def executeRscript(resource: String, args: Array[String]): Unit = {
    val is = getClass.getResourceAsStream(resource)
    val file = File.createTempFile("script.", "." + resource)
    val os = new FileOutputStream(file)
    org.apache.commons.io.IOUtils.copy(is, os)
    os.close()

    val command: String = "Rscript " + file + " " + args.mkString(" ")

498
499
    logger.info("Starting: " + command)
    val process = Process(command).run(ProcessLogger(x => logger.debug(x), x => logger.debug(x)))
Peter van 't Hof's avatar
Peter van 't Hof committed
500
    if (process.exitValue() == 0) logger.info("Done: " + command)
501
    else {
Peter van 't Hof's avatar
Peter van 't Hof committed
502
      logger.warn("Failed: " + command)
503
504
      if (!logger.isDebugEnabled) logger.warn("Use -l debug for more info")
    }
505
506
  }
}