BammetricsReport.scala 18.1 KB
Newer Older
Peter van 't Hof's avatar
Peter van 't Hof committed
1
/**
2 3 4 5 6 7 8 9 10 11 12 13 14
  * Biopet is built on top of GATK Queue for building bioinformatic
  * pipelines. It is mainly intended to support LUMC SHARK cluster which is running
  * SGE. But other types of HPC that are supported by GATK Queue (such as PBS)
  * should also be able to execute Biopet tools and pipelines.
  *
  * Copyright 2014 Sequencing Analysis Support Core - Leiden University Medical Center
  *
  * Contact us at: sasc@lumc.nl
  *
  * A dual licensing mode is applied. The source code within this project is freely available for non-commercial use under an AGPL
  * license; For commercial users or users who do not want to follow the AGPL
  * license, please contact us to obtain a separate license.
  */
15 16
package nl.lumc.sasc.biopet.pipelines.bammetrics

17
import java.io.{File, PrintWriter}
18

Peter van 't Hof's avatar
Peter van 't Hof committed
19
import nl.lumc.sasc.biopet.utils.config.Configurable
20 21 22 23 24 25
import nl.lumc.sasc.biopet.core.report.{
  ReportBuilder,
  ReportBuilderExtension,
  ReportPage,
  ReportSection
}
Peter van 't Hof's avatar
Peter van 't Hof committed
26
import nl.lumc.sasc.biopet.utils.ConfigUtils
27
import nl.lumc.sasc.biopet.utils.rscript.{LinePlot, StackedBarPlot}
Peter van 't Hof's avatar
Peter van 't Hof committed
28
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb
Peter van 't Hof's avatar
Peter van 't Hof committed
29 30
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb.Implicts._
import nl.lumc.sasc.biopet.utils.summary.db.SummaryDb._
Peter van 't Hof's avatar
Peter van 't Hof committed
31

32
import scala.concurrent.{Await, Future}
Peter van 't Hof's avatar
Peter van 't Hof committed
33
import scala.concurrent.duration.Duration
34

Peter van 't Hof's avatar
Peter van 't Hof committed
35
class BammetricsReport(val parent: Configurable) extends ReportBuilderExtension {
36
  def builder = BammetricsReport
37
}
38

39
/**
40 41 42 43
  * Object to create a report for [[BamMetrics]]
  *
  * Created by pjvan_thof on 3/30/15.
  */
44
object BammetricsReport extends ReportBuilder {
45

46
  /** Name of report */
47 48
  val reportName = "Bam Metrics"

49 50
  def pipelineName = "bammetrics"

51
  /** Root page for single BamMetrcis report */
Peter van 't Hof's avatar
Peter van 't Hof committed
52 53
  def indexPage: Future[ReportPage] =
    bamMetricsPage(summary, sampleId, libId).map { bamMetricsPage =>
54 55 56 57 58 59 60 61 62 63 64 65 66
      ReportPage(
        bamMetricsPage.subPages ::: List(
          "Versions" -> Future(
            ReportPage(List(),
                       List("Executables" -> ReportSection(
                         "/nl/lumc/sasc/biopet/core/report/executables.ssp")),
                       Map())),
          "Files" -> filesPage(sampleId, libId)
        ),
        List(
          "Report" -> ReportSection(
            "/nl/lumc/sasc/biopet/pipelines/bammetrics/bamMetricsFront.ssp")
        ) ::: bamMetricsPage.sections,
Peter van 't Hof's avatar
Peter van 't Hof committed
67 68 69
        Map()
      )
    }
70

71
  /** Generates a page with alignment stats */
Peter van 't Hof's avatar
Peter van 't Hof committed
72 73 74
  def bamMetricsPage(summary: SummaryDb,
                     sampleId: Option[Int],
                     libId: Option[Int],
Peter van 't Hof's avatar
Peter van 't Hof committed
75
                     metricsTag: String = "bammetrics"): Future[ReportPage] = Future {
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
    val wgsExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "wgs",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1
    val rnaExecuted = summary.getStatsSize(runId,
                                           metricsTag,
                                           "rna",
                                           sample = sampleId.map(SampleId),
                                           library = libId.map(LibraryId)) >= 1

    val insertsizeMetrics = summary
      .getStatKeys(
        runId,
        metricsTag,
        "CollectInsertSizeMetrics",
        sampleId.map(SampleId).getOrElse(NoSample),
        libId.map(LibraryId).getOrElse(NoLibrary),
        Map("metrics" -> List("metrics"))
      )
Peter van 't Hof's avatar
Peter van 't Hof committed
96
      .exists(_._2.isDefined)
97

98 99 100 101 102 103 104 105
    val targetSettings = summary.getSettingKeys(
      runId,
      metricsTag,
      NoModule,
      sample = sampleId.map(SampleId).getOrElse(NoSample),
      library = libId.map(LibraryId).getOrElse(NoLibrary),
      Map("amplicon_name" -> List("amplicon_name"), "roi_name" -> List("roi_name"))
    )
106
    val targets = (
Peter van 't Hof's avatar
Peter van 't Hof committed
107 108
      targetSettings("amplicon_name"),
      targetSettings("roi_name")
Peter van 't Hof's avatar
Peter van 't Hof committed
109
    ) match {
110 111 112 113
      case (Some(amplicon: String), Some(roi: List[_])) => amplicon :: roi.map(_.toString)
      case (_, Some(roi: List[_])) => roi.map(_.toString)
      case _ => Nil
    }
114 115

    ReportPage(
Peter van 't Hof's avatar
Peter van 't Hof committed
116
      if (targets.isEmpty) List()
117 118 119 120 121 122 123 124 125
      else
        List(
          "Targets" -> Future.successful(
            ReportPage(
              List(),
              targets.map(t =>
                t -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/covstatsPlot.ssp",
                                   Map("target" -> Some(t)))),
              Map()))),
126
      List(
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
        "Summary" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/alignmentSummary.ssp"),
        "Mapping Quality" -> ReportSection(
          "/nl/lumc/sasc/biopet/pipelines/bammetrics/mappingQuality.ssp",
          Map("showPlot" -> true)),
        "Clipping" -> ReportSection("/nl/lumc/sasc/biopet/pipelines/bammetrics/clipping.ssp",
                                    Map("showPlot" -> true))
      ) ++
        (if (insertsizeMetrics)
           List(
             "Insert Size" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/insertSize.ssp",
               Map("showPlot" -> true)))
         else Nil) ++ (if (wgsExecuted)
                         List(
                           "Whole genome coverage" -> ReportSection(
                             "/nl/lumc/sasc/biopet/pipelines/bammetrics/wgsHistogram.ssp",
                             Map("showPlot" -> true)))
                       else Nil) ++
        (if (rnaExecuted)
           List(
             "Rna coverage" -> ReportSection(
               "/nl/lumc/sasc/biopet/pipelines/bammetrics/rnaHistogram.ssp",
               Map("showPlot" -> true)))
         else Nil),
152
      Map("metricsTag" -> metricsTag)
153 154 155
    )
  }

156
  /**
157 158 159 160 161 162 163 164
    * Generate a stackbar plot for alignment stats
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
165 166
  def alignmentSummaryPlot(outputDir: File,
                           prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
167
                           summary: SummaryDb,
168
                           libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
169
                           sampleId: Option[Int] = None): Unit = {
170 171 172 173 174 175
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")
    val tsvWriter = new PrintWriter(tsvFile)
    if (libraryLevel) tsvWriter.print("Library") else tsvWriter.print("Sample")
    tsvWriter.println("\tMapped\tDuplicates\tUnmapped\tSecondary")

Peter van 't Hof's avatar
Peter van 't Hof committed
176 177 178 179 180 181 182 183
    val statsPaths = Map(
      "Mapped" -> List("flagstats", "Mapped"),
      "Duplicates" -> List("flagstats", "Duplicates"),
      "All" -> List("flagstats", "All"),
      "NotPrimaryAlignment" -> List("flagstats", "NotPrimaryAlignment")
    )

    val results: Map[(Int, Option[Int]), Map[String, Option[Any]]] = if (libraryLevel) {
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
      summary
        .getStatsForLibraries(runId,
                              "bammetrics",
                              "bamstats",
                              sampleId = sampleId,
                              keyValues = statsPaths)
        .map(x => (x._1._1, Some(x._1._2)) -> x._2)
    } else
      summary
        .getStatsForSamples(runId,
                            "bammetrics",
                            "bamstats",
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2)
Peter van 't Hof's avatar
Peter van 't Hof committed
199

Peter van 't Hof's avatar
Peter van 't Hof committed
200
    for (((s, l), result) <- results) {
Peter van 't Hof's avatar
Peter van 't Hof committed
201
      val sampleName: String = summary.getSampleName(s).map(_.get)
202 203
      val libName: Option[String] =
        l.flatMap(x => Await.result(summary.getLibraryName(x), Duration.Inf))
204
      val sb = new StringBuffer()
205 206
      if (libName.isDefined) sb.append(sampleName + "-" + libName.get + "\t")
      else sb.append(sampleName + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
207 208 209 210
      val mapped = ConfigUtils.any2long(result("Mapped"))
      val duplicates = ConfigUtils.any2long(result("Duplicates"))
      val total = ConfigUtils.any2long(result("All"))
      val secondary = ConfigUtils.any2long(result("NotPrimaryAlignment"))
Peter van 't Hof's avatar
Peter van 't Hof committed
211
      sb.append((mapped - duplicates - secondary) + "\t")
212
      sb.append(duplicates + "\t")
Peter van 't Hof's avatar
Peter van 't Hof committed
213
      sb.append((total - mapped) + "\t")
214
      sb.append(secondary)
Peter van 't Hof's avatar
Peter van 't Hof committed
215
      tsvWriter.println(sb.toString)
216 217 218 219 220 221 222 223
    }

    tsvWriter.close()

    val plot = new StackedBarPlot(null)
    plot.input = tsvFile
    plot.output = pngFile
    plot.ylabel = Some("Reads")
224
    plot.width = Some(200 + (results.size * 10))
Peter van 't Hof's avatar
WIP  
Peter van 't Hof committed
225
    plot.title = Some("Aligned_reads")
226 227 228
    plot.runLocal()
  }

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
  /**
    * This is a generic method to create plots
    * @param outputDir Outputdir of the plot
    * @param prefix Files will start with this name
    * @param summary Summary where the data is
    * @param libraryLevel If enabled the plots will show data per library
    * @param sampleId If set only this sample is shown
    * @param libraryId If set onlt this library is shown
    * @param statsPaths Paths in summary where the tables can be found
    * @param yKeyList Keys to search from, first has prio over second one
    * @param xKeyList Keys to search from, first has prio over second one
    * @param pipeline Query for the pipeline
    * @param module Query for the module
    * @param xlabel X label shown on the plot
    * @param ylabel Y label shown on the plot
    * @param title Title of the plot
    * @param removeZero
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
247
  def writePlotFromSummary(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
248 249 250 251 252 253
                           prefix: String,
                           summary: SummaryDb,
                           libraryLevel: Boolean = false,
                           sampleId: Option[Int] = None,
                           libraryId: Option[Int] = None,
                           statsPaths: Map[String, List[String]],
254 255
                           yKeyList: List[String],
                           xKeyList: List[String],
Peter van 't Hof's avatar
Peter van 't Hof committed
256 257
                           pipeline: PipelineQuery,
                           module: ModuleQuery,
Peter van 't Hof's avatar
Peter van 't Hof committed
258 259 260 261
                           xlabel: Option[String] = None,
                           ylabel: Option[String] = None,
                           title: Option[String] = None,
                           removeZero: Boolean = true): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
262 263 264 265
    val tsvFile = new File(outputDir, prefix + ".tsv")
    val pngFile = new File(outputDir, prefix + ".png")

    val results: Map[(Int, Option[Int]), Map[String, Option[Array[Any]]]] = if (libraryLevel) {
266 267 268 269 270 271 272 273 274 275 276 277 278
      summary
        .getStatsForLibraries(runId, pipeline, module, sampleId = sampleId, keyValues = statsPaths)
        .map(x =>
          (x._1._1, Some(x._1._2)) -> x._2.map(x =>
            x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
    } else
      summary
        .getStatsForSamples(runId,
                            pipeline,
                            module,
                            sample = sampleId.map(SampleId),
                            keyValues = statsPaths)
        .map(x => (x._1, None) -> x._2.map(x => x._1 -> x._2.map(ConfigUtils.any2list(_).toArray)))
Peter van 't Hof's avatar
Peter van 't Hof committed
279

Peter van 't Hof's avatar
Peter van 't Hof committed
280 281
    val tables: Array[Map[String, Array[Any]]] = results.map {
      case ((sample, library), map) =>
282 283
        val sampleName = Await
          .result(summary.getSampleName(sample), Duration.Inf)
Peter van 't Hof's avatar
Peter van 't Hof committed
284
          .getOrElse(throw new IllegalStateException("Sample must be there"))
285 286
        val libraryName =
          library.flatMap(l => Await.result(summary.getLibraryName(l), Duration.Inf))
Peter van 't Hof's avatar
Peter van 't Hof committed
287 288
        val yKey = yKeyList.find(x => map.contains(x) && map(x).isDefined).getOrElse("none")
        val xKey = xKeyList.find(x => map.contains(x) && map(x).isDefined).getOrElse("none")
Peter van 't Hof's avatar
Peter van 't Hof committed
289
        Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
290 291 292 293
          yKeyList.head -> map.getOrElse(yKey, None).getOrElse(Array()),
          (sampleName + libraryName.map("-" + _).getOrElse("")) -> map
            .getOrElse(xKey, None)
            .getOrElse(Array())
Peter van 't Hof's avatar
Peter van 't Hof committed
294
        )
Peter van 't Hof's avatar
Peter van 't Hof committed
295 296
    }.toArray

297
    writeTableToTsv(tsvFile, mergeTables(tables, yKeyList.head), yKeyList.head)
Peter van 't Hof's avatar
Peter van 't Hof committed
298

299 300 301 302 303 304 305
    LinePlot(tsvFile,
             pngFile,
             xlabel = xlabel,
             ylabel = ylabel,
             title = title,
             hideLegend = results.size > 40,
             removeZero = removeZero).runLocal()
Peter van 't Hof's avatar
Peter van 't Hof committed
306 307
  }

308
  /**
309 310 311 312 313 314 315 316
    * Generate a line plot for insertsize
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
317
  def insertSizePlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
318
                     prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
319
                     summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
320
                     libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
321
                     sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
322
                     libraryId: Option[Int] = None): Unit = {
Peter van 't Hof's avatar
Peter van 't Hof committed
323 324
    val statsPaths = Map(
      "insert_size" -> List("histogram", "insert_size"),
Peter van 't Hof's avatar
Peter van 't Hof committed
325
      "count" -> List("histogram", "All_Reads.fr_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
326
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
327

328 329 330 331 332 333 334 335
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
336 337
      "insert_size" :: Nil,
      "count" :: Nil,
338 339 340 341 342 343
      "bammetrics",
      "CollectInsertSizeMetrics",
      "Insert size",
      "Reads",
      "Insert size"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
344
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
345

346
  def mappingQualityPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
347
                         prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
348
                         summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
349
                         libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
350
                         sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
351 352 353 354
                         libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "mapping_quality" -> List("mapping_quality", "histogram", "values"),
      "count" -> List("mapping_quality", "histogram", "counts")
355 356
    )

357 358 359 360 361 362 363 364
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
365 366
      "mapping_quality" :: Nil,
      "count" :: Nil,
367 368 369 370 371 372
      "bammetrics",
      "bamstats",
      "Mapping quality",
      "Reads",
      "Mapping quality"
    )
373 374 375
  }

  def clippingPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
376
                   prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
377
                   summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
378
                   libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
379
                   sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
380 381 382 383
                   libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
      "clipping" -> List("clipping", "histogram", "values"),
      "count" -> List("clipping", "histogram", "counts")
384 385
    )

386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "clipping" :: Nil,
      "count" :: Nil,
      "bammetrics",
      "bamstats",
      "Clipping",
      "Reads",
      "Clipping"
    )
402 403
  }

404
  /**
405 406 407 408 409 410 411 412
    * Generate a line plot for wgs coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
413
  def wgsHistogramPlot(outputDir: File,
Peter van 't Hof's avatar
Peter van 't Hof committed
414
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
415
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
416
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
417
                       sampleId: Option[Int] = None,
Peter van 't Hof's avatar
Peter van 't Hof committed
418 419
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
420
      "coverage" -> List("histogram", "coverage"),
421 422
      "count" -> List("histogram", "count"),
      "high_quality_coverage_count" -> List("histogram", "high_quality_coverage_count")
Peter van 't Hof's avatar
Peter van 't Hof committed
423
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
424

425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
      "coverage" :: Nil,
      "count" :: "high_quality_coverage_count" :: Nil,
      "bammetrics",
      "wgs",
      "Coverage",
      "Bases",
      "Whole genome coverage"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
441
  }
Peter van 't Hof's avatar
Peter van 't Hof committed
442 443

  /**
444 445 446 447 448 449 450 451
    * Generate a line plot for rna coverage
    *
    * @param outputDir OutputDir for the tsv and png file
    * @param prefix Prefix of the tsv and png file
    * @param summary Summary class
    * @param libraryLevel Default false, when set true plot will be based on library stats instead of sample stats
    * @param sampleId Default it selects all sampples, when sample is giving it limits to selected sample
    */
Peter van 't Hof's avatar
Peter van 't Hof committed
452 453
  def rnaHistogramPlot(outputDir: File,
                       prefix: String,
Peter van 't Hof's avatar
Peter van 't Hof committed
454
                       summary: SummaryDb,
Peter van 't Hof's avatar
Peter van 't Hof committed
455
                       libraryLevel: Boolean = false,
Peter van 't Hof's avatar
Peter van 't Hof committed
456 457 458
                       sampleId: Option[Int] = None,
                       libraryId: Option[Int] = None): Unit = {
    val statsPaths = Map(
Peter van 't Hof's avatar
Peter van 't Hof committed
459 460
      "position" -> List("histogram", "normalized_position"),
      "count" -> List("histogram", "All_Reads.normalized_coverage")
Peter van 't Hof's avatar
Peter van 't Hof committed
461
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
462

463 464 465 466 467 468 469 470
    writePlotFromSummary(
      outputDir,
      prefix,
      summary,
      libraryLevel,
      sampleId,
      libraryId,
      statsPaths,
471 472
      "position" :: Nil,
      "count" :: Nil,
473 474 475 476 477 478
      "bammetrics",
      "rna",
      "Relative position",
      "Coverage",
      "Rna coverage"
    )
Peter van 't Hof's avatar
Peter van 't Hof committed
479 480
  }

Peter van 't Hof's avatar
Peter van 't Hof committed
481
  def mergeTables(tables: Array[Map[String, Array[Any]]],
482 483
                  mergeColumn: String,
                  defaultValue: Any = 0): Map[String, Array[Any]] = {
Peter van 't Hof's avatar
Peter van 't Hof committed
484 485
    val keys = tables.flatMap(x => x(mergeColumn)).distinct
    (for (table <- tables; (columnKey, columnValues) <- table if columnKey != mergeColumn) yield {
486 487
      columnKey -> keys.map(x =>
        table(mergeColumn).zip(columnValues).toMap.getOrElse(x, defaultValue))
Peter van 't Hof's avatar
Peter van 't Hof committed
488 489 490 491
    }).toMap + (mergeColumn -> keys)
  }

  def writeTableToTsv(tsvFile: File, table: Map[String, Array[Any]], firstColumn: String): Unit = {
492
    require(table.map(_._2.length).toList.distinct.size == 1,
493
            "Not all values has the same number or rows")
Peter van 't Hof's avatar
Peter van 't Hof committed
494 495 496
    val keys = table.keys.filterNot(_ == firstColumn).toList.sorted
    val writer = new PrintWriter(tsvFile)
    writer.println((firstColumn :: keys).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
497 498 499
    table(firstColumn).zipWithIndex.foreach {
      case (c, i) =>
        writer.println((c :: keys.map(x => table(x)(i))).mkString("\t"))
Peter van 't Hof's avatar
Peter van 't Hof committed
500 501 502
    }
    writer.close()
  }
503
}